Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions inttest/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ check-addons: TIMEOUT=10m
# Autopilot 3x3 HA test can take a while to run
check-ap-ha3x3: export K0S_UPDATE_TO_VERSION ?= $(shell ../k0s version)

# Variant of ap-single that launches the controller with a non-default
# --status-socket and --data-dir, exercising the StatusSocketPath plumbing
# through the autopilot signal controllers (regression test for the
# hardcoded-default-socket bug).
check-ap-single-custom-socket: K0S_INTTEST_PACKAGE = ap-single

check-ap-updater: .update-server.stamp
check-ap-updater-periodic: .update-server.stamp
check-ap-updater-periodic: TIMEOUT=10m
Expand Down
1 change: 1 addition & 0 deletions inttest/Makefile.variables
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ smoketests := \
check-ap-removedapis \
check-ap-selector \
check-ap-single \
check-ap-single-custom-socket \
check-ap-updater \
check-ap-updater-periodic \
check-backup \
Expand Down
53 changes: 52 additions & 1 deletion inttest/ap-single/single_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
package single

import (
"os"
"strings"
"testing"

apconst "github.com/k0sproject/k0s/pkg/autopilot/constant"
Expand All @@ -18,8 +20,24 @@ import (

const (
ManifestTestDirPerms = "775"

// customStatusSocketPath is a non-default socket location used by the
// "ap-single-custom-socket" target to verify that autopilot signal
// controllers honor --status-socket. Default would be /run/k0s/status.sock.
customStatusSocketPath = "/run/k0s/custom/status.sock"
// customDataDir places the data dir (and therefore many derived paths) in
// a non-default location, exercising the wider --data-dir surface.
customDataDir = "/var/lib/k0s-custom"
)

// useCustomStatusSocket reports whether the suite was invoked through the
// "ap-single-custom-socket" check target. Driven by K0S_INTTEST_TARGET, the
// same env-var convention used by other parameterized inttests
// (cplb-userspace, dualstack, network-conformance, ...).
func useCustomStatusSocket() bool {
return strings.Contains(os.Getenv("K0S_INTTEST_TARGET"), "custom-socket")
}

type plansSingleControllerSuite struct {
common.BootlooseSuite
}
Expand All @@ -35,7 +53,19 @@ func (s *plansSingleControllerSuite) SetupTest() {
defer ssh.Disconnect()
require.NoError(ssh.Exec(ctx, "cp /dist/k0s /tmp/k0s", common.SSHStreams{}))

require.NoError(s.InitController(0, "--single", "--disable-components=metrics-server"))
args := []string{"--single", "--disable-components=metrics-server"}
if useCustomStatusSocket() {
// Pre-create the parent directories so k0s doesn't have to mkdir at
// odd paths during early boot. These match the flags below.
require.NoError(ssh.Exec(ctx, "mkdir -p /run/k0s/custom "+customDataDir, common.SSHStreams{}))
args = append(args,
"--status-socket="+customStatusSocketPath,
"--data-dir="+customDataDir,
)
s.T().Logf("Launching controller with custom status socket %q and data dir %q",
customStatusSocketPath, customDataDir)
}
require.NoError(s.InitController(0, args...))

client, err := s.KubeClient(nodeName)
require.NoError(err)
Expand Down Expand Up @@ -97,6 +127,27 @@ spec:
s.Empty(cmd.K0sUpdate.Workers)
s.Equal(appc.SignalCompleted, cmd.K0sUpdate.Controllers[0].State)
}

if useCustomStatusSocket() {
// Defense against #3719: an AP test can report PlanCompleted even
// when the cluster is unhealthy. Cross-check that:
// 1. The custom status socket was actually created (the controller
// honored --status-socket).
// 2. The default /run/k0s/status.sock was NOT created (no caller
// fell back to the hardcoded path).
// If autopilot's signal controllers had still hardcoded the default
// path (the bug this PR fixes), step 2 would fail because the
// post-restart status probe would either create or contact the
// default socket location.
ssh, err := s.SSH(ctx, s.ControllerNode(0))
s.Require().NoError(err)
defer ssh.Disconnect()

s.NoError(ssh.Exec(ctx, "test -S "+customStatusSocketPath, common.SSHStreams{}),
"custom status socket %q should exist", customStatusSocketPath)
s.Error(ssh.Exec(ctx, "test -e /run/k0s/status.sock", common.SSHStreams{}),
"default status socket /run/k0s/status.sock should NOT exist when --status-socket is set")
}
}

// TestPlansSingleControllerSuite sets up a suite using a single controller, running various
Expand Down
1 change: 1 addition & 0 deletions pkg/autopilot/controller/root/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ type RootConfig struct {
InvocationID string
KubeConfig string
K0sDataDir string
StatusSocketPath string
KubeletExtraArgs string
KubeAPIPort int
Mode string
Expand Down
8 changes: 6 additions & 2 deletions pkg/autopilot/controller/root_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
aproot "github.com/k0sproject/k0s/pkg/autopilot/controller/root"
"github.com/k0sproject/k0s/pkg/autopilot/controller/signal"
"github.com/k0sproject/k0s/pkg/autopilot/controller/updates"
k0sstatus "github.com/k0sproject/k0s/pkg/component/status"
"github.com/k0sproject/k0s/pkg/kubernetes"
"github.com/k0sproject/k0s/pkg/leaderelection"

Expand Down Expand Up @@ -63,6 +64,9 @@ var _ aproot.Root = (*rootController)(nil)

// NewRootController builds a root for autopilot "controller" operations.
func NewRootController(cfg aproot.RootConfig, logger *logrus.Entry, enableWorker bool, acf apcli.FactoryInterface, clusterInfoCollector *updates.ClusterInfoCollector, apiAddress netip.Addr) (aproot.Root, error) {
if cfg.StatusSocketPath == "" {
cfg.StatusSocketPath = k0sstatus.DefaultSocketPath
}
c := &rootController{
cfg: cfg,
log: logger,
Expand Down Expand Up @@ -237,7 +241,7 @@ func (c *rootController) startSubControllerRoutine(ctx context.Context, logger *
}
clusterID := string(ns.UID)

if err := signal.RegisterControllers(ctx, logger, mgr, delegateMap[apdel.ControllerDelegateController], c.cfg.K0sDataDir, c.enableWorker, clusterID, event, c.cfg.InvocationID); err != nil {
if err := signal.RegisterControllers(ctx, logger, mgr, delegateMap[apdel.ControllerDelegateController], c.cfg.K0sDataDir, c.cfg.StatusSocketPath, c.enableWorker, clusterID, event, c.cfg.InvocationID); err != nil {
logger.WithError(err).Error("unable to register signal controllers")
return err
}
Expand All @@ -247,7 +251,7 @@ func (c *rootController) startSubControllerRoutine(ctx context.Context, logger *
return err
}

if err := updates.RegisterControllers(ctx, logger, mgr, c.autopilotClientFactory, c.clusterInfoCollector, leaderMode, clusterID); err != nil {
if err := updates.RegisterControllers(ctx, logger, mgr, c.autopilotClientFactory, c.clusterInfoCollector, leaderMode, clusterID, c.cfg.StatusSocketPath); err != nil {
logger.WithError(err).Error("unable to register updates controllers")
return err
}
Expand Down
6 changes: 5 additions & 1 deletion pkg/autopilot/controller/root_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
apdel "github.com/k0sproject/k0s/pkg/autopilot/controller/delegate"
aproot "github.com/k0sproject/k0s/pkg/autopilot/controller/root"
"github.com/k0sproject/k0s/pkg/autopilot/controller/signal"
"github.com/k0sproject/k0s/pkg/component/status"
"github.com/k0sproject/k0s/pkg/leaderelection"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -41,6 +42,9 @@ var _ aproot.Root = (*rootWorker)(nil)

// NewRootWorker builds a root for autopilot "worker" operations.
func NewRootWorker(cfg aproot.RootConfig, logger *logrus.Entry, cf apcli.FactoryInterface) (aproot.Root, error) {
if cfg.StatusSocketPath == "" {
cfg.StatusSocketPath = status.DefaultSocketPath
}
c := &rootWorker{
cfg: cfg,
log: logger,
Expand Down Expand Up @@ -111,7 +115,7 @@ func (w *rootWorker) Run(ctx context.Context) error {
return fmt.Errorf("unable to register indexers: %w", err)
}

if err := signal.RegisterControllers(ctx, logger, mgr, apdel.NodeControllerDelegate(), w.cfg.K0sDataDir, true, clusterID, leaderelection.StatusPending, w.cfg.InvocationID); err != nil {
if err := signal.RegisterControllers(ctx, logger, mgr, apdel.NodeControllerDelegate(), w.cfg.K0sDataDir, w.cfg.StatusSocketPath, true, clusterID, leaderelection.StatusPending, w.cfg.InvocationID); err != nil {
return fmt.Errorf("unable to register signal controllers: %w", err)
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/autopilot/controller/signal/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ import (

// RegisterControllers registers all of the autopilot controllers used by both controller
// and worker modes.
func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Manager, delegate apdel.ControllerDelegate, k0sDataDir string, enableWorker bool, clusterID string, leaseStatus leaderelection.Status, invocationID string) error {
if err := k0s.RegisterControllers(ctx, logger, mgr, delegate, enableWorker, clusterID, leaseStatus, invocationID); err != nil {
func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Manager, delegate apdel.ControllerDelegate, k0sDataDir string, statusSocketPath string, enableWorker bool, clusterID string, leaseStatus leaderelection.Status, invocationID string) error {
if err := k0s.RegisterControllers(ctx, logger, mgr, delegate, statusSocketPath, enableWorker, clusterID, leaseStatus, invocationID); err != nil {
return fmt.Errorf("unable to register k0s controllers: %w", err)
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/autopilot/controller/signal/k0s/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (

// RegisterControllers registers all of the autopilot controllers used for updating `k0s`
// to the controller-runtime manager.
func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Manager, delegate apdel.ControllerDelegate, enableWorker bool, clusterID string, leaseStatus leaderelection.Status, invocationID string) error {
func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Manager, delegate apdel.ControllerDelegate, statusSocketPath string, enableWorker bool, clusterID string, leaseStatus leaderelection.Status, invocationID string) error {
logger = logger.WithField("controller", delegate.Name())

hostname, err := apcomm.FindEffectiveHostname()
Expand All @@ -47,7 +47,7 @@ func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Ma
logger.Infof("Using effective hostname = '%v'", hostname)

k0sVersionHandler := func() (string, error) {
return getK0sVersion(status.DefaultSocketPath)
return getK0sVersion(statusSocketPath)
}

if enableWorker {
Expand Down Expand Up @@ -92,11 +92,11 @@ func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Ma
return fmt.Errorf("unable to register applying-update controller: %w", err)
}

if err := registerRestart(logger, mgr, restartEventFilter(hostname, apsigpred.DefaultErrorHandler(logger, "k0s restart")), delegate); err != nil {
if err := registerRestart(logger, mgr, restartEventFilter(hostname, apsigpred.DefaultErrorHandler(logger, "k0s restart")), delegate, statusSocketPath); err != nil {
return fmt.Errorf("unable to register restart controller: %w", err)
}

if err := registerRestarted(logger, mgr, restartedEventFilter(hostname, apsigpred.DefaultErrorHandler(logger, "k0s restarted")), delegate); err != nil {
if err := registerRestarted(logger, mgr, restartedEventFilter(hostname, apsigpred.DefaultErrorHandler(logger, "k0s restarted")), delegate, statusSocketPath); err != nil {
return fmt.Errorf("unable to register restarted controller: %w", err)
}

Expand Down
20 changes: 11 additions & 9 deletions pkg/autopilot/controller/signal/k0s/restart_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,17 @@ func restartEventFilter(hostname string, handler apsigpred.ErrorHandler) crpred.
}

type restart struct {
log *logrus.Entry
client crcli.Client
delegate apdel.ControllerDelegate
log *logrus.Entry
client crcli.Client
delegate apdel.ControllerDelegate
statusSocketPath string
}

// registerRestart registers the 'restart' controller to the controller-runtime manager.
//
// This controller is only interested in changes to signal nodes where its signaling
// status is marked as `Restart`
func registerRestart(logger *logrus.Entry, mgr crman.Manager, eventFilter crpred.Predicate, delegate apdel.ControllerDelegate) error {
func registerRestart(logger *logrus.Entry, mgr crman.Manager, eventFilter crpred.Predicate, delegate apdel.ControllerDelegate, statusSocketPath string) error {
name := strings.ToLower(delegate.Name()) + "_k0s_restart"
logger.Info("Registering reconciler: ", name)

Expand All @@ -69,9 +70,10 @@ func registerRestart(logger *logrus.Entry, mgr crman.Manager, eventFilter crpred
WithEventFilter(eventFilter).
Complete(
&restart{
log: logger.WithFields(logrus.Fields{"reconciler": "k0s-restart", "object": delegate.Name()}),
client: mgr.GetClient(),
delegate: delegate,
log: logger.WithFields(logrus.Fields{"reconciler": "k0s-restart", "object": delegate.Name()}),
client: mgr.GetClient(),
delegate: delegate,
statusSocketPath: statusSocketPath,
},
)
}
Expand All @@ -93,7 +95,7 @@ func (r *restart) Reconcile(ctx context.Context, req cr.Request) (cr.Result, err

// Get the current version of k0s
logger.Info("Determining the current version of k0s")
k0sVersion, err := getK0sVersion(status.DefaultSocketPath)
k0sVersion, err := getK0sVersion(r.statusSocketPath)
if err != nil {
logger.Info("Unable to determine current verion of k0s; requeuing")
return cr.Result{}, fmt.Errorf("unable to get k0s version: %w", err)
Expand Down Expand Up @@ -133,7 +135,7 @@ func (r *restart) Reconcile(ctx context.Context, req cr.Request) (cr.Result, err

logger.Info("Preparing to restart k0s")

k0sPid, err := getK0sPid(status.DefaultSocketPath)
k0sPid, err := getK0sPid(r.statusSocketPath)
if err != nil {
logger.Info("Unable to determine current k0s pid; requeuing")
return cr.Result{RequeueAfter: restartRequeueDuration}, fmt.Errorf("unable to get k0s pid: %w", err)
Expand Down
19 changes: 10 additions & 9 deletions pkg/autopilot/controller/signal/k0s/restarted_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ import (
apdel "github.com/k0sproject/k0s/pkg/autopilot/controller/delegate"
apsigpred "github.com/k0sproject/k0s/pkg/autopilot/controller/signal/common/predicate"
apsigv2 "github.com/k0sproject/k0s/pkg/autopilot/signaling/v2"
"github.com/k0sproject/k0s/pkg/component/status"

"github.com/sirupsen/logrus"
cr "sigs.k8s.io/controller-runtime"
Expand All @@ -25,9 +24,10 @@ import (
)

type restarted struct {
log *logrus.Entry
client crcli.Client
delegate apdel.ControllerDelegate
log *logrus.Entry
client crcli.Client
delegate apdel.ControllerDelegate
statusSocketPath string
}

// restartedEventFilter creates a controller-runtime predicate that governs which
Expand All @@ -51,7 +51,7 @@ func restartedEventFilter(hostname string, handler apsigpred.ErrorHandler) crpre
//
// This controller is only interested in changes to signal nodes where its signaling
// status is marked as `Restart`
func registerRestarted(logger *logrus.Entry, mgr crman.Manager, eventFilter crpred.Predicate, delegate apdel.ControllerDelegate) error {
func registerRestarted(logger *logrus.Entry, mgr crman.Manager, eventFilter crpred.Predicate, delegate apdel.ControllerDelegate, statusSocketPath string) error {
name := strings.ToLower(delegate.Name()) + "_k0s_restarted"
logger.Info("Registering reconciler: ", name)

Expand All @@ -61,9 +61,10 @@ func registerRestarted(logger *logrus.Entry, mgr crman.Manager, eventFilter crpr
WithEventFilter(eventFilter).
Complete(
&restarted{
log: logger.WithFields(logrus.Fields{"reconciler": "k0s-restarted", "object": delegate.Name()}),
client: mgr.GetClient(),
delegate: delegate,
log: logger.WithFields(logrus.Fields{"reconciler": "k0s-restarted", "object": delegate.Name()}),
client: mgr.GetClient(),
delegate: delegate,
statusSocketPath: statusSocketPath,
},
)
}
Expand All @@ -87,7 +88,7 @@ func (r *restarted) Reconcile(ctx context.Context, req cr.Request) (cr.Result, e

// Get the current version of k0s
logger.Info("Determining the current version of k0s")
k0sVersion, err := getK0sVersion(status.DefaultSocketPath)
k0sVersion, err := getK0sVersion(r.statusSocketPath)
if err != nil {
logger.Info("Unable to determine current verion of k0s; requeuing")
return cr.Result{}, fmt.Errorf("unable to get k0s version: %w", err)
Expand Down
28 changes: 15 additions & 13 deletions pkg/autopilot/controller/updates/update_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,30 +23,32 @@ import (
)

type updateController struct {
log *logrus.Entry
client crcli.Client
clientFactory apcli.FactoryInterface
collector *ClusterInfoCollector
log *logrus.Entry
client crcli.Client
clientFactory apcli.FactoryInterface
collector *ClusterInfoCollector
statusSocketPath string

clusterID string

updaters map[string]updater
parentCtx context.Context
}

func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Manager, clientFactory apcli.FactoryInterface, collector *ClusterInfoCollector, leaderMode bool, clusterID string) error {
func RegisterControllers(ctx context.Context, logger *logrus.Entry, mgr crman.Manager, clientFactory apcli.FactoryInterface, collector *ClusterInfoCollector, leaderMode bool, clusterID string, statusSocketPath string) error {
return cr.NewControllerManagedBy(mgr).
Named("updater").
For(&apv1beta2.UpdateConfig{}).
Complete(
&updateController{
log: logger.WithField("reconciler", "updater"),
client: mgr.GetClient(),
clientFactory: clientFactory,
collector: collector,
clusterID: clusterID,
updaters: make(map[string]updater),
parentCtx: ctx,
log: logger.WithField("reconciler", "updater"),
client: mgr.GetClient(),
clientFactory: clientFactory,
collector: collector,
statusSocketPath: statusSocketPath,
clusterID: clusterID,
updaters: make(map[string]updater),
parentCtx: ctx,
},
)
}
Expand Down Expand Up @@ -94,7 +96,7 @@ func (u *updateController) Reconcile(ctx context.Context, req cr.Request) (cr.Re
}
u.log.Debugf("creating new updater for '%s'", req.NamespacedName)
// Create new updater
updater, err := newUpdater(u.parentCtx, *updaterConfig, u.client, u.clientFactory, u.clusterID, u.collector, token)
updater, err := newUpdater(u.parentCtx, *updaterConfig, u.client, u.clientFactory, u.clusterID, u.collector, token, u.statusSocketPath)
if err != nil {
u.log.Errorf("failed to create updater for '%s': %s", req.NamespacedName, err)
return cr.Result{}, err
Expand Down
Loading
Loading