diff --git a/internal/controller/metal3.io/baremetalhost_controller.go b/internal/controller/metal3.io/baremetalhost_controller.go index b2bb51a9f4..68c79b6ffb 100644 --- a/internal/controller/metal3.io/baremetalhost_controller.go +++ b/internal/controller/metal3.io/baremetalhost_controller.go @@ -240,16 +240,21 @@ func (r *BareMetalHostReconciler) Reconcile(ctx context.Context, request ctrl.Re return result, err } + // Always compute conditions since some (e.g. Healthy) can change + // based on external state from Ironic regardless of state machine + // transitions. + conditionsBefore := slices.Clone(host.GetConditions()) + computeConditions(ctx, host, prov) + conditionsChanged := !reflect.DeepEqual(conditionsBefore, host.GetConditions()) + // Only save status when we're told to, otherwise we // introduce an infinite loop reconciling the same object over and // over when there is an unrecoverable error (tracked through the // error state of the host). - if actResult.Dirty() { - // Save Host + if actResult.Dirty() || conditionsChanged { info.log.Info("saving host status", "operational status", host.OperationalStatus(), "provisioning state", host.Status.Provisioning.State) - computeConditions(ctx, host, prov) err = r.saveHostStatus(ctx, host) if err != nil { return ctrl.Result{}, fmt.Errorf("failed to save host status after %q: %w", initialState, err) @@ -2304,15 +2309,17 @@ func computeConditions(ctx context.Context, host *metal3api.BareMetalHost, prov setConditionUnknown(host, metal3api.HealthyCondition, metal3api.UnknownHealthReason) return } - switch prov.GetHealth(ctx) { - case provisioner.HealthOK: - setConditionTrue(host, metal3api.HealthyCondition, metal3api.HealthyReason) - case provisioner.HealthWarning: - setConditionFalse(host, metal3api.HealthyCondition, metal3api.WarningHealthReason) - case provisioner.HealthCritical: - setConditionFalse(host, metal3api.HealthyCondition, metal3api.CriticalHealthReason) - default: - setConditionUnknown(host, metal3api.HealthyCondition, metal3api.UnknownHealthReason) + if health := prov.GetHealth(ctx); health != "" { + switch health { + case provisioner.HealthOK: + setConditionTrue(host, metal3api.HealthyCondition, metal3api.HealthyReason) + case provisioner.HealthWarning: + setConditionFalse(host, metal3api.HealthyCondition, metal3api.WarningHealthReason) + case provisioner.HealthCritical: + setConditionFalse(host, metal3api.HealthyCondition, metal3api.CriticalHealthReason) + default: + setConditionUnknown(host, metal3api.HealthyCondition, metal3api.UnknownHealthReason) + } } } diff --git a/internal/controller/metal3.io/baremetalhost_controller_test.go b/internal/controller/metal3.io/baremetalhost_controller_test.go index a2dcd0482f..4de4adf77a 100644 --- a/internal/controller/metal3.io/baremetalhost_controller_test.go +++ b/internal/controller/metal3.io/baremetalhost_controller_test.go @@ -3187,12 +3187,6 @@ func TestComputeHealthyCondition(t *testing.T) { ExpectedStatus metav1.ConditionStatus ExpectedReason string }{ - { - Scenario: "empty health reports unknown", - Health: "", - ExpectedStatus: metav1.ConditionUnknown, - ExpectedReason: metal3api.UnknownHealthReason, - }, { Scenario: "OK health", Health: provisioner.HealthOK, @@ -3232,6 +3226,17 @@ func TestComputeHealthyCondition(t *testing.T) { assert.Equal(t, tc.ExpectedReason, cond.Reason) }) } + + t.Run("empty health leaves condition unchanged", func(t *testing.T) { + host := bmhWithStatus(metal3api.OperationalStatusOK, metal3api.StateAvailable) + fix := &fixture.Fixture{Health: ""} + prov, err := fix.NewProvisioner(t.Context(), provisioner.BuildHostData(*host, bmc.Credentials{}), nil) + require.NoError(t, err) + computeConditions(t.Context(), host, prov) + + cond := conditions.Get(host, metal3api.HealthyCondition) + assert.Nil(t, cond, "empty health should not set the Healthy condition") + }) } func TestComputeConditions(t *testing.T) { @@ -3331,8 +3336,12 @@ func TestComputeConditions(t *testing.T) { assert.True(t, conditions.IsFalse(tc.BareMetalHost, metal3api.ReadyCondition)) } cond := conditions.Get(tc.BareMetalHost, metal3api.HealthyCondition) - require.NotNil(t, cond, "Healthy condition should always be set") - assert.Equal(t, metav1.ConditionUnknown, cond.Status, "Healthy should be Unknown when health is not reported") + if tc.provisioner == nil { + require.NotNil(t, cond, "Healthy condition should be set when provisioner is nil") + assert.Equal(t, metav1.ConditionUnknown, cond.Status, "Healthy should be Unknown when provisioner is nil") + } else { + assert.Nil(t, cond, "Healthy condition should not be set when provisioner reports no health data") + } }) } } diff --git a/pkg/provisioner/ironic/clients/features.go b/pkg/provisioner/ironic/clients/features.go index c7374d3788..76fc17cb31 100644 --- a/pkg/provisioner/ironic/clients/features.go +++ b/pkg/provisioner/ironic/clients/features.go @@ -45,7 +45,8 @@ func (af AvailableFeatures) Log(logger logr.Logger) { "maxVersion", fmt.Sprintf("1.%d", af.MaxVersion), "chosenVersion", af.ChooseMicroversion(), "virtualMediaGET", af.HasVirtualMediaGetAPI(), - "disablePowerOff", af.HasDisablePowerOff()) + "disablePowerOff", af.HasDisablePowerOff(), + "healthAPI", af.HasHealthAPI()) } func (af AvailableFeatures) HasVirtualMediaGetAPI() bool { @@ -56,7 +57,15 @@ func (af AvailableFeatures) HasDisablePowerOff() bool { return af.MaxVersion >= 95 //nolint:mnd } +func (af AvailableFeatures) HasHealthAPI() bool { + return af.MaxVersion >= 109 //nolint:mnd +} + func (af AvailableFeatures) ChooseMicroversion() string { + if af.HasHealthAPI() { + return "1.109" + } + if af.HasDisablePowerOff() { return "1.95" } diff --git a/pkg/provisioner/ironic/clients/features_test.go b/pkg/provisioner/ironic/clients/features_test.go index df3d594e67..05364050cf 100644 --- a/pkg/provisioner/ironic/clients/features_test.go +++ b/pkg/provisioner/ironic/clients/features_test.go @@ -6,7 +6,6 @@ import ( ) func TestAvailableFeatures_ChooseMicroversion(t *testing.T) { - microVersion := "1.95" type fields struct { MaxVersion int } @@ -23,18 +22,32 @@ func TestAvailableFeatures_ChooseMicroversion(t *testing.T) { want: baselineVersionString, }, { - name: fmt.Sprintf("MaxVersion = %d return %s", 89, microVersion), + name: "MaxVersion = 95 return 1.95", feature: fields{ MaxVersion: 95, }, - want: microVersion, + want: "1.95", }, { - name: fmt.Sprintf("MaxVersion > %d return %s", 89, microVersion), + name: "MaxVersion = 100 return 1.95", feature: fields{ MaxVersion: 100, }, - want: microVersion, + want: "1.95", + }, + { + name: "MaxVersion = 109 return 1.109", + feature: fields{ + MaxVersion: 109, + }, + want: "1.109", + }, + { + name: "MaxVersion > 109 return 1.109", + feature: fields{ + MaxVersion: 115, + }, + want: "1.109", }, } for _, tt := range tests { diff --git a/pkg/provisioner/ironic/ironic.go b/pkg/provisioner/ironic/ironic.go index afae65bf4d..e99cee6b78 100644 --- a/pkg/provisioner/ironic/ironic.go +++ b/pkg/provisioner/ironic/ironic.go @@ -2004,7 +2004,9 @@ func (p *ironicProvisioner) HasPowerFailure(ctx context.Context) bool { func (p *ironicProvisioner) GetHealth(ctx context.Context) string { node, err := p.getNode(ctx) if err != nil { - p.log.Error(err, "ignored error while checking health status") + if !errors.Is(err, provisioner.ErrNeedsRegistration) { + p.log.Error(err, "ignored error while checking health status") + } return "" } return node.Health