Skip to content

Commit 9488e3f

Browse files
committed
Fix Healthy condition not updating from Ironic health data
Three bugs prevented the Healthy condition from working correctly on provisioned hosts: 1. computeConditions was only called inside the actResult.Dirty() block. For steady-state provisioned hosts where nothing changes during reconciliation, conditions were never recomputed. This meant the Healthy condition (populated from Ironic's health monitoring API) stayed at its initial Unknown value forever, since health status changes externally in Ironic and doesn't trigger state machine transitions. 2. Empty health from GetHealth (e.g. when the node is not yet registered) was treated as "unknown" via the default switch case, overwriting a previously valid Healthy=True on every idle reconcile. Now empty health means "no data, leave the existing condition unchanged" — only non-empty values from Ironic update the condition. 3. conditionsBefore used a slice reference sharing the same backing array as host.Status.Conditions. In-place modifications by conditions.Set were visible through both references, making reflect.DeepEqual always return true. Deep copy the slice before comparing so condition-only changes trigger a status save. Also suppress ErrNeedsRegistration log noise in GetHealth, which fires on every reconcile before the node is registered in Ironic. Assisted-By: Claude Opus 4.6 Signed-off-by: Jacob Anders <jacob-anders-dev@proton.me> (cherry picked from commit 0f32c4f)
1 parent fb6a6d5 commit 9488e3f

File tree

3 files changed

+41
-21
lines changed

3 files changed

+41
-21
lines changed

internal/controller/metal3.io/baremetalhost_controller.go

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -240,16 +240,23 @@ func (r *BareMetalHostReconciler) Reconcile(ctx context.Context, request ctrl.Re
240240
return result, err
241241
}
242242

243+
// Always compute conditions since some (e.g. Healthy) can change
244+
// based on external state from Ironic regardless of state machine
245+
// transitions.
246+
existing := host.GetConditions()
247+
conditionsBefore := make([]metav1.Condition, len(existing))
248+
copy(conditionsBefore, existing)
249+
computeConditions(ctx, host, prov)
250+
conditionsChanged := !reflect.DeepEqual(conditionsBefore, host.GetConditions())
251+
243252
// Only save status when we're told to, otherwise we
244253
// introduce an infinite loop reconciling the same object over and
245254
// over when there is an unrecoverable error (tracked through the
246255
// error state of the host).
247-
if actResult.Dirty() {
248-
// Save Host
256+
if actResult.Dirty() || conditionsChanged {
249257
info.log.Info("saving host status",
250258
"operational status", host.OperationalStatus(),
251259
"provisioning state", host.Status.Provisioning.State)
252-
computeConditions(ctx, host, prov)
253260
err = r.saveHostStatus(ctx, host)
254261
if err != nil {
255262
return ctrl.Result{}, fmt.Errorf("failed to save host status after %q: %w", initialState, err)
@@ -2304,15 +2311,17 @@ func computeConditions(ctx context.Context, host *metal3api.BareMetalHost, prov
23042311
setConditionUnknown(host, metal3api.HealthyCondition, metal3api.UnknownHealthReason)
23052312
return
23062313
}
2307-
switch prov.GetHealth(ctx) {
2308-
case provisioner.HealthOK:
2309-
setConditionTrue(host, metal3api.HealthyCondition, metal3api.HealthyReason)
2310-
case provisioner.HealthWarning:
2311-
setConditionFalse(host, metal3api.HealthyCondition, metal3api.WarningHealthReason)
2312-
case provisioner.HealthCritical:
2313-
setConditionFalse(host, metal3api.HealthyCondition, metal3api.CriticalHealthReason)
2314-
default:
2315-
setConditionUnknown(host, metal3api.HealthyCondition, metal3api.UnknownHealthReason)
2314+
if health := prov.GetHealth(ctx); health != "" {
2315+
switch health {
2316+
case provisioner.HealthOK:
2317+
setConditionTrue(host, metal3api.HealthyCondition, metal3api.HealthyReason)
2318+
case provisioner.HealthWarning:
2319+
setConditionFalse(host, metal3api.HealthyCondition, metal3api.WarningHealthReason)
2320+
case provisioner.HealthCritical:
2321+
setConditionFalse(host, metal3api.HealthyCondition, metal3api.CriticalHealthReason)
2322+
default:
2323+
setConditionUnknown(host, metal3api.HealthyCondition, metal3api.UnknownHealthReason)
2324+
}
23162325
}
23172326
}
23182327

internal/controller/metal3.io/baremetalhost_controller_test.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3187,12 +3187,6 @@ func TestComputeHealthyCondition(t *testing.T) {
31873187
ExpectedStatus metav1.ConditionStatus
31883188
ExpectedReason string
31893189
}{
3190-
{
3191-
Scenario: "empty health reports unknown",
3192-
Health: "",
3193-
ExpectedStatus: metav1.ConditionUnknown,
3194-
ExpectedReason: metal3api.UnknownHealthReason,
3195-
},
31963190
{
31973191
Scenario: "OK health",
31983192
Health: provisioner.HealthOK,
@@ -3232,6 +3226,17 @@ func TestComputeHealthyCondition(t *testing.T) {
32323226
assert.Equal(t, tc.ExpectedReason, cond.Reason)
32333227
})
32343228
}
3229+
3230+
t.Run("empty health leaves condition unchanged", func(t *testing.T) {
3231+
host := bmhWithStatus(metal3api.OperationalStatusOK, metal3api.StateAvailable)
3232+
fix := &fixture.Fixture{Health: ""}
3233+
prov, err := fix.NewProvisioner(t.Context(), provisioner.BuildHostData(*host, bmc.Credentials{}), nil)
3234+
require.NoError(t, err)
3235+
computeConditions(t.Context(), host, prov)
3236+
3237+
cond := conditions.Get(host, metal3api.HealthyCondition)
3238+
assert.Nil(t, cond, "empty health should not set the Healthy condition")
3239+
})
32353240
}
32363241

32373242
func TestComputeConditions(t *testing.T) {
@@ -3331,8 +3336,12 @@ func TestComputeConditions(t *testing.T) {
33313336
assert.True(t, conditions.IsFalse(tc.BareMetalHost, metal3api.ReadyCondition))
33323337
}
33333338
cond := conditions.Get(tc.BareMetalHost, metal3api.HealthyCondition)
3334-
require.NotNil(t, cond, "Healthy condition should always be set")
3335-
assert.Equal(t, metav1.ConditionUnknown, cond.Status, "Healthy should be Unknown when health is not reported")
3339+
if tc.provisioner == nil {
3340+
require.NotNil(t, cond, "Healthy condition should be set when provisioner is nil")
3341+
assert.Equal(t, metav1.ConditionUnknown, cond.Status, "Healthy should be Unknown when provisioner is nil")
3342+
} else {
3343+
assert.Nil(t, cond, "Healthy condition should not be set when provisioner reports no health data")
3344+
}
33363345
})
33373346
}
33383347
}

pkg/provisioner/ironic/ironic.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2004,7 +2004,9 @@ func (p *ironicProvisioner) HasPowerFailure(ctx context.Context) bool {
20042004
func (p *ironicProvisioner) GetHealth(ctx context.Context) string {
20052005
node, err := p.getNode(ctx)
20062006
if err != nil {
2007-
p.log.Error(err, "ignored error while checking health status")
2007+
if !errors.Is(err, provisioner.ErrNeedsRegistration) {
2008+
p.log.Error(err, "ignored error while checking health status")
2009+
}
20082010
return ""
20092011
}
20102012
return node.Health

0 commit comments

Comments
 (0)