Skip to content

Commit 38c9868

Browse files
committed
Add retry-on-conflict for hypervisor status patches
Multiple controllers updating the same Hypervisor resource were causing "the object has been modified" errors due to stale resourceVersions. Add PatchHypervisorStatusWithRetry helper that re-fetches the resource before each patch attempt and uses exponential backoff retry logic. Update aggregates, offboarding, and traits controllers to use this helper.
1 parent a55b6d7 commit 38c9868

10 files changed

Lines changed: 232 additions & 123 deletions

internal/controller/aggregates_controller.go

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"fmt"
2424

2525
corev1 "k8s.io/api/core/v1"
26-
"k8s.io/apimachinery/pkg/api/equality"
2726
"k8s.io/apimachinery/pkg/api/meta"
2827
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2928
"k8s.io/apimachinery/pkg/runtime"
@@ -64,7 +63,6 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request)
6463
return ctrl.Result{}, nil
6564
}
6665

67-
base := hv.DeepCopy()
6866
desiredAggregateNames, desiredCondition := ac.determineDesiredState(hv)
6967

7068
// Extract current aggregate names for comparison
@@ -73,39 +71,51 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request)
7371
currentAggregateNames[i] = agg.Name
7472
}
7573

74+
var newAggregates []kvmv1.Aggregate
75+
aggregatesChanged := false
7676
if !slicesEqualUnordered(desiredAggregateNames, currentAggregateNames) {
7777
// Apply aggregates to OpenStack and update status
7878
aggregates, err := openstack.ApplyAggregates(ctx, ac.computeClient, hv.Name, desiredAggregateNames)
7979
if err != nil {
80-
// Set error condition
80+
// Set error condition with retry on conflict
8181
condition := metav1.Condition{
8282
Type: kvmv1.ConditionTypeAggregatesUpdated,
8383
Status: metav1.ConditionFalse,
8484
Reason: kvmv1.ConditionReasonFailed,
8585
Message: fmt.Errorf("failed to apply aggregates: %w", err).Error(),
8686
}
8787

88-
if meta.SetStatusCondition(&hv.Status.Conditions, condition) {
89-
if err2 := ac.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
90-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(AggregatesControllerName)); err2 != nil {
91-
return ctrl.Result{}, errors.Join(err, err2)
92-
}
88+
if err2 := utils.PatchHypervisorStatusWithRetry(ctx, ac.Client, hv.Name, AggregatesControllerName, func(h *kvmv1.Hypervisor) {
89+
meta.SetStatusCondition(&h.Status.Conditions, condition)
90+
}); err2 != nil {
91+
return ctrl.Result{}, errors.Join(err, err2)
9392
}
9493
return ctrl.Result{}, err
9594
}
9695

97-
hv.Status.Aggregates = aggregates
96+
newAggregates = aggregates
97+
aggregatesChanged = true
9898
}
9999

100-
// Set the condition based on the determined desired state
101-
meta.SetStatusCondition(&hv.Status.Conditions, desiredCondition)
102-
103-
if equality.Semantic.DeepEqual(base, hv) {
100+
// Skip the round-trip when nothing would change
101+
existing := meta.FindStatusCondition(hv.Status.Conditions, desiredCondition.Type)
102+
conditionUnchanged := existing != nil &&
103+
existing.Status == desiredCondition.Status &&
104+
existing.Reason == desiredCondition.Reason &&
105+
existing.Message == desiredCondition.Message
106+
if !aggregatesChanged && conditionUnchanged {
104107
return ctrl.Result{}, nil
105108
}
106109

107-
return ctrl.Result{}, ac.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
108-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(AggregatesControllerName))
110+
// Patch status with retry on conflict
111+
err := utils.PatchHypervisorStatusWithRetry(ctx, ac.Client, hv.Name, AggregatesControllerName, func(h *kvmv1.Hypervisor) {
112+
if aggregatesChanged {
113+
h.Status.Aggregates = newAggregates
114+
}
115+
meta.SetStatusCondition(&h.Status.Conditions, desiredCondition)
116+
})
117+
118+
return ctrl.Result{}, err
109119
}
110120

111121
// determineDesiredState returns the desired aggregates and the corresponding condition

internal/controller/hypervisor_controller.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838

3939
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
4040
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/global"
41+
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils"
4142
)
4243

4344
const (
@@ -121,8 +122,16 @@ func (hv *HypervisorController) Reconcile(ctx context.Context, req ctrl.Request)
121122
}
122123

123124
if !equality.Semantic.DeepEqual(hypervisor, base) {
124-
return ctrl.Result{}, hv.Status().Patch(ctx, hypervisor, k8sclient.MergeFromWithOptions(base,
125-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorControllerName))
125+
// Capture values to apply - only mutate fields this controller owns
126+
newInternalIP := hypervisor.Status.InternalIP
127+
terminatingCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTerminating)
128+
129+
return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hv.Client, hypervisor.Name, HypervisorControllerName, func(h *kvmv1.Hypervisor) {
130+
h.Status.InternalIP = newInternalIP
131+
if terminatingCondition != nil {
132+
meta.SetStatusCondition(&h.Status.Conditions, *terminatingCondition)
133+
}
134+
})
126135
}
127136

128137
syncLabelsAndAnnotations(nodeLabels, hypervisor, node)

internal/controller/hypervisor_instance_ha_controller.go

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
logger "sigs.k8s.io/controller-runtime/pkg/log"
3131

3232
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
33+
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils"
3334
)
3435

3536
const (
@@ -99,17 +100,17 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl
99100
}
100101

101102
if err := disableInstanceHA(hv); err != nil {
102-
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
103+
condition := metav1.Condition{
103104
Type: kvmv1.ConditionTypeHaEnabled,
104105
Status: metav1.ConditionUnknown,
105106
Message: err.Error(),
106107
Reason: kvmv1.ConditionReasonFailed,
107-
})
108-
109-
if patchErr := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorInstanceHaControllerName)); patchErr != nil {
110-
return ctrl.Result{}, errors.Join(err, patchErr)
111108
}
112-
return ctrl.Result{}, err
109+
110+
patchErr := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) {
111+
meta.SetStatusCondition(&h.Status.Conditions, condition)
112+
})
113+
return ctrl.Result{}, errors.Join(err, patchErr)
113114
}
114115
} else {
115116
if !meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
@@ -123,25 +124,31 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl
123124
}
124125

125126
if err := enableInstanceHA(hv); err != nil {
126-
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
127+
condition := metav1.Condition{
127128
Type: kvmv1.ConditionTypeHaEnabled,
128129
Status: metav1.ConditionUnknown,
129130
Message: err.Error(),
130131
Reason: kvmv1.ConditionReasonFailed,
131-
})
132-
133-
if patchErr := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorInstanceHaControllerName)); patchErr != nil {
134-
return ctrl.Result{}, errors.Join(err, patchErr)
135132
}
136-
return ctrl.Result{}, err
133+
134+
patchErr := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) {
135+
meta.SetStatusCondition(&h.Status.Conditions, condition)
136+
})
137+
return ctrl.Result{}, errors.Join(err, patchErr)
137138
}
138139
}
139140

140141
if equality.Semantic.DeepEqual(hv, old) {
141142
return ctrl.Result{}, nil
142143
}
143144

144-
return ctrl.Result{}, r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorInstanceHaControllerName))
145+
// Only set the HaEnabled condition this controller owns
146+
haCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHaEnabled)
147+
return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) {
148+
if haCondition != nil {
149+
meta.SetStatusCondition(&h.Status.Conditions, *haCondition)
150+
}
151+
})
145152
}
146153

147154
// SetupWithManager sets up the controller with the Manager.

internal/controller/hypervisor_maintenance_controller.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939

4040
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
4141
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack"
42+
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils"
4243
)
4344

4445
const (
@@ -82,8 +83,22 @@ func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req c
8283
return ctrl.Result{}, nil
8384
}
8485

85-
return ctrl.Result{}, hec.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old,
86-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(MaintenanceControllerName))
86+
// Capture only the fields this controller owns
87+
disabledCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled)
88+
evictingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting)
89+
evicted := hv.Status.Evicted
90+
91+
return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hec.Client, hv.Name, HypervisorMaintenanceControllerName, func(h *kvmv1.Hypervisor) {
92+
if disabledCondition != nil {
93+
meta.SetStatusCondition(&h.Status.Conditions, *disabledCondition)
94+
}
95+
if evictingCondition != nil {
96+
meta.SetStatusCondition(&h.Status.Conditions, *evictingCondition)
97+
} else {
98+
meta.RemoveStatusCondition(&h.Status.Conditions, kvmv1.ConditionTypeEvicting)
99+
}
100+
h.Status.Evicted = evicted
101+
})
87102
}
88103

89104
func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context.Context, hv *kvmv1.Hypervisor) error {

internal/controller/hypervisor_taint_controller.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"sigs.k8s.io/controller-runtime/pkg/predicate"
3131

3232
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
33+
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils"
3334
)
3435

3536
const (
@@ -84,8 +85,13 @@ func (r *HypervisorTaintController) Reconcile(ctx context.Context, req ctrl.Requ
8485
return ctrl.Result{}, nil
8586
}
8687

87-
return ctrl.Result{}, r.Status().Patch(ctx, hypervisor, k8sclient.MergeFromWithOptions(before,
88-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorTaintControllerName))
88+
// Only set the Tainted condition this controller owns
89+
taintedCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTainted)
90+
return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hypervisor.Name, HypervisorTaintControllerName, func(h *kvmv1.Hypervisor) {
91+
if taintedCondition != nil {
92+
meta.SetStatusCondition(&h.Status.Conditions, *taintedCondition)
93+
}
94+
})
8995
}
9096

9197
func (r *HypervisorTaintController) SetupWithManager(mgr ctrl.Manager) error {

internal/controller/offboarding_controller.go

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535

3636
kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
3737
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack"
38+
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils"
3839
)
3940

4041
const (
@@ -146,30 +147,30 @@ func (r *HypervisorOffboardingReconciler) Reconcile(ctx context.Context, req ctr
146147
}
147148

148149
func (r *HypervisorOffboardingReconciler) setOffboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) {
149-
base := hv.DeepCopy()
150-
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
151-
Type: kvmv1.ConditionTypeOffboarded,
152-
Status: metav1.ConditionFalse,
153-
Reason: "Offboarding",
154-
Message: message,
150+
err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OffboardingControllerName, func(h *kvmv1.Hypervisor) {
151+
meta.SetStatusCondition(&h.Status.Conditions, metav1.Condition{
152+
Type: kvmv1.ConditionTypeOffboarded,
153+
Status: metav1.ConditionFalse,
154+
Reason: "Offboarding",
155+
Message: message,
156+
})
155157
})
156-
if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
157-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil {
158+
if err != nil {
158159
return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err)
159160
}
160161
return ctrl.Result{}, nil
161162
}
162163

163164
func (r *HypervisorOffboardingReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error {
164-
base := hv.DeepCopy()
165-
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
166-
Type: kvmv1.ConditionTypeOffboarded,
167-
Status: metav1.ConditionTrue,
168-
Reason: "Offboarded",
169-
Message: "Offboarding successful",
165+
err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OffboardingControllerName, func(h *kvmv1.Hypervisor) {
166+
meta.SetStatusCondition(&h.Status.Conditions, metav1.Condition{
167+
Type: kvmv1.ConditionTypeOffboarded,
168+
Status: metav1.ConditionTrue,
169+
Reason: "Offboarded",
170+
Message: "Offboarding successful",
171+
})
170172
})
171-
if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
172-
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil {
173+
if err != nil {
173174
return fmt.Errorf("cannot update hypervisor status due to %w", err)
174175
}
175176
return nil

0 commit comments

Comments
 (0)