Skip to content
This repository was archived by the owner on Sep 24, 2025. It is now read-only.

Commit dab4cc0

Browse files
authored
fix(hooks): always remove finalizers on create if hook exists (#770)
* fix(hooks): always remove finalizers Signed-off-by: Alexandre Gaudreault <[email protected]> * unit test Signed-off-by: Alexandre Gaudreault <[email protected]> --------- Signed-off-by: Alexandre Gaudreault <[email protected]>
1 parent dc952c1 commit dab4cc0

File tree

2 files changed

+153
-20
lines changed

2 files changed

+153
-20
lines changed

pkg/sync/sync_context.go

Lines changed: 46 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -663,11 +663,7 @@ func (sc *syncContext) removeHookFinalizer(task *syncTask) error {
663663
updateErr := sc.updateResource(task)
664664
if apierrors.IsConflict(updateErr) {
665665
sc.log.WithValues("task", task).V(1).Info("Retrying hook finalizer removal due to conflict on update")
666-
resIf, err := sc.getResourceIf(task, "get")
667-
if err != nil {
668-
return fmt.Errorf("failed to get resource interface: %w", err)
669-
}
670-
liveObj, err := resIf.Get(context.TODO(), task.liveObj.GetName(), metav1.GetOptions{})
666+
liveObj, err := sc.getResource(task)
671667
if apierrors.IsNotFound(err) {
672668
sc.log.WithValues("task", task).V(1).Info("Resource is already deleted")
673669
return nil
@@ -687,6 +683,19 @@ func (sc *syncContext) removeHookFinalizer(task *syncTask) error {
687683
})
688684
}
689685

686+
func (sc *syncContext) getResource(task *syncTask) (*unstructured.Unstructured, error) {
687+
sc.log.WithValues("task", task).V(1).Info("Getting resource")
688+
resIf, err := sc.getResourceIf(task, "get")
689+
if err != nil {
690+
return nil, err
691+
}
692+
liveObj, err := resIf.Get(context.TODO(), task.name(), metav1.GetOptions{})
693+
if err != nil {
694+
return nil, fmt.Errorf("failed to get resource: %w", err)
695+
}
696+
return liveObj, nil
697+
}
698+
690699
func (sc *syncContext) updateResource(task *syncTask) error {
691700
sc.log.WithValues("task", task).V(1).Info("Updating resource")
692701
resIf, err := sc.getResourceIf(task, "update")
@@ -1367,6 +1376,31 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState {
13671376
createTasks = append(createTasks, task)
13681377
}
13691378
}
1379+
1380+
// remove finalizers from previous sync on existing hooks to make sure the operation is idempotent
1381+
{
1382+
ss := newStateSync(state)
1383+
existingHooks := tasks.Filter(func(t *syncTask) bool { return t.isHook() && t.pending() && t.liveObj != nil })
1384+
for _, task := range existingHooks {
1385+
t := task
1386+
ss.Go(func(state runState) runState {
1387+
logCtx := sc.log.WithValues("dryRun", dryRun, "task", t)
1388+
logCtx.V(1).Info("Removing finalizers")
1389+
if !dryRun {
1390+
if err := sc.removeHookFinalizer(t); err != nil {
1391+
state = failed
1392+
sc.setResourceResult(t, t.syncStatus, common.OperationError, fmt.Sprintf("failed to remove hook finalizer: %v", err))
1393+
}
1394+
}
1395+
return state
1396+
})
1397+
}
1398+
state = ss.Wait()
1399+
}
1400+
if state != successful {
1401+
return state
1402+
}
1403+
13701404
// prune first
13711405
{
13721406
if !sc.pruneConfirmed {
@@ -1418,15 +1452,19 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState {
14181452
for _, task := range hooksPendingDeletion {
14191453
t := task
14201454
ss.Go(func(state runState) runState {
1421-
sc.log.WithValues("dryRun", dryRun, "task", t).V(1).Info("Deleting")
1455+
log := sc.log.WithValues("dryRun", dryRun, "task", t).V(1)
1456+
log.Info("Deleting")
14221457
if !dryRun {
14231458
err := sc.deleteResource(t)
14241459
if err != nil {
14251460
// it is possible to get a race condition here, such that the resource does not exist when
1426-
// delete is requested, we treat this as a nop
1461+
// delete is requested, we treat this as a nop and remove the liveObj
14271462
if !apierrors.IsNotFound(err) {
14281463
state = failed
1429-
sc.setResourceResult(t, "", common.OperationError, fmt.Sprintf("failed to delete resource: %v", err))
1464+
sc.setResourceResult(t, t.syncStatus, common.OperationError, fmt.Sprintf("failed to delete resource: %v", err))
1465+
} else {
1466+
log.Info("Resource not found, treating as no-op and removing liveObj")
1467+
t.liveObj = nil
14301468
}
14311469
} else {
14321470
// if there is anything that needs deleting, we are at best now in pending and

pkg/sync/sync_context_test.go

Lines changed: 107 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -46,15 +46,15 @@ func newTestSyncCtx(getResourceFunc *func(ctx context.Context, config *rest.Conf
4646
&metav1.APIResourceList{
4747
GroupVersion: "v1",
4848
APIResources: []metav1.APIResource{
49-
{Kind: "Pod", Group: "", Version: "v1", Namespaced: true, Verbs: standardVerbs},
50-
{Kind: "Service", Group: "", Version: "v1", Namespaced: true, Verbs: standardVerbs},
51-
{Kind: "Namespace", Group: "", Version: "v1", Namespaced: false, Verbs: standardVerbs},
49+
{Name: "pods", Kind: "Pod", Group: "", Version: "v1", Namespaced: true, Verbs: standardVerbs},
50+
{Name: "services", Kind: "Service", Group: "", Version: "v1", Namespaced: true, Verbs: standardVerbs},
51+
{Name: "namespaces", Kind: "Namespace", Group: "", Version: "v1", Namespaced: false, Verbs: standardVerbs},
5252
},
5353
},
5454
&metav1.APIResourceList{
5555
GroupVersion: "apps/v1",
5656
APIResources: []metav1.APIResource{
57-
{Kind: "Deployment", Group: "apps", Version: "v1", Namespaced: true, Verbs: standardVerbs},
57+
{Name: "deployments", Kind: "Deployment", Group: "apps", Version: "v1", Namespaced: true, Verbs: standardVerbs},
5858
},
5959
})
6060
sc := syncContext{
@@ -854,6 +854,39 @@ func withReplaceAndServerSideApplyAnnotations(un *unstructured.Unstructured) *un
854854
return un
855855
}
856856

857+
func TestSync_HookWithReplaceAndBeforeHookCreation_AlreadyDeleted(t *testing.T) {
858+
// This tests a race condition where Delete is called on an already deleted object
859+
// LiveObj is set, but then the resource is deleted asynchronously in kubernetes
860+
syncCtx := newTestSyncCtx(nil)
861+
862+
target := withReplaceAnnotation(testingutils.NewPod())
863+
target.SetNamespace(testingutils.FakeArgoCDNamespace)
864+
target = testingutils.Annotate(target, synccommon.AnnotationKeyHookDeletePolicy, string(synccommon.HookDeletePolicyBeforeHookCreation))
865+
target = testingutils.Annotate(target, synccommon.AnnotationKeyHook, string(synccommon.SyncPhasePreSync))
866+
live := target.DeepCopy()
867+
868+
syncCtx.resources = groupResources(ReconciliationResult{
869+
Live: []*unstructured.Unstructured{live},
870+
Target: []*unstructured.Unstructured{target},
871+
})
872+
syncCtx.hooks = []*unstructured.Unstructured{live}
873+
874+
client := fake.NewSimpleDynamicClient(runtime.NewScheme())
875+
deleted := false
876+
client.PrependReactor("delete", "pods", func(_ testcore.Action) (bool, runtime.Object, error) {
877+
deleted = true
878+
// simulate the race condition where liveObj was not nil, but the resource is now deleted in k8s
879+
return true, nil, apierrors.NewNotFound(corev1.Resource("pods"), live.GetName())
880+
})
881+
syncCtx.dynamicIf = client
882+
883+
syncCtx.Sync()
884+
885+
resourceOps, _ := syncCtx.resourceOps.(*kubetest.MockResourceOps)
886+
assert.Equal(t, "create", resourceOps.GetLastResourceCommand(kube.GetResourceKey(target)))
887+
assert.True(t, deleted)
888+
}
889+
857890
func TestSync_ServerSideApply(t *testing.T) {
858891
testCases := []struct {
859892
name string
@@ -1285,22 +1318,84 @@ func TestSyncFailureHookWithFailedSync(t *testing.T) {
12851318
}
12861319

12871320
func TestBeforeHookCreation(t *testing.T) {
1321+
finalizerRemoved := false
12881322
syncCtx := newTestSyncCtx(nil)
1289-
hook := testingutils.Annotate(testingutils.Annotate(testingutils.NewPod(), synccommon.AnnotationKeyHook, "Sync"), synccommon.AnnotationKeyHookDeletePolicy, "BeforeHookCreation")
1290-
hook.SetNamespace(testingutils.FakeArgoCDNamespace)
1323+
hookObj := testingutils.Annotate(testingutils.Annotate(testingutils.NewPod(), synccommon.AnnotationKeyHook, "Sync"), synccommon.AnnotationKeyHookDeletePolicy, "BeforeHookCreation")
1324+
hookObj.SetFinalizers([]string{hook.HookFinalizer})
1325+
hookObj.SetNamespace(testingutils.FakeArgoCDNamespace)
12911326
syncCtx.resources = groupResources(ReconciliationResult{
1292-
Live: []*unstructured.Unstructured{hook},
1327+
Live: []*unstructured.Unstructured{hookObj},
12931328
Target: []*unstructured.Unstructured{nil},
12941329
})
1295-
syncCtx.hooks = []*unstructured.Unstructured{hook}
1296-
syncCtx.dynamicIf = fake.NewSimpleDynamicClient(runtime.NewScheme())
1330+
syncCtx.hooks = []*unstructured.Unstructured{hookObj}
1331+
client := fake.NewSimpleDynamicClient(runtime.NewScheme(), hookObj)
1332+
client.PrependReactor("update", "pods", func(_ testcore.Action) (bool, runtime.Object, error) {
1333+
finalizerRemoved = true
1334+
return false, nil, nil
1335+
})
1336+
syncCtx.dynamicIf = client
12971337

1338+
// First sync will delete the existing hook
12981339
syncCtx.Sync()
1340+
phase, _, _ := syncCtx.GetState()
1341+
assert.Equal(t, synccommon.OperationRunning, phase)
1342+
assert.True(t, finalizerRemoved)
12991343

1300-
_, _, resources := syncCtx.GetState()
1344+
// Second sync will create the hook
1345+
syncCtx.Sync()
1346+
phase, message, resources := syncCtx.GetState()
1347+
assert.Equal(t, synccommon.OperationRunning, phase)
13011348
assert.Len(t, resources, 1)
1302-
assert.Empty(t, resources[0].Message)
1303-
assert.Equal(t, "waiting for completion of hook /Pod/my-pod", syncCtx.message)
1349+
assert.Equal(t, synccommon.OperationRunning, resources[0].HookPhase)
1350+
assert.Equal(t, "waiting for completion of hook /Pod/my-pod", message)
1351+
}
1352+
1353+
func TestSync_ExistingHooksWithFinalizer(t *testing.T) {
1354+
newHook := func(name string, hookType synccommon.HookType, deletePolicy synccommon.HookDeletePolicy) *unstructured.Unstructured {
1355+
obj := testingutils.NewPod()
1356+
obj.SetName(name)
1357+
obj.SetNamespace(testingutils.FakeArgoCDNamespace)
1358+
testingutils.Annotate(obj, synccommon.AnnotationKeyHook, string(hookType))
1359+
testingutils.Annotate(obj, synccommon.AnnotationKeyHookDeletePolicy, string(deletePolicy))
1360+
obj.SetFinalizers([]string{hook.HookFinalizer})
1361+
return obj
1362+
}
1363+
1364+
hook1 := newHook("existing-hook-1", synccommon.HookTypePreSync, synccommon.HookDeletePolicyBeforeHookCreation)
1365+
hook2 := newHook("existing-hook-2", synccommon.HookTypePreSync, synccommon.HookDeletePolicyHookFailed)
1366+
hook3 := newHook("existing-hook-3", synccommon.HookTypePreSync, synccommon.HookDeletePolicyHookSucceeded)
1367+
1368+
syncCtx := newTestSyncCtx(nil)
1369+
fakeDynamicClient := fake.NewSimpleDynamicClient(runtime.NewScheme(), hook1, hook2, hook3)
1370+
syncCtx.dynamicIf = fakeDynamicClient
1371+
updatedCount := 0
1372+
fakeDynamicClient.PrependReactor("update", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
1373+
// Removing the finalizers
1374+
updatedCount++
1375+
return false, nil, nil
1376+
})
1377+
deletedCount := 0
1378+
fakeDynamicClient.PrependReactor("delete", "*", func(_ testcore.Action) (handled bool, ret runtime.Object, err error) {
1379+
// because of HookDeletePolicyBeforeHookCreation
1380+
deletedCount++
1381+
return false, nil, nil
1382+
})
1383+
syncCtx.resources = groupResources(ReconciliationResult{
1384+
Live: []*unstructured.Unstructured{hook1, hook2, hook3},
1385+
Target: []*unstructured.Unstructured{nil, nil, nil},
1386+
})
1387+
syncCtx.hooks = []*unstructured.Unstructured{hook1, hook2, hook3}
1388+
1389+
syncCtx.Sync()
1390+
phase, _, _ := syncCtx.GetState()
1391+
1392+
assert.Equal(t, synccommon.OperationRunning, phase)
1393+
assert.Equal(t, 3, updatedCount)
1394+
assert.Equal(t, 1, deletedCount)
1395+
1396+
_, err := syncCtx.getResource(&syncTask{liveObj: hook1})
1397+
require.Error(t, err, "Expected resource to be deleted")
1398+
assert.True(t, apierrors.IsNotFound(err))
13041399
}
13051400

13061401
func TestRunSyncFailHooksFailed(t *testing.T) {

0 commit comments

Comments
 (0)