diff --git a/pkg/controller/garbagecollector/garbagecollector.go b/pkg/controller/garbagecollector/garbagecollector.go index d945b576977..a829714b4b9 100644 --- a/pkg/controller/garbagecollector/garbagecollector.go +++ b/pkg/controller/garbagecollector/garbagecollector.go @@ -620,7 +620,7 @@ func (gc *GarbageCollector) attemptToDeleteItem(ctx context.Context, item *node) // FinalizerDeletingDependents from the item, resulting in the final // deletion of the item. policy := metav1.DeletePropagationForeground - return gc.deleteObject(item.identity, &policy) + return gc.deleteObject(item.identity, latest.ResourceVersion, latest.OwnerReferences, &policy) default: // item doesn't have any solid owner, so it needs to be garbage // collected. Also, none of item's owners is waiting for the deletion of @@ -641,7 +641,7 @@ func (gc *GarbageCollector) attemptToDeleteItem(ctx context.Context, item *node) "item", item.identity, "propagationPolicy", policy, ) - return gc.deleteObject(item.identity, &policy) + return gc.deleteObject(item.identity, latest.ResourceVersion, latest.OwnerReferences, &policy) } } diff --git a/pkg/controller/garbagecollector/garbagecollector_test.go b/pkg/controller/garbagecollector/garbagecollector_test.go index 84005d2ab6d..8e6b9ac0903 100644 --- a/pkg/controller/garbagecollector/garbagecollector_test.go +++ b/pkg/controller/garbagecollector/garbagecollector_test.go @@ -41,6 +41,7 @@ import ( "k8s.io/utils/ptr" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/meta/testrestmapper" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -2313,6 +2314,123 @@ func TestConflictingData(t *testing.T) { }), }, }, + { + // https://github.com/kubernetes/kubernetes/issues/114603 + name: "resourceVersion conflict between Get/Delete while processing object deletion", + steps: []step{ + // setup, 0,1 + createObjectInClient("", "v1", "pods", "ns1", makeMetadataObj(pod1ns1)), // good parent + 
createObjectInClient("", "v1", "pods", "ns1", withRV(makeMetadataObj(pod2ns1, pod1ns1), "42")), // good child + // events, 2,3 + processEvent(makeAddEvent(pod1ns1)), + processEvent(withRV(makeAddEvent(pod2ns1, pod1ns1), "42")), + // delete parent, 4,5,6 + deleteObjectFromClient("", "v1", "pods", "ns1", pod1ns1.Name), + processEvent(makeDeleteEvent(pod1ns1)), + assertState(state{ + absentOwnerCache: []objectReference{pod1ns1}, + graphNodes: []*node{ + makeNode(pod2ns1, withOwners(pod1ns1))}, + pendingAttemptToDelete: []*node{ + makeNode(pod2ns1, withOwners(pod1ns1)), + }, + }), + + // add reactor to enforce RV precondition, 7 + prependReactor("delete", "pods", (func() func(ctx stepContext) clientgotesting.ReactionFunc { + call := 0 + return func(ctx stepContext) clientgotesting.ReactionFunc { + return func(action clientgotesting.Action) (handled bool, ret runtime.Object, err error) { + call++ + deleteAction, ok := action.(clientgotesting.DeleteAction) + if !ok { + ctx.t.Error("missing DeleteAction") + return false, nil, nil + } + + preconditionRV := "" + if preconditions := deleteAction.GetDeleteOptions().Preconditions; preconditions != nil { + if rv := preconditions.ResourceVersion; rv != nil { + preconditionRV = *rv + } + } + + objectRV := "" + var metadataObj *metav1.PartialObjectMetadata + if obj, err := ctx.metadataClient.Tracker().Get(deleteAction.GetResource(), deleteAction.GetNamespace(), deleteAction.GetName(), metav1.GetOptions{}); err == nil { + metadataObj = obj.(*metav1.PartialObjectMetadata) + objectRV = metadataObj.ResourceVersion + } + + switch call { + case 1: + if preconditionRV == "42" && objectRV == "42" { + // simulate a concurrent change that modifies the owner references + ctx.t.Log("changing rv 42 --> 43 concurrently") + metadataObj.OwnerReferences = []metav1.OwnerReference{role1v1.OwnerReference} + metadataObj.ResourceVersion = "43" + if err := ctx.metadataClient.Tracker().Update(deleteAction.GetResource(), metadataObj, 
deleteAction.GetNamespace()); err != nil { + ctx.t.Errorf("unexpected error updating tracker: %v", err) + } + return true, nil, errors.NewConflict(deleteAction.GetResource().GroupResource(), deleteAction.GetName(), fmt.Errorf("expected 42, got 43")) + } else { + ctx.t.Errorf("expected delete with rv=42 precondition on call 1, got %q, %q", preconditionRV, objectRV) + } + case 2: + if preconditionRV == "43" && objectRV == "43" { + // simulate a concurrent change that *does not* modify the owner references + ctx.t.Log("changing rv 43 --> 44 concurrently") + metadataObj.ResourceVersion = "44" + if err := ctx.metadataClient.Tracker().Update(deleteAction.GetResource(), metadataObj, deleteAction.GetNamespace()); err != nil { + ctx.t.Errorf("unexpected error updating tracker: %v", err) + } + return true, nil, errors.NewConflict(deleteAction.GetResource().GroupResource(), deleteAction.GetName(), fmt.Errorf("expected 43, got 44")) + } else { + ctx.t.Errorf("expected delete with rv=43 precondition on call 2, got %q, %q", preconditionRV, objectRV) + } + case 3: + if preconditionRV != "" { + ctx.t.Errorf("expected delete with no rv precondition on call 3, got %q", preconditionRV) + } + default: + ctx.t.Errorf("expected delete call %d", call) + } + return false, nil, nil + } + } + })()), + + // 8,9 + processAttemptToDelete(1), + assertState(state{ + clientActions: []string{ + "get /v1, Resource=pods ns=ns1 name=podname2", // first get sees rv=42, pod1ns1 owner + "delete /v1, Resource=pods ns=ns1 name=podname2", // first delete with rv=42 precondition gets conflict, triggers a live get + "get /v1, Resource=pods ns=ns1 name=podname2", // get has new ownerReferences, exits attemptToDelete + }, + absentOwnerCache: []objectReference{pod1ns1}, + graphNodes: []*node{ + makeNode(pod2ns1, withOwners(pod1ns1))}, + pendingAttemptToDelete: []*node{ + makeNode(pod2ns1, withOwners(pod1ns1))}, + }), + + // reattempt delete, 10,11 + processAttemptToDelete(1), + assertState(state{ + 
clientActions: []string{ + "get /v1, Resource=pods ns=ns1 name=podname2", // first get sees rv=43, role1v1 owner + "get rbac.authorization.k8s.io/v1, Resource=roles ns=ns1 name=role1", // verify missing owner + "delete /v1, Resource=pods ns=ns1 name=podname2", // first delete RV precondition triggers a live Get + "get /v1, Resource=pods ns=ns1 name=podname2", // the object has same ownerReferences, causing unconditional Delete + "delete /v1, Resource=pods ns=ns1 name=podname2", // unconditional Delete + }, + absentOwnerCache: []objectReference{pod1ns1, role1v1}, + graphNodes: []*node{ + makeNode(pod2ns1, withOwners(pod1ns1))}, + }), + }, + }, } alwaysStarted := make(chan struct{}) @@ -2369,6 +2487,7 @@ func TestConflictingData(t *testing.T) { uidToNode: make(map[types.UID]*node), }, attemptToDelete: attemptToDelete, + attemptToOrphan: attemptToOrphan, absentOwnerCache: absentOwnerCache, }, } @@ -2473,6 +2592,16 @@ func makeObj(identity objectReference, owners ...objectReference) *metaonly.Meta return obj } +func withRV[T any](obj T, rv string) T { + switch t := any(obj).(type) { + case *metav1.PartialObjectMetadata: + t.ResourceVersion = rv + case *event: + withRV(t.obj, rv) + } + return obj +} + func makeMetadataObj(identity objectReference, owners ...objectReference) *metav1.PartialObjectMetadata { obj := &metav1.PartialObjectMetadata{ TypeMeta: metav1.TypeMeta{APIVersion: identity.APIVersion, Kind: identity.Kind}, @@ -2523,6 +2652,18 @@ func processPendingGraphChanges(count int) step { } } +type reactionFuncFactory func(ctx stepContext) clientgotesting.ReactionFunc + +func prependReactor(verb, resource string, reaction reactionFuncFactory) step { + return step{ + name: "prependReactor", + check: func(ctx stepContext) { + ctx.t.Helper() + ctx.metadataClient.PrependReactor(verb, resource, reaction(ctx)) + }, + } +} + func processAttemptToDelete(count int) step { return step{ name: "processAttemptToDelete", diff --git a/pkg/controller/garbagecollector/operations.go 
b/pkg/controller/garbagecollector/operations.go index c15cb11c6a0..a3000252728 100644 --- a/pkg/controller/garbagecollector/operations.go +++ b/pkg/controller/garbagecollector/operations.go @@ -20,6 +20,7 @@ import ( "context" "encoding/json" "fmt" + "reflect" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -49,15 +50,33 @@ func (gc *GarbageCollector) apiResource(apiVersion, kind string) (schema.GroupVe return mapping.Resource, mapping.Scope == meta.RESTScopeNamespace, nil } -func (gc *GarbageCollector) deleteObject(item objectReference, policy *metav1.DeletionPropagation) error { +func (gc *GarbageCollector) deleteObject(item objectReference, resourceVersion string, ownersAtResourceVersion []metav1.OwnerReference, policy *metav1.DeletionPropagation) error { resource, namespaced, err := gc.apiResource(item.APIVersion, item.Kind) if err != nil { return err } uid := item.UID preconditions := metav1.Preconditions{UID: &uid} + if len(resourceVersion) > 0 { + preconditions.ResourceVersion = &resourceVersion + } deleteOptions := metav1.DeleteOptions{Preconditions: &preconditions, PropagationPolicy: policy} - return gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.Namespace)).Delete(context.TODO(), item.Name, deleteOptions) + resourceClient := gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.Namespace)) + err = resourceClient.Delete(context.TODO(), item.Name, deleteOptions) + if errors.IsConflict(err) && len(resourceVersion) > 0 { + // check if the ownerReferences changed + liveObject, liveErr := resourceClient.Get(context.TODO(), item.Name, metav1.GetOptions{}) + if errors.IsNotFound(liveErr) { + // object we wanted to delete is gone, success! 
+ return nil + } + if liveErr == nil && liveObject.UID == item.UID && liveObject.ResourceVersion != resourceVersion && reflect.DeepEqual(liveObject.OwnerReferences, ownersAtResourceVersion) { + // object changed, causing a conflict error, but ownerReferences did not change. + // retry delete without resourceVersion precondition so rapid updates unrelated to ownerReferences don't starve GC + return gc.deleteObject(item, "", nil, policy) + } + } + return err } func (gc *GarbageCollector) getObject(item objectReference) (*metav1.PartialObjectMetadata, error) { diff --git a/staging/src/k8s.io/client-go/dynamic/fake/simple.go b/staging/src/k8s.io/client-go/dynamic/fake/simple.go index af02409427b..43d908051c7 100644 --- a/staging/src/k8s.io/client-go/dynamic/fake/simple.go +++ b/staging/src/k8s.io/client-go/dynamic/fake/simple.go @@ -279,19 +279,19 @@ func (c *dynamicResourceClient) Delete(ctx context.Context, name string, opts me switch { case len(c.namespace) == 0 && len(subresources) == 0: _, err = c.client.Fake. - Invokes(testing.NewRootDeleteAction(c.resource, name), &metav1.Status{Status: "dynamic delete fail"}) + Invokes(testing.NewRootDeleteActionWithOptions(c.resource, name, opts), &metav1.Status{Status: "dynamic delete fail"}) case len(c.namespace) == 0 && len(subresources) > 0: _, err = c.client.Fake. - Invokes(testing.NewRootDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), name), &metav1.Status{Status: "dynamic delete fail"}) + Invokes(testing.NewRootDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), name, opts), &metav1.Status{Status: "dynamic delete fail"}) case len(c.namespace) > 0 && len(subresources) == 0: _, err = c.client.Fake. 
- Invokes(testing.NewDeleteAction(c.resource, c.namespace, name), &metav1.Status{Status: "dynamic delete fail"}) + Invokes(testing.NewDeleteActionWithOptions(c.resource, c.namespace, name, opts), &metav1.Status{Status: "dynamic delete fail"}) case len(c.namespace) > 0 && len(subresources) > 0: _, err = c.client.Fake. - Invokes(testing.NewDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), c.namespace, name), &metav1.Status{Status: "dynamic delete fail"}) + Invokes(testing.NewDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), c.namespace, name, opts), &metav1.Status{Status: "dynamic delete fail"}) } return err diff --git a/staging/src/k8s.io/client-go/metadata/fake/simple.go b/staging/src/k8s.io/client-go/metadata/fake/simple.go index 495f9eadd3b..47beb1f8de3 100644 --- a/staging/src/k8s.io/client-go/metadata/fake/simple.go +++ b/staging/src/k8s.io/client-go/metadata/fake/simple.go @@ -235,19 +235,19 @@ func (c *metadataResourceClient) Delete(ctx context.Context, name string, opts m switch { case len(c.namespace) == 0 && len(subresources) == 0: _, err = c.client.Fake. - Invokes(testing.NewRootDeleteAction(c.resource, name), &metav1.Status{Status: "metadata delete fail"}) + Invokes(testing.NewRootDeleteActionWithOptions(c.resource, name, opts), &metav1.Status{Status: "metadata delete fail"}) case len(c.namespace) == 0 && len(subresources) > 0: _, err = c.client.Fake. - Invokes(testing.NewRootDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), name), &metav1.Status{Status: "metadata delete fail"}) + Invokes(testing.NewRootDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), name, opts), &metav1.Status{Status: "metadata delete fail"}) case len(c.namespace) > 0 && len(subresources) == 0: _, err = c.client.Fake. 
- Invokes(testing.NewDeleteAction(c.resource, c.namespace, name), &metav1.Status{Status: "metadata delete fail"}) + Invokes(testing.NewDeleteActionWithOptions(c.resource, c.namespace, name, opts), &metav1.Status{Status: "metadata delete fail"}) case len(c.namespace) > 0 && len(subresources) > 0: _, err = c.client.Fake. - Invokes(testing.NewDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), c.namespace, name), &metav1.Status{Status: "metadata delete fail"}) + Invokes(testing.NewDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), c.namespace, name, opts), &metav1.Status{Status: "metadata delete fail"}) } return err