mirror of
https://github.com/outbackdingo/kubernetes.git
synced 2026-01-27 10:19:35 +00:00
Merge pull request #132632 from sdowell/gc-rv-race
fix: add RV check on GC delete calls
This commit is contained in:
@@ -620,7 +620,7 @@ func (gc *GarbageCollector) attemptToDeleteItem(ctx context.Context, item *node)
|
||||
// FinalizerDeletingDependents from the item, resulting in the final
|
||||
// deletion of the item.
|
||||
policy := metav1.DeletePropagationForeground
|
||||
return gc.deleteObject(item.identity, &policy)
|
||||
return gc.deleteObject(item.identity, latest.ResourceVersion, latest.OwnerReferences, &policy)
|
||||
default:
|
||||
// item doesn't have any solid owner, so it needs to be garbage
|
||||
// collected. Also, none of item's owners is waiting for the deletion of
|
||||
@@ -641,7 +641,7 @@ func (gc *GarbageCollector) attemptToDeleteItem(ctx context.Context, item *node)
|
||||
"item", item.identity,
|
||||
"propagationPolicy", policy,
|
||||
)
|
||||
return gc.deleteObject(item.identity, &policy)
|
||||
return gc.deleteObject(item.identity, latest.ResourceVersion, latest.OwnerReferences, &policy)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ import (
|
||||
"k8s.io/utils/ptr"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/api/meta"
|
||||
"k8s.io/apimachinery/pkg/api/meta/testrestmapper"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
@@ -2313,6 +2314,123 @@ func TestConflictingData(t *testing.T) {
|
||||
}),
|
||||
},
|
||||
},
|
||||
{
|
||||
// https://github.com/kubernetes/kubernetes/issues/114603
|
||||
name: "resourceVersion conflict between Get/Delete while processing object deletion",
|
||||
steps: []step{
|
||||
// setup, 0,1
|
||||
createObjectInClient("", "v1", "pods", "ns1", makeMetadataObj(pod1ns1)), // good parent
|
||||
createObjectInClient("", "v1", "pods", "ns1", withRV(makeMetadataObj(pod2ns1, pod1ns1), "42")), // good child
|
||||
// events, 2,3
|
||||
processEvent(makeAddEvent(pod1ns1)),
|
||||
processEvent(withRV(makeAddEvent(pod2ns1, pod1ns1), "42")),
|
||||
// delete parent, 4,5,6
|
||||
deleteObjectFromClient("", "v1", "pods", "ns1", pod1ns1.Name),
|
||||
processEvent(makeDeleteEvent(pod1ns1)),
|
||||
assertState(state{
|
||||
absentOwnerCache: []objectReference{pod1ns1},
|
||||
graphNodes: []*node{
|
||||
makeNode(pod2ns1, withOwners(pod1ns1))},
|
||||
pendingAttemptToDelete: []*node{
|
||||
makeNode(pod2ns1, withOwners(pod1ns1)),
|
||||
},
|
||||
}),
|
||||
|
||||
// add reactor to enforce RV precondition, 7
|
||||
prependReactor("delete", "pods", (func() func(ctx stepContext) clientgotesting.ReactionFunc {
|
||||
call := 0
|
||||
return func(ctx stepContext) clientgotesting.ReactionFunc {
|
||||
return func(action clientgotesting.Action) (handled bool, ret runtime.Object, err error) {
|
||||
call++
|
||||
deleteAction, ok := action.(clientgotesting.DeleteAction)
|
||||
if !ok {
|
||||
ctx.t.Error("missing DeleteAction")
|
||||
return false, nil, nil
|
||||
}
|
||||
|
||||
preconditionRV := ""
|
||||
if preconditions := deleteAction.GetDeleteOptions().Preconditions; preconditions != nil {
|
||||
if rv := preconditions.ResourceVersion; rv != nil {
|
||||
preconditionRV = *rv
|
||||
}
|
||||
}
|
||||
|
||||
objectRV := ""
|
||||
var metadataObj *metav1.PartialObjectMetadata
|
||||
if obj, err := ctx.metadataClient.Tracker().Get(deleteAction.GetResource(), deleteAction.GetNamespace(), deleteAction.GetName(), metav1.GetOptions{}); err == nil {
|
||||
metadataObj = obj.(*metav1.PartialObjectMetadata)
|
||||
objectRV = metadataObj.ResourceVersion
|
||||
}
|
||||
|
||||
switch call {
|
||||
case 1:
|
||||
if preconditionRV == "42" && objectRV == "42" {
|
||||
// simulate a concurrent change that modifies the owner references
|
||||
ctx.t.Log("changing rv 42 --> 43 concurrently")
|
||||
metadataObj.OwnerReferences = []metav1.OwnerReference{role1v1.OwnerReference}
|
||||
metadataObj.ResourceVersion = "43"
|
||||
if err := ctx.metadataClient.Tracker().Update(deleteAction.GetResource(), metadataObj, deleteAction.GetNamespace()); err != nil {
|
||||
ctx.t.Errorf("unexpected error updating tracker: %v", err)
|
||||
}
|
||||
return true, nil, errors.NewConflict(deleteAction.GetResource().GroupResource(), deleteAction.GetName(), fmt.Errorf("expected 42, got 43"))
|
||||
} else {
|
||||
ctx.t.Errorf("expected delete with rv=42 precondition on call 1, got %q, %q", preconditionRV, objectRV)
|
||||
}
|
||||
case 2:
|
||||
if preconditionRV == "43" && objectRV == "43" {
|
||||
// simulate a concurrent change that *does not* modify the owner references
|
||||
ctx.t.Log("changing rv 43 --> 44 concurrently")
|
||||
metadataObj.ResourceVersion = "44"
|
||||
if err := ctx.metadataClient.Tracker().Update(deleteAction.GetResource(), metadataObj, deleteAction.GetNamespace()); err != nil {
|
||||
ctx.t.Errorf("unexpected error updating tracker: %v", err)
|
||||
}
|
||||
return true, nil, errors.NewConflict(deleteAction.GetResource().GroupResource(), deleteAction.GetName(), fmt.Errorf("expected 43, got 44"))
|
||||
} else {
|
||||
ctx.t.Errorf("expected delete with rv=43 precondition on call 2, got %q, %q", preconditionRV, objectRV)
|
||||
}
|
||||
case 3:
|
||||
if preconditionRV != "" {
|
||||
ctx.t.Errorf("expected delete with no rv precondition on call 3, got %q", preconditionRV)
|
||||
}
|
||||
default:
|
||||
ctx.t.Errorf("expected delete call %d", call)
|
||||
}
|
||||
return false, nil, nil
|
||||
}
|
||||
}
|
||||
})()),
|
||||
|
||||
// 8,9
|
||||
processAttemptToDelete(1),
|
||||
assertState(state{
|
||||
clientActions: []string{
|
||||
"get /v1, Resource=pods ns=ns1 name=podname2", // first get sees rv=42, pod1ns1 owner
|
||||
"delete /v1, Resource=pods ns=ns1 name=podname2", // first delete with rv=42 precondition gets confict, triggers a live get
|
||||
"get /v1, Resource=pods ns=ns1 name=podname2", // get has new ownerReferences, exits attemptToDelete
|
||||
},
|
||||
absentOwnerCache: []objectReference{pod1ns1},
|
||||
graphNodes: []*node{
|
||||
makeNode(pod2ns1, withOwners(pod1ns1))},
|
||||
pendingAttemptToDelete: []*node{
|
||||
makeNode(pod2ns1, withOwners(pod1ns1))},
|
||||
}),
|
||||
|
||||
// reattempt delete, 10,11
|
||||
processAttemptToDelete(1),
|
||||
assertState(state{
|
||||
clientActions: []string{
|
||||
"get /v1, Resource=pods ns=ns1 name=podname2", // first get sees rv=43, role1v1 owner
|
||||
"get rbac.authorization.k8s.io/v1, Resource=roles ns=ns1 name=role1", // verify missing owner
|
||||
"delete /v1, Resource=pods ns=ns1 name=podname2", // first delete RV precondition triggers a live Get
|
||||
"get /v1, Resource=pods ns=ns1 name=podname2", // the object has same ownerReferences, causing unconditional Delete
|
||||
"delete /v1, Resource=pods ns=ns1 name=podname2", // unconditional Delete
|
||||
},
|
||||
absentOwnerCache: []objectReference{pod1ns1, role1v1},
|
||||
graphNodes: []*node{
|
||||
makeNode(pod2ns1, withOwners(pod1ns1))},
|
||||
}),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
alwaysStarted := make(chan struct{})
|
||||
@@ -2369,6 +2487,7 @@ func TestConflictingData(t *testing.T) {
|
||||
uidToNode: make(map[types.UID]*node),
|
||||
},
|
||||
attemptToDelete: attemptToDelete,
|
||||
attemptToOrphan: attemptToOrphan,
|
||||
absentOwnerCache: absentOwnerCache,
|
||||
},
|
||||
}
|
||||
@@ -2473,6 +2592,16 @@ func makeObj(identity objectReference, owners ...objectReference) *metaonly.Meta
|
||||
return obj
|
||||
}
|
||||
|
||||
func withRV[T any](obj T, rv string) T {
|
||||
switch t := any(obj).(type) {
|
||||
case *metav1.PartialObjectMetadata:
|
||||
t.ResourceVersion = rv
|
||||
case *event:
|
||||
withRV(t.obj, rv)
|
||||
}
|
||||
return obj
|
||||
}
|
||||
|
||||
func makeMetadataObj(identity objectReference, owners ...objectReference) *metav1.PartialObjectMetadata {
|
||||
obj := &metav1.PartialObjectMetadata{
|
||||
TypeMeta: metav1.TypeMeta{APIVersion: identity.APIVersion, Kind: identity.Kind},
|
||||
@@ -2523,6 +2652,18 @@ func processPendingGraphChanges(count int) step {
|
||||
}
|
||||
}
|
||||
|
||||
type reactionFuncFactory func(ctx stepContext) clientgotesting.ReactionFunc
|
||||
|
||||
func prependReactor(verb, resource string, reaction reactionFuncFactory) step {
|
||||
return step{
|
||||
name: "prependReactor",
|
||||
check: func(ctx stepContext) {
|
||||
ctx.t.Helper()
|
||||
ctx.metadataClient.PrependReactor(verb, resource, reaction(ctx))
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func processAttemptToDelete(count int) step {
|
||||
return step{
|
||||
name: "processAttemptToDelete",
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/api/meta"
|
||||
@@ -49,15 +50,33 @@ func (gc *GarbageCollector) apiResource(apiVersion, kind string) (schema.GroupVe
|
||||
return mapping.Resource, mapping.Scope == meta.RESTScopeNamespace, nil
|
||||
}
|
||||
|
||||
func (gc *GarbageCollector) deleteObject(item objectReference, policy *metav1.DeletionPropagation) error {
|
||||
func (gc *GarbageCollector) deleteObject(item objectReference, resourceVersion string, ownersAtResourceVersion []metav1.OwnerReference, policy *metav1.DeletionPropagation) error {
|
||||
resource, namespaced, err := gc.apiResource(item.APIVersion, item.Kind)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
uid := item.UID
|
||||
preconditions := metav1.Preconditions{UID: &uid}
|
||||
if len(resourceVersion) > 0 {
|
||||
preconditions.ResourceVersion = &resourceVersion
|
||||
}
|
||||
deleteOptions := metav1.DeleteOptions{Preconditions: &preconditions, PropagationPolicy: policy}
|
||||
return gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.Namespace)).Delete(context.TODO(), item.Name, deleteOptions)
|
||||
resourceClient := gc.metadataClient.Resource(resource).Namespace(resourceDefaultNamespace(namespaced, item.Namespace))
|
||||
err = resourceClient.Delete(context.TODO(), item.Name, deleteOptions)
|
||||
if errors.IsConflict(err) && len(resourceVersion) > 0 {
|
||||
// check if the ownerReferences changed
|
||||
liveObject, liveErr := resourceClient.Get(context.TODO(), item.Name, metav1.GetOptions{})
|
||||
if errors.IsNotFound(liveErr) {
|
||||
// object we wanted to delete is gone, success!
|
||||
return nil
|
||||
}
|
||||
if liveErr == nil && liveObject.UID == item.UID && liveObject.ResourceVersion != resourceVersion && reflect.DeepEqual(liveObject.OwnerReferences, ownersAtResourceVersion) {
|
||||
// object changed, causing a conflict error, but ownerReferences did not change.
|
||||
// retry delete without resourceVersion precondition so rapid updates unrelated to ownerReferences don't starve GC
|
||||
return gc.deleteObject(item, "", nil, policy)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (gc *GarbageCollector) getObject(item objectReference) (*metav1.PartialObjectMetadata, error) {
|
||||
|
||||
@@ -279,19 +279,19 @@ func (c *dynamicResourceClient) Delete(ctx context.Context, name string, opts me
|
||||
switch {
|
||||
case len(c.namespace) == 0 && len(subresources) == 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewRootDeleteAction(c.resource, name), &metav1.Status{Status: "dynamic delete fail"})
|
||||
Invokes(testing.NewRootDeleteActionWithOptions(c.resource, name, opts), &metav1.Status{Status: "dynamic delete fail"})
|
||||
|
||||
case len(c.namespace) == 0 && len(subresources) > 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewRootDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), name), &metav1.Status{Status: "dynamic delete fail"})
|
||||
Invokes(testing.NewRootDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), name, opts), &metav1.Status{Status: "dynamic delete fail"})
|
||||
|
||||
case len(c.namespace) > 0 && len(subresources) == 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewDeleteAction(c.resource, c.namespace, name), &metav1.Status{Status: "dynamic delete fail"})
|
||||
Invokes(testing.NewDeleteActionWithOptions(c.resource, c.namespace, name, opts), &metav1.Status{Status: "dynamic delete fail"})
|
||||
|
||||
case len(c.namespace) > 0 && len(subresources) > 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), c.namespace, name), &metav1.Status{Status: "dynamic delete fail"})
|
||||
Invokes(testing.NewDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), c.namespace, name, opts), &metav1.Status{Status: "dynamic delete fail"})
|
||||
}
|
||||
|
||||
return err
|
||||
|
||||
@@ -235,19 +235,19 @@ func (c *metadataResourceClient) Delete(ctx context.Context, name string, opts m
|
||||
switch {
|
||||
case len(c.namespace) == 0 && len(subresources) == 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewRootDeleteAction(c.resource, name), &metav1.Status{Status: "metadata delete fail"})
|
||||
Invokes(testing.NewRootDeleteActionWithOptions(c.resource, name, opts), &metav1.Status{Status: "metadata delete fail"})
|
||||
|
||||
case len(c.namespace) == 0 && len(subresources) > 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewRootDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), name), &metav1.Status{Status: "metadata delete fail"})
|
||||
Invokes(testing.NewRootDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), name, opts), &metav1.Status{Status: "metadata delete fail"})
|
||||
|
||||
case len(c.namespace) > 0 && len(subresources) == 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewDeleteAction(c.resource, c.namespace, name), &metav1.Status{Status: "metadata delete fail"})
|
||||
Invokes(testing.NewDeleteActionWithOptions(c.resource, c.namespace, name, opts), &metav1.Status{Status: "metadata delete fail"})
|
||||
|
||||
case len(c.namespace) > 0 && len(subresources) > 0:
|
||||
_, err = c.client.Fake.
|
||||
Invokes(testing.NewDeleteSubresourceAction(c.resource, strings.Join(subresources, "/"), c.namespace, name), &metav1.Status{Status: "metadata delete fail"})
|
||||
Invokes(testing.NewDeleteSubresourceActionWithOptions(c.resource, strings.Join(subresources, "/"), c.namespace, name, opts), &metav1.Status{Status: "metadata delete fail"})
|
||||
}
|
||||
|
||||
return err
|
||||
|
||||
Reference in New Issue
Block a user