DRA allocator: fix data race around claimsToAllocate

The `claimsToAllocate` field stores the parameter of the `Allocate` call and
therefore has to be in the per-Allocate `allocator` struct.

Without support for extended resources, all calls get the same slice, which
explains why this bug did not fail more severely and only showed up as a data
race warning during integration testing. With support for extended resources,
the result is potentially broken because each call gets a different slice and
concurrent calls overwrite each other's value in the shared `Allocator` field.
This commit is contained in:
Patrick Ohly
2025-08-18 13:44:26 +02:00
parent 17d6c9c551
commit 4ebe560c74
3 changed files with 11 additions and 11 deletions

View File

@@ -86,12 +86,11 @@ var SupportedFeatures = internal.Features{
}
type Allocator struct {
features Features
claimsToAllocate []*resourceapi.ResourceClaim
allocatedState AllocatedState
classLister DeviceClassLister
slices []*resourceapi.ResourceSlice
celCache *cel.Cache
features Features
allocatedState AllocatedState
classLister DeviceClassLister
slices []*resourceapi.ResourceSlice
celCache *cel.Cache
// availableCounters contains the available counters for individual
// ResourceSlices. It acts as a cache that is updated the first time
// the available counters are needed for each ResourceSlice. The information
@@ -139,6 +138,7 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node, claims []*resou
ctx: ctx, // all methods share the same a and thus ctx
logger: klog.FromContext(ctx),
node: node,
claimsToAllocate: claims,
deviceMatchesRequest: make(map[matchKey]bool),
constraints: make([][]constraint, len(claims)),
consumedCounters: make(map[string]counterSets),
@@ -146,7 +146,6 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node, claims []*resou
result: make([]internalAllocationResult, len(claims)),
allocatingCapacity: NewConsumedCapacityCollection(),
}
alloc.claimsToAllocate = claims
alloc.logger.V(5).Info("Starting allocation", "numClaims", len(alloc.claimsToAllocate))
defer alloc.logger.V(5).Info("Done with allocation", "success", len(finalResult) == len(alloc.claimsToAllocate), "err", finalErr)
@@ -577,6 +576,7 @@ type allocator struct {
ctx context.Context
logger klog.Logger
node *v1.Node
claimsToAllocate []*resourceapi.ResourceClaim
pools []*Pool
deviceMatchesRequest map[matchKey]bool
constraints [][]constraint // one list of constraints per claim

View File

@@ -58,7 +58,6 @@ var SupportedFeatures = internal.Features{
type Allocator struct {
features Features
claimsToAllocate []*resourceapi.ResourceClaim
allocatedDevices sets.Set[DeviceID]
classLister DeviceClassLister
slices []*resourceapi.ResourceSlice
@@ -110,13 +109,13 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node, claims []*resou
ctx: ctx, // all methods share the same a and thus ctx
logger: klog.FromContext(ctx),
node: node,
claimsToAllocate: claims,
deviceMatchesRequest: make(map[matchKey]bool),
constraints: make([][]constraint, len(claims)),
consumedCounters: make(map[string]counterSets),
requestData: make(map[requestIndices]requestData),
result: make([]internalAllocationResult, len(claims)),
}
alloc.claimsToAllocate = claims
alloc.logger.V(5).Info("Starting allocation", "numClaims", len(alloc.claimsToAllocate))
defer alloc.logger.V(5).Info("Done with allocation", "success", len(finalResult) == len(alloc.claimsToAllocate), "err", finalErr)
@@ -471,6 +470,7 @@ type allocator struct {
ctx context.Context
logger klog.Logger
node *v1.Node
claimsToAllocate []*resourceapi.ResourceClaim
pools []*Pool
deviceMatchesRequest map[matchKey]bool
constraints [][]constraint // one list of constraints per claim

View File

@@ -56,7 +56,6 @@ var SupportedFeatures = internal.Features{}
type Allocator struct {
features Features
claimsToAllocate []*resourceapi.ResourceClaim
allocatedDevices sets.Set[DeviceID]
classLister DeviceClassLister
slices []*resourceapi.ResourceSlice
@@ -101,13 +100,13 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node, claims []*resou
ctx: ctx, // all methods share the same a and thus ctx
logger: klog.FromContext(ctx),
node: node,
claimsToAllocate: claims,
deviceMatchesRequest: make(map[matchKey]bool),
constraints: make([][]constraint, len(claims)),
consumedCounters: make(map[string]counterSets),
requestData: make(map[requestIndices]requestData),
result: make([]internalAllocationResult, len(claims)),
}
alloc.claimsToAllocate = claims
alloc.logger.V(5).Info("Starting allocation", "numClaims", len(alloc.claimsToAllocate))
defer alloc.logger.V(5).Info("Done with allocation", "success", len(finalResult) == len(alloc.claimsToAllocate), "err", finalErr)
@@ -453,6 +452,7 @@ type allocator struct {
ctx context.Context
logger klog.Logger
node *v1.Node
claimsToAllocate []*resourceapi.ResourceClaim
pools []*Pool
deviceMatchesRequest map[matchKey]bool
constraints [][]constraint // one list of constraints per claim