mirror of
				https://github.com/optim-enterprises-bv/kubernetes.git
				synced 2025-11-03 19:58:17 +00:00 
			
		
		
		
	Merge pull request #126303 from bart0sh/PR150-dra-refactor-checkpoint-upstream
DRA: refactor checkpointing
This commit is contained in:
		@@ -40,8 +40,8 @@ type ClaimInfo struct {
 | 
				
			|||||||
// claimInfoCache is a cache of processed resource claims keyed by namespace/claimname.
 | 
					// claimInfoCache is a cache of processed resource claims keyed by namespace/claimname.
 | 
				
			||||||
type claimInfoCache struct {
 | 
					type claimInfoCache struct {
 | 
				
			||||||
	sync.RWMutex
 | 
						sync.RWMutex
 | 
				
			||||||
	state     state.CheckpointState
 | 
						checkpointer state.Checkpointer
 | 
				
			||||||
	claimInfo map[string]*ClaimInfo
 | 
						claimInfo    map[string]*ClaimInfo
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// newClaimInfoFromClaim creates a new claim info from a resource claim.
 | 
					// newClaimInfoFromClaim creates a new claim info from a resource claim.
 | 
				
			||||||
@@ -77,12 +77,12 @@ func newClaimInfoFromState(state *state.ClaimInfoState) *ClaimInfo {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// setCDIDevices adds a set of CDI devices to the claim info.
 | 
					// setCDIDevices adds a set of CDI devices to the claim info.
 | 
				
			||||||
func (info *ClaimInfo) addDevice(driverName string, device state.Device) {
 | 
					func (info *ClaimInfo) addDevice(driverName string, deviceState state.Device) {
 | 
				
			||||||
	if info.DriverState == nil {
 | 
						if info.DriverState == nil {
 | 
				
			||||||
		info.DriverState = make(map[string]state.DriverState)
 | 
							info.DriverState = make(map[string]state.DriverState)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	driverState := info.DriverState[driverName]
 | 
						driverState := info.DriverState[driverName]
 | 
				
			||||||
	driverState.Devices = append(driverState.Devices, device)
 | 
						driverState.Devices = append(driverState.Devices, deviceState)
 | 
				
			||||||
	info.DriverState[driverName] = driverState
 | 
						info.DriverState[driverName] = driverState
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -113,22 +113,27 @@ func (info *ClaimInfo) isPrepared() bool {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
// newClaimInfoCache creates a new claim info cache object, pre-populated from a checkpoint (if present).
 | 
					// newClaimInfoCache creates a new claim info cache object, pre-populated from a checkpoint (if present).
 | 
				
			||||||
func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error) {
 | 
					func newClaimInfoCache(stateDir, checkpointName string) (*claimInfoCache, error) {
 | 
				
			||||||
	stateImpl, err := state.NewCheckpointState(stateDir, checkpointName)
 | 
						checkpointer, err := state.NewCheckpointer(stateDir, checkpointName)
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return nil, fmt.Errorf("could not initialize checkpoint manager, please drain node and remove dra state file, err: %+v", err)
 | 
							return nil, fmt.Errorf("could not initialize checkpoint manager, please drain node and remove dra state file, err: %+v", err)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	curState, err := stateImpl.GetOrCreate()
 | 
						checkpoint, err := checkpointer.GetOrCreate()
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return nil, fmt.Errorf("error calling GetOrCreate() on checkpoint state: %v", err)
 | 
							return nil, fmt.Errorf("error calling GetOrCreate() on checkpoint state: %v", err)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cache := &claimInfoCache{
 | 
						cache := &claimInfoCache{
 | 
				
			||||||
		state:     stateImpl,
 | 
							checkpointer: checkpointer,
 | 
				
			||||||
		claimInfo: make(map[string]*ClaimInfo),
 | 
							claimInfo:    make(map[string]*ClaimInfo),
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for _, entry := range curState {
 | 
						entries, err := checkpoint.GetClaimInfoStateList()
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return nil, fmt.Errorf("error calling GetEntries() on checkpoint: %w", err)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						for _, entry := range entries {
 | 
				
			||||||
		info := newClaimInfoFromState(&entry)
 | 
							info := newClaimInfoFromState(&entry)
 | 
				
			||||||
		cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
 | 
							cache.claimInfo[info.Namespace+"/"+info.ClaimName] = info
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -192,7 +197,11 @@ func (cache *claimInfoCache) syncToCheckpoint() error {
 | 
				
			|||||||
	for _, infoClaim := range cache.claimInfo {
 | 
						for _, infoClaim := range cache.claimInfo {
 | 
				
			||||||
		claimInfoStateList = append(claimInfoStateList, infoClaim.ClaimInfoState)
 | 
							claimInfoStateList = append(claimInfoStateList, infoClaim.ClaimInfoState)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return cache.state.Store(claimInfoStateList)
 | 
						checkpoint, err := state.NewCheckpoint(claimInfoStateList)
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return err
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return cache.checkpointer.Store(checkpoint)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
 | 
					// cdiDevicesAsList returns a list of CDIDevices from the provided claim info.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -18,51 +18,90 @@ package state
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
	"encoding/json"
 | 
						"encoding/json"
 | 
				
			||||||
 | 
						"hash/crc32"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 | 
						metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
 | 
						"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
var _ checkpointmanager.Checkpoint = &DRAManagerCheckpoint{}
 | 
					const (
 | 
				
			||||||
 | 
						CheckpointAPIGroup   = "checkpoint.dra.kubelet.k8s.io"
 | 
				
			||||||
 | 
						CheckpointKind       = "DRACheckpoint"
 | 
				
			||||||
 | 
						CheckpointAPIVersion = CheckpointAPIGroup + "/v1"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const checkpointVersion = "v1"
 | 
					// Checkpoint represents a structure to store DRA checkpoint data
 | 
				
			||||||
 | 
					type Checkpoint struct {
 | 
				
			||||||
// DRAManagerCheckpoint struct is used to store pod dynamic resources assignments in a checkpoint
 | 
						// Data is a JSON serialized checkpoint data
 | 
				
			||||||
type DRAManagerCheckpoint struct {
 | 
						Data string
 | 
				
			||||||
	Version  string             `json:"version"`
 | 
						// Checksum is a checksum of Data
 | 
				
			||||||
	Entries  ClaimInfoStateList `json:"entries,omitempty"`
 | 
						Checksum uint32
 | 
				
			||||||
	Checksum checksum.Checksum  `json:"checksum"`
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// List of claim info to store in checkpoint
 | 
					type CheckpointData struct {
 | 
				
			||||||
type ClaimInfoStateList []ClaimInfoState
 | 
						metav1.TypeMeta
 | 
				
			||||||
 | 
						ClaimInfoStateList ClaimInfoStateList
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// NewDRAManagerCheckpoint returns an instance of Checkpoint
 | 
					// NewCheckpoint creates a new checkpoint from a list of claim info states
 | 
				
			||||||
func NewDRAManagerCheckpoint() *DRAManagerCheckpoint {
 | 
					func NewCheckpoint(data ClaimInfoStateList) (*Checkpoint, error) {
 | 
				
			||||||
	return &DRAManagerCheckpoint{
 | 
						cpData := &CheckpointData{
 | 
				
			||||||
		Version: checkpointVersion,
 | 
							TypeMeta: metav1.TypeMeta{
 | 
				
			||||||
		Entries: ClaimInfoStateList{},
 | 
								Kind:       CheckpointKind,
 | 
				
			||||||
 | 
								APIVersion: CheckpointAPIVersion,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							ClaimInfoStateList: data,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cpDataBytes, err := json.Marshal(cpData)
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return nil, err
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cp := &Checkpoint{
 | 
				
			||||||
 | 
							Data:     string(cpDataBytes),
 | 
				
			||||||
 | 
							Checksum: crc32.ChecksumIEEE(cpDataBytes),
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return cp, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// MarshalCheckpoint returns marshalled checkpoint
 | 
					// MarshalCheckpoint marshals checkpoint to JSON
 | 
				
			||||||
func (dc *DRAManagerCheckpoint) MarshalCheckpoint() ([]byte, error) {
 | 
					func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) {
 | 
				
			||||||
	// make sure checksum wasn't set before so it doesn't affect output checksum
 | 
						return json.Marshal(cp)
 | 
				
			||||||
	dc.Checksum = 0
 | 
					 | 
				
			||||||
	dc.Checksum = checksum.New(dc)
 | 
					 | 
				
			||||||
	return json.Marshal(*dc)
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint
 | 
					// UnmarshalCheckpoint unmarshals checkpoint from JSON
 | 
				
			||||||
func (dc *DRAManagerCheckpoint) UnmarshalCheckpoint(blob []byte) error {
 | 
					// and verifies its data checksum
 | 
				
			||||||
	return json.Unmarshal(blob, dc)
 | 
					func (cp *Checkpoint) UnmarshalCheckpoint(blob []byte) error {
 | 
				
			||||||
 | 
						if err := json.Unmarshal(blob, cp); err != nil {
 | 
				
			||||||
 | 
							return err
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// verify checksum
 | 
				
			||||||
 | 
						if err := cp.VerifyChecksum(); err != nil {
 | 
				
			||||||
 | 
							return err
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// VerifyChecksum verifies that current checksum of checkpoint is valid
 | 
					// VerifyChecksum verifies that current checksum
 | 
				
			||||||
func (dc *DRAManagerCheckpoint) VerifyChecksum() error {
 | 
					// of checkpointed Data is valid
 | 
				
			||||||
	ck := dc.Checksum
 | 
					func (cp *Checkpoint) VerifyChecksum() error {
 | 
				
			||||||
	dc.Checksum = 0
 | 
						expectedCS := crc32.ChecksumIEEE([]byte(cp.Data))
 | 
				
			||||||
	err := ck.Verify(dc)
 | 
						if expectedCS != cp.Checksum {
 | 
				
			||||||
	dc.Checksum = ck
 | 
							return &errors.CorruptCheckpointError{ActualCS: uint64(cp.Checksum), ExpectedCS: uint64(expectedCS)}
 | 
				
			||||||
	return err
 | 
						}
 | 
				
			||||||
 | 
						return nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// GetClaimInfoStateList returns list of claim info states from checkpoint
 | 
				
			||||||
 | 
					func (cp *Checkpoint) GetClaimInfoStateList() (ClaimInfoStateList, error) {
 | 
				
			||||||
 | 
						var data CheckpointData
 | 
				
			||||||
 | 
						if err := json.Unmarshal([]byte(cp.Data), &data); err != nil {
 | 
				
			||||||
 | 
							return nil, err
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return data.ClaimInfoStateList, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										98
									
								
								pkg/kubelet/cm/dra/state/checkpointer.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								pkg/kubelet/cm/dra/state/checkpointer.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,98 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					Copyright 2023 The Kubernetes Authors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package state
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"errors"
 | 
				
			||||||
 | 
						"fmt"
 | 
				
			||||||
 | 
						"sync"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 | 
				
			||||||
 | 
						checkpointerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type Checkpointer interface {
 | 
				
			||||||
 | 
						GetOrCreate() (*Checkpoint, error)
 | 
				
			||||||
 | 
						Store(*Checkpoint) error
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type checkpointer struct {
 | 
				
			||||||
 | 
						sync.RWMutex
 | 
				
			||||||
 | 
						checkpointManager checkpointmanager.CheckpointManager
 | 
				
			||||||
 | 
						checkpointName    string
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// NewCheckpointer creates new checkpointer for keeping track of claim info  with checkpoint backend
 | 
				
			||||||
 | 
					func NewCheckpointer(stateDir, checkpointName string) (Checkpointer, error) {
 | 
				
			||||||
 | 
						if len(checkpointName) == 0 {
 | 
				
			||||||
 | 
							return nil, fmt.Errorf("received empty string instead of checkpointName")
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return nil, fmt.Errorf("failed to initialize checkpoint manager: %w", err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkpointer := &checkpointer{
 | 
				
			||||||
 | 
							checkpointManager: checkpointManager,
 | 
				
			||||||
 | 
							checkpointName:    checkpointName,
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return checkpointer, nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// GetOrCreate gets list of claim info states from a checkpoint
 | 
				
			||||||
 | 
					// or creates empty list if checkpoint doesn't exist
 | 
				
			||||||
 | 
					func (sc *checkpointer) GetOrCreate() (*Checkpoint, error) {
 | 
				
			||||||
 | 
						sc.Lock()
 | 
				
			||||||
 | 
						defer sc.Unlock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkpoint, err := NewCheckpoint(nil)
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return nil, fmt.Errorf("failed to create new checkpoint: %w", err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
 | 
				
			||||||
 | 
						if errors.Is(err, checkpointerrors.ErrCheckpointNotFound) {
 | 
				
			||||||
 | 
							err = sc.store(checkpoint)
 | 
				
			||||||
 | 
							if err != nil {
 | 
				
			||||||
 | 
								return nil, fmt.Errorf("failed to store checkpoint %v: %w", sc.checkpointName, err)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							return checkpoint, nil
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return checkpoint, nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Store stores checkpoint to the file
 | 
				
			||||||
 | 
					func (sc *checkpointer) Store(checkpoint *Checkpoint) error {
 | 
				
			||||||
 | 
						sc.Lock()
 | 
				
			||||||
 | 
						defer sc.Unlock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return sc.store(checkpoint)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// store saves state to a checkpoint, caller is responsible for locking
 | 
				
			||||||
 | 
					func (sc *checkpointer) store(checkpoint *Checkpoint) error {
 | 
				
			||||||
 | 
						if err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint); err != nil {
 | 
				
			||||||
 | 
							return fmt.Errorf("could not save checkpoint %s: %w", sc.checkpointName, err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return nil
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										342
									
								
								pkg/kubelet/cm/dra/state/checkpointer_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										342
									
								
								pkg/kubelet/cm/dra/state/checkpointer_test.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,342 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					Copyright 2020 The Kubernetes Authors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package state
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"os"
 | 
				
			||||||
 | 
						"strings"
 | 
				
			||||||
 | 
						"testing"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"github.com/stretchr/testify/assert"
 | 
				
			||||||
 | 
						"github.com/stretchr/testify/require"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"k8s.io/apimachinery/pkg/util/sets"
 | 
				
			||||||
 | 
						"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 | 
				
			||||||
 | 
						testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const testingCheckpoint = "dramanager_checkpoint_test"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TODO (https://github.com/kubernetes/kubernetes/issues/123552): reconsider what data gets stored in checkpoints and whether that is really necessary.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestCheckpointGetOrCreate(t *testing.T) {
 | 
				
			||||||
 | 
						testCases := []struct {
 | 
				
			||||||
 | 
							description                string
 | 
				
			||||||
 | 
							checkpointContent          string
 | 
				
			||||||
 | 
							expectedError              string
 | 
				
			||||||
 | 
							expectedClaimInfoStateList ClaimInfoStateList
 | 
				
			||||||
 | 
						}{
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description:                "new-checkpoint",
 | 
				
			||||||
 | 
								expectedClaimInfoStateList: nil,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description:       "single-claim-info-state",
 | 
				
			||||||
 | 
								checkpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"ClaimInfoStateList\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}}]}","Checksum":1656016162}`,
 | 
				
			||||||
 | 
								expectedClaimInfoStateList: ClaimInfoStateList{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-1",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
				
			||||||
 | 
										ClaimName: "example",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description:       "claim-info-state-with-multiple-devices",
 | 
				
			||||||
 | 
								checkpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"ClaimInfoStateList\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]},{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-2\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}}]}","Checksum":3369508096}`,
 | 
				
			||||||
 | 
								expectedClaimInfoStateList: ClaimInfoStateList{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-1",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-2",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
				
			||||||
 | 
										ClaimName: "example",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description:       "two-claim-info-states",
 | 
				
			||||||
 | 
								checkpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"ClaimInfoStateList\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example-1\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}},{\"ClaimUID\":\"4cf8db2d-06c0-7d70-1a51-e59b25b2c16c\",\"ClaimName\":\"example-2\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-2\",\"RequestNames\":null,\"CDIDeviceIDs\":null}]}}}]}","Checksum":1582256999}`,
 | 
				
			||||||
 | 
								expectedClaimInfoStateList: ClaimInfoStateList{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-1",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
				
			||||||
 | 
										ClaimName: "example-1",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:   "worker-1",
 | 
				
			||||||
 | 
														DeviceName: "dev-2",
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "4cf8db2d-06c0-7d70-1a51-e59b25b2c16c",
 | 
				
			||||||
 | 
										ClaimName: "example-2",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description:       "incorrect-checksum",
 | 
				
			||||||
 | 
								checkpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"Entries\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example-1\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}},{\"ClaimUID\":\"4cf8db2d-06c0-7d70-1a51-e59b25b2c16c\",\"ClaimName\":\"example-2\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-2\",\"RequestNames\":null,\"CDIDeviceIDs\":null}]}}}]}","Checksum":2930258365}`,
 | 
				
			||||||
 | 
								expectedError:     "checkpoint is corrupted",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description:       "invalid-JSON",
 | 
				
			||||||
 | 
								checkpointContent: `{`,
 | 
				
			||||||
 | 
								expectedError:     "unexpected end of JSON input",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// create temp dir
 | 
				
			||||||
 | 
						testingDir, err := os.MkdirTemp("", "dramanager_state_test")
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							t.Fatal(err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						defer func() {
 | 
				
			||||||
 | 
							if err := os.RemoveAll(testingDir); err != nil {
 | 
				
			||||||
 | 
								t.Fatal(err)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// create checkpoint manager for testing
 | 
				
			||||||
 | 
						cpm, err := checkpointmanager.NewCheckpointManager(testingDir)
 | 
				
			||||||
 | 
						require.NoError(t, err, "could not create testing checkpoint manager")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for _, tc := range testCases {
 | 
				
			||||||
 | 
							t.Run(tc.description, func(t *testing.T) {
 | 
				
			||||||
 | 
								// ensure there is no previous checkpoint
 | 
				
			||||||
 | 
								require.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								// prepare checkpoint for testing
 | 
				
			||||||
 | 
								if strings.TrimSpace(tc.checkpointContent) != "" {
 | 
				
			||||||
 | 
									mock := &testutil.MockCheckpoint{Content: tc.checkpointContent}
 | 
				
			||||||
 | 
									require.NoError(t, cpm.CreateCheckpoint(testingCheckpoint, mock), "could not create testing checkpoint")
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								checkpointer, err := NewCheckpointer(testingDir, testingCheckpoint)
 | 
				
			||||||
 | 
								require.NoError(t, err, "could not create testing checkpointer")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								checkpoint, err := checkpointer.GetOrCreate()
 | 
				
			||||||
 | 
								if strings.TrimSpace(tc.expectedError) != "" {
 | 
				
			||||||
 | 
									assert.ErrorContains(t, err, tc.expectedError)
 | 
				
			||||||
 | 
								} else {
 | 
				
			||||||
 | 
									require.NoError(t, err, "unexpected error")
 | 
				
			||||||
 | 
									stateList, err := checkpoint.GetClaimInfoStateList()
 | 
				
			||||||
 | 
									require.NoError(t, err, "could not get data entries from checkpoint")
 | 
				
			||||||
 | 
									require.NoError(t, err)
 | 
				
			||||||
 | 
									assert.Equal(t, tc.expectedClaimInfoStateList, stateList)
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestCheckpointStateStore(t *testing.T) {
 | 
				
			||||||
 | 
						testCases := []struct {
 | 
				
			||||||
 | 
							description               string
 | 
				
			||||||
 | 
							claimInfoStateList        ClaimInfoStateList
 | 
				
			||||||
 | 
							expectedCheckpointContent string
 | 
				
			||||||
 | 
						}{
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description: "single-claim-info-state",
 | 
				
			||||||
 | 
								claimInfoStateList: ClaimInfoStateList{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-1",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
				
			||||||
 | 
										ClaimName: "example",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								expectedCheckpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"ClaimInfoStateList\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}}]}","Checksum":1656016162}`,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description: "claim-info-state-with-multiple-devices",
 | 
				
			||||||
 | 
								claimInfoStateList: ClaimInfoStateList{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-1",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-2",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
				
			||||||
 | 
										ClaimName: "example",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								expectedCheckpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"ClaimInfoStateList\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]},{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-2\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}}]}","Checksum":3369508096}`,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								description: "two-claim-info-states",
 | 
				
			||||||
 | 
								claimInfoStateList: ClaimInfoStateList{
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:     "worker-1",
 | 
				
			||||||
 | 
														DeviceName:   "dev-1",
 | 
				
			||||||
 | 
														RequestNames: []string{"test request"},
 | 
				
			||||||
 | 
														CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
				
			||||||
 | 
										ClaimName: "example-1",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
									{
 | 
				
			||||||
 | 
										DriverState: map[string]DriverState{
 | 
				
			||||||
 | 
											"test-driver.cdi.k8s.io": {
 | 
				
			||||||
 | 
												Devices: []Device{
 | 
				
			||||||
 | 
													{
 | 
				
			||||||
 | 
														PoolName:   "worker-1",
 | 
				
			||||||
 | 
														DeviceName: "dev-2",
 | 
				
			||||||
 | 
													},
 | 
				
			||||||
 | 
												},
 | 
				
			||||||
 | 
											},
 | 
				
			||||||
 | 
										},
 | 
				
			||||||
 | 
										ClaimUID:  "4cf8db2d-06c0-7d70-1a51-e59b25b2c16c",
 | 
				
			||||||
 | 
										ClaimName: "example-2",
 | 
				
			||||||
 | 
										Namespace: "default",
 | 
				
			||||||
 | 
										PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
				
			||||||
 | 
									},
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
								expectedCheckpointContent: `{"Data":"{\"kind\":\"DRACheckpoint\",\"apiVersion\":\"checkpoint.dra.kubelet.k8s.io/v1\",\"ClaimInfoStateList\":[{\"ClaimUID\":\"067798be-454e-4be4-9047-1aa06aea63f7\",\"ClaimName\":\"example-1\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-1\",\"RequestNames\":[\"test request\"],\"CDIDeviceIDs\":[\"example.com/example=cdi-example\"]}]}}},{\"ClaimUID\":\"4cf8db2d-06c0-7d70-1a51-e59b25b2c16c\",\"ClaimName\":\"example-2\",\"Namespace\":\"default\",\"PodUIDs\":{\"139cdb46-f989-4f17-9561-ca10cfb509a6\":{}},\"DriverState\":{\"test-driver.cdi.k8s.io\":{\"Devices\":[{\"PoolName\":\"worker-1\",\"DeviceName\":\"dev-2\",\"RequestNames\":null,\"CDIDeviceIDs\":null}]}}}]}","Checksum":1582256999}`,
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Should return an error, stateDir cannot be an empty string
 | 
				
			||||||
 | 
						if _, err := NewCheckpointer("", testingCheckpoint); err == nil {
 | 
				
			||||||
 | 
							t.Fatal("expected error but got nil")
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// create temp dir
 | 
				
			||||||
 | 
						testingDir, err := os.MkdirTemp("", "dramanager_state_test")
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							t.Fatal(err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						defer func() {
 | 
				
			||||||
 | 
							if err := os.RemoveAll(testingDir); err != nil {
 | 
				
			||||||
 | 
								t.Fatal(err)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// NewCheckpointState with an empty checkpointName should return an error
 | 
				
			||||||
 | 
						if _, err = NewCheckpointer(testingDir, ""); err == nil {
 | 
				
			||||||
 | 
							t.Fatal("expected error but got nil")
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cpm, err := checkpointmanager.NewCheckpointManager(testingDir)
 | 
				
			||||||
 | 
						require.NoError(t, err, "could not create testing checkpoint manager")
 | 
				
			||||||
 | 
						require.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cs, err := NewCheckpointer(testingDir, testingCheckpoint)
 | 
				
			||||||
 | 
						require.NoError(t, err, "could not create testing checkpointState instance")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for _, tc := range testCases {
 | 
				
			||||||
 | 
							t.Run(tc.description, func(t *testing.T) {
 | 
				
			||||||
 | 
								checkpoint, err := NewCheckpoint(tc.claimInfoStateList)
 | 
				
			||||||
 | 
								require.NoError(t, err, "could not create Checkpoint")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								err = cs.Store(checkpoint)
 | 
				
			||||||
 | 
								require.NoError(t, err, "could not store checkpoint")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								err = cpm.GetCheckpoint(testingCheckpoint, checkpoint)
 | 
				
			||||||
 | 
								require.NoError(t, err, "could not get checkpoint")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								checkpointContent, err := checkpoint.MarshalCheckpoint()
 | 
				
			||||||
 | 
								require.NoError(t, err, "could not Marshal Checkpoint")
 | 
				
			||||||
 | 
								assert.Equal(t, tc.expectedCheckpointContent, string(checkpointContent))
 | 
				
			||||||
 | 
							})
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										59
									
								
								pkg/kubelet/cm/dra/state/state.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								pkg/kubelet/cm/dra/state/state.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,59 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					Copyright 2024 The Kubernetes Authors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License.
 | 
				
			||||||
 | 
					*/
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					package state
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"k8s.io/apimachinery/pkg/types"
 | 
				
			||||||
 | 
						"k8s.io/apimachinery/pkg/util/sets"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type ClaimInfoStateList []ClaimInfoState
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// +k8s:deepcopy-gen=true
 | 
				
			||||||
 | 
					type ClaimInfoState struct {
 | 
				
			||||||
 | 
						// ClaimUID is the UID of a resource claim
 | 
				
			||||||
 | 
						ClaimUID types.UID
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// ClaimName is the name of a resource claim
 | 
				
			||||||
 | 
						ClaimName string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Namespace is a claim namespace
 | 
				
			||||||
 | 
						Namespace string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// PodUIDs is a set of pod UIDs that reference a resource
 | 
				
			||||||
 | 
						PodUIDs sets.Set[string]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// DriverState contains information about all drivers which have allocation
 | 
				
			||||||
 | 
						// results in the claim, even if they don't provide devices for their results.
 | 
				
			||||||
 | 
						DriverState map[string]DriverState
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// DriverState is used to store per-device claim info state in a checkpoint
 | 
				
			||||||
 | 
					// +k8s:deepcopy-gen=true
 | 
				
			||||||
 | 
					type DriverState struct {
 | 
				
			||||||
 | 
						Devices []Device
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Device is how a DRA driver described an allocated device in a claim
 | 
				
			||||||
 | 
					// to kubelet. RequestName and CDI device IDs are optional.
 | 
				
			||||||
 | 
					// +k8s:deepcopy-gen=true
 | 
				
			||||||
 | 
					type Device struct {
 | 
				
			||||||
 | 
						PoolName     string
 | 
				
			||||||
 | 
						DeviceName   string
 | 
				
			||||||
 | 
						RequestNames []string
 | 
				
			||||||
 | 
						CDIDeviceIDs []string
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@@ -1,133 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
Copyright 2023 The Kubernetes Authors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
					 | 
				
			||||||
you may not use this file except in compliance with the License.
 | 
					 | 
				
			||||||
You may obtain a copy of the License at
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Unless required by applicable law or agreed to in writing, software
 | 
					 | 
				
			||||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
					 | 
				
			||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					 | 
				
			||||||
See the License for the specific language governing permissions and
 | 
					 | 
				
			||||||
limitations under the License.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
package state
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import (
 | 
					 | 
				
			||||||
	"fmt"
 | 
					 | 
				
			||||||
	"sync"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	"k8s.io/apimachinery/pkg/types"
 | 
					 | 
				
			||||||
	"k8s.io/apimachinery/pkg/util/sets"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
var _ CheckpointState = &stateCheckpoint{}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// CheckpointState interface provides to get and store state
 | 
					 | 
				
			||||||
type CheckpointState interface {
 | 
					 | 
				
			||||||
	GetOrCreate() (ClaimInfoStateList, error)
 | 
					 | 
				
			||||||
	Store(ClaimInfoStateList) error
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// ClaimInfoState is used to store claim info state in a checkpoint
 | 
					 | 
				
			||||||
// +k8s:deepcopy-gen=true
 | 
					 | 
				
			||||||
type ClaimInfoState struct {
 | 
					 | 
				
			||||||
	// ClaimUID is an UID of the resource claim
 | 
					 | 
				
			||||||
	ClaimUID types.UID
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// ClaimName is a name of the resource claim
 | 
					 | 
				
			||||||
	ClaimName string
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Namespace is a claim namespace
 | 
					 | 
				
			||||||
	Namespace string
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// PodUIDs is a set of pod UIDs that reference a resource
 | 
					 | 
				
			||||||
	PodUIDs sets.Set[string]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// DriverState contains information about all drivers which have allocation
 | 
					 | 
				
			||||||
	// results in the claim, even if they don't provide devices for their results.
 | 
					 | 
				
			||||||
	DriverState map[string]DriverState
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// DriverState is used to store per-device claim info state in a checkpoint
 | 
					 | 
				
			||||||
// +k8s:deepcopy-gen=true
 | 
					 | 
				
			||||||
type DriverState struct {
 | 
					 | 
				
			||||||
	Devices []Device
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Device is how a DRA driver described an allocated device in a claim
 | 
					 | 
				
			||||||
// to kubelet. RequestName and CDI device IDs are optional.
 | 
					 | 
				
			||||||
// +k8s:deepcopy-gen=true
 | 
					 | 
				
			||||||
type Device struct {
 | 
					 | 
				
			||||||
	PoolName     string
 | 
					 | 
				
			||||||
	DeviceName   string
 | 
					 | 
				
			||||||
	RequestNames []string
 | 
					 | 
				
			||||||
	CDIDeviceIDs []string
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
type stateCheckpoint struct {
 | 
					 | 
				
			||||||
	sync.RWMutex
 | 
					 | 
				
			||||||
	checkpointManager checkpointmanager.CheckpointManager
 | 
					 | 
				
			||||||
	checkpointName    string
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// NewCheckpointState creates new State for keeping track of claim info  with checkpoint backend
 | 
					 | 
				
			||||||
func NewCheckpointState(stateDir, checkpointName string) (*stateCheckpoint, error) {
 | 
					 | 
				
			||||||
	if len(checkpointName) == 0 {
 | 
					 | 
				
			||||||
		return nil, fmt.Errorf("received empty string instead of checkpointName")
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	stateCheckpoint := &stateCheckpoint{
 | 
					 | 
				
			||||||
		checkpointManager: checkpointManager,
 | 
					 | 
				
			||||||
		checkpointName:    checkpointName,
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return stateCheckpoint, nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// get state from a checkpoint and creates it if it doesn't exist
 | 
					 | 
				
			||||||
func (sc *stateCheckpoint) GetOrCreate() (ClaimInfoStateList, error) {
 | 
					 | 
				
			||||||
	sc.Lock()
 | 
					 | 
				
			||||||
	defer sc.Unlock()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	checkpoint := NewDRAManagerCheckpoint()
 | 
					 | 
				
			||||||
	err := sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint)
 | 
					 | 
				
			||||||
	if err == errors.ErrCheckpointNotFound {
 | 
					 | 
				
			||||||
		sc.store(ClaimInfoStateList{})
 | 
					 | 
				
			||||||
		return ClaimInfoStateList{}, nil
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return nil, fmt.Errorf("failed to get checkpoint %v: %w", sc.checkpointName, err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return checkpoint.Entries, nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// saves state to a checkpoint
 | 
					 | 
				
			||||||
func (sc *stateCheckpoint) Store(claimInfoStateList ClaimInfoStateList) error {
 | 
					 | 
				
			||||||
	sc.Lock()
 | 
					 | 
				
			||||||
	defer sc.Unlock()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return sc.store(claimInfoStateList)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// saves state to a checkpoint, caller is responsible for locking
 | 
					 | 
				
			||||||
func (sc *stateCheckpoint) store(claimInfoStateList ClaimInfoStateList) error {
 | 
					 | 
				
			||||||
	checkpoint := NewDRAManagerCheckpoint()
 | 
					 | 
				
			||||||
	checkpoint.Entries = claimInfoStateList
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		return fmt.Errorf("could not save checkpoint %s: %v", sc.checkpointName, err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	return nil
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@@ -1,289 +0,0 @@
 | 
				
			|||||||
/*
 | 
					 | 
				
			||||||
Copyright 2020 The Kubernetes Authors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Licensed under the Apache License, Version 2.0 (the "License");
 | 
					 | 
				
			||||||
you may not use this file except in compliance with the License.
 | 
					 | 
				
			||||||
You may obtain a copy of the License at
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    http://www.apache.org/licenses/LICENSE-2.0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Unless required by applicable law or agreed to in writing, software
 | 
					 | 
				
			||||||
distributed under the License is distributed on an "AS IS" BASIS,
 | 
					 | 
				
			||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					 | 
				
			||||||
See the License for the specific language governing permissions and
 | 
					 | 
				
			||||||
limitations under the License.
 | 
					 | 
				
			||||||
*/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
package state
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import (
 | 
					 | 
				
			||||||
	"errors"
 | 
					 | 
				
			||||||
	"os"
 | 
					 | 
				
			||||||
	"strings"
 | 
					 | 
				
			||||||
	"testing"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	"github.com/stretchr/testify/assert"
 | 
					 | 
				
			||||||
	"github.com/stretchr/testify/require"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	"k8s.io/apimachinery/pkg/util/sets"
 | 
					 | 
				
			||||||
	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
 | 
					 | 
				
			||||||
	cmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
 | 
					 | 
				
			||||||
	testutil "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state/testing"
 | 
					 | 
				
			||||||
)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const testingCheckpoint = "dramanager_checkpoint_test"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// assertStateEqual marks provided test as failed if provided states differ
 | 
					 | 
				
			||||||
func assertStateEqual(t *testing.T, restoredState, expectedState ClaimInfoStateList) {
 | 
					 | 
				
			||||||
	assert.Equal(t, expectedState, restoredState, "expected ClaimInfoState does not equal to restored one")
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// TODO (https://github.com/kubernetes/kubernetes/issues/123552): reconsider what data gets stored in checkpoints and whether that is really necessary.
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
// As it stands now, a "v1" checkpoint contains data for types like the resourceapi.ResourceHandle
 | 
					 | 
				
			||||||
// which may change over time as new fields get added in a backward-compatible way (not unusual
 | 
					 | 
				
			||||||
// for API types). That breaks checksuming with pkg/util/hash because it is based on spew output.
 | 
					 | 
				
			||||||
// That output includes those new fields.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func TestCheckpointGetOrCreate(t *testing.T) {
 | 
					 | 
				
			||||||
	testCases := []struct {
 | 
					 | 
				
			||||||
		description       string
 | 
					 | 
				
			||||||
		checkpointContent string
 | 
					 | 
				
			||||||
		expectedError     string
 | 
					 | 
				
			||||||
		expectedState     ClaimInfoStateList
 | 
					 | 
				
			||||||
	}{
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			description:       "Create non-existing checkpoint",
 | 
					 | 
				
			||||||
			checkpointContent: "",
 | 
					 | 
				
			||||||
			expectedError:     "",
 | 
					 | 
				
			||||||
			expectedState:     []ClaimInfoState{},
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			description:       "Restore checkpoint - single claim",
 | 
					 | 
				
			||||||
			checkpointContent: `{"version":"v1","entries":[{"ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"DriverState":{"test-driver.cdi.k8s.io":{"Devices":[{"PoolName":"worker-1","DeviceName":"dev-1","RequestNames":["test request"],"CDIDeviceIDs":["example.com/example=cdi-example"]}]}}}],"checksum":1925941526}`,
 | 
					 | 
				
			||||||
			expectedState: []ClaimInfoState{
 | 
					 | 
				
			||||||
				{
 | 
					 | 
				
			||||||
					DriverState: map[string]DriverState{
 | 
					 | 
				
			||||||
						"test-driver.cdi.k8s.io": {
 | 
					 | 
				
			||||||
							Devices: []Device{
 | 
					 | 
				
			||||||
								{
 | 
					 | 
				
			||||||
									PoolName:     "worker-1",
 | 
					 | 
				
			||||||
									DeviceName:   "dev-1",
 | 
					 | 
				
			||||||
									RequestNames: []string{"test request"},
 | 
					 | 
				
			||||||
									CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
					 | 
				
			||||||
								},
 | 
					 | 
				
			||||||
							},
 | 
					 | 
				
			||||||
						},
 | 
					 | 
				
			||||||
					},
 | 
					 | 
				
			||||||
					ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
					 | 
				
			||||||
					ClaimName: "example",
 | 
					 | 
				
			||||||
					Namespace: "default",
 | 
					 | 
				
			||||||
					PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
					 | 
				
			||||||
				},
 | 
					 | 
				
			||||||
			},
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			description:       "Restore checkpoint - single claim - multiple devices",
 | 
					 | 
				
			||||||
			checkpointContent: `{"version":"v1","entries":[{"ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"DriverState":{"test-driver.cdi.k8s.io":{"Devices":[{"PoolName":"worker-1","DeviceName":"dev-1","RequestNames":["test request"],"CDIDeviceIDs":["example.com/example=cdi-example"]},{"PoolName":"worker-1","DeviceName":"dev-2","RequestNames":["test request2"],"CDIDeviceIDs":["example.com/example=cdi-example2"]}]}}}],"checksum":3560752542}`,
 | 
					 | 
				
			||||||
			expectedError:     "",
 | 
					 | 
				
			||||||
			expectedState: []ClaimInfoState{
 | 
					 | 
				
			||||||
				{
 | 
					 | 
				
			||||||
					DriverState: map[string]DriverState{
 | 
					 | 
				
			||||||
						"test-driver.cdi.k8s.io": {
 | 
					 | 
				
			||||||
							Devices: []Device{
 | 
					 | 
				
			||||||
								{
 | 
					 | 
				
			||||||
									PoolName:     "worker-1",
 | 
					 | 
				
			||||||
									DeviceName:   "dev-1",
 | 
					 | 
				
			||||||
									RequestNames: []string{"test request"},
 | 
					 | 
				
			||||||
									CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
					 | 
				
			||||||
								},
 | 
					 | 
				
			||||||
								{
 | 
					 | 
				
			||||||
									PoolName:     "worker-1",
 | 
					 | 
				
			||||||
									DeviceName:   "dev-2",
 | 
					 | 
				
			||||||
									RequestNames: []string{"test request2"},
 | 
					 | 
				
			||||||
									CDIDeviceIDs: []string{"example.com/example=cdi-example2"},
 | 
					 | 
				
			||||||
								},
 | 
					 | 
				
			||||||
							},
 | 
					 | 
				
			||||||
						},
 | 
					 | 
				
			||||||
					},
 | 
					 | 
				
			||||||
					ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
					 | 
				
			||||||
					ClaimName: "example",
 | 
					 | 
				
			||||||
					Namespace: "default",
 | 
					 | 
				
			||||||
					PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
					 | 
				
			||||||
				},
 | 
					 | 
				
			||||||
			},
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			description:       "Restore checkpoint - multiple claims",
 | 
					 | 
				
			||||||
			checkpointContent: `{"version":"v1","entries":[{"ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example-1","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"DriverState":{"test-driver.cdi.k8s.io":{"Devices":[{"PoolName":"worker-1","DeviceName":"dev-1","RequestNames":["test request"],"CDIDeviceIDs":["example.com/example=cdi-example"]}]}}},{"ClaimUID":"4cf8db2d-06c0-7d70-1a51-e59b25b2c16c","ClaimName":"example-2","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"DriverState":{"test-driver.cdi.k8s.io":{"Devices":[{"PoolName":"worker-1","DeviceName":"dev-1","RequestNames":["test request"],"CDIDeviceIDs":["example.com/example=cdi-example"]}]}}}],"checksum":351581974}`,
 | 
					 | 
				
			||||||
			expectedError:     "",
 | 
					 | 
				
			||||||
			expectedState: []ClaimInfoState{
 | 
					 | 
				
			||||||
				{
 | 
					 | 
				
			||||||
					DriverState: map[string]DriverState{
 | 
					 | 
				
			||||||
						"test-driver.cdi.k8s.io": {
 | 
					 | 
				
			||||||
							Devices: []Device{
 | 
					 | 
				
			||||||
								{
 | 
					 | 
				
			||||||
									PoolName:     "worker-1",
 | 
					 | 
				
			||||||
									DeviceName:   "dev-1",
 | 
					 | 
				
			||||||
									RequestNames: []string{"test request"},
 | 
					 | 
				
			||||||
									CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
					 | 
				
			||||||
								},
 | 
					 | 
				
			||||||
							},
 | 
					 | 
				
			||||||
						},
 | 
					 | 
				
			||||||
					},
 | 
					 | 
				
			||||||
					ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
					 | 
				
			||||||
					ClaimName: "example-1",
 | 
					 | 
				
			||||||
					Namespace: "default",
 | 
					 | 
				
			||||||
					PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
					 | 
				
			||||||
				},
 | 
					 | 
				
			||||||
				{
 | 
					 | 
				
			||||||
					DriverState: map[string]DriverState{
 | 
					 | 
				
			||||||
						"test-driver.cdi.k8s.io": {
 | 
					 | 
				
			||||||
							Devices: []Device{
 | 
					 | 
				
			||||||
								{
 | 
					 | 
				
			||||||
									PoolName:     "worker-1",
 | 
					 | 
				
			||||||
									DeviceName:   "dev-1",
 | 
					 | 
				
			||||||
									RequestNames: []string{"test request"},
 | 
					 | 
				
			||||||
									CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
					 | 
				
			||||||
								},
 | 
					 | 
				
			||||||
							},
 | 
					 | 
				
			||||||
						},
 | 
					 | 
				
			||||||
					},
 | 
					 | 
				
			||||||
					ClaimUID:  "4cf8db2d-06c0-7d70-1a51-e59b25b2c16c",
 | 
					 | 
				
			||||||
					ClaimName: "example-2",
 | 
					 | 
				
			||||||
					Namespace: "default",
 | 
					 | 
				
			||||||
					PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
					 | 
				
			||||||
				},
 | 
					 | 
				
			||||||
			},
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			description:       "Restore checkpoint - invalid checksum",
 | 
					 | 
				
			||||||
			checkpointContent: `{"version":"v1","entries":[{"DriverName":"test-driver.cdi.k8s.io","ClassName":"class-name","ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"CDIDevices":{"test-driver.cdi.k8s.io":["example.com/example=cdi-example"]}}],"checksum":1988120168}`,
 | 
					 | 
				
			||||||
			expectedError:     "checkpoint is corrupted",
 | 
					 | 
				
			||||||
			expectedState:     []ClaimInfoState{},
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			description:       "Restore checkpoint with invalid JSON",
 | 
					 | 
				
			||||||
			checkpointContent: `{`,
 | 
					 | 
				
			||||||
			expectedError:     "unexpected end of JSON input",
 | 
					 | 
				
			||||||
			expectedState:     []ClaimInfoState{},
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// create temp dir
 | 
					 | 
				
			||||||
	testingDir, err := os.MkdirTemp("", "dramanager_state_test")
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		t.Fatal(err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	defer os.RemoveAll(testingDir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// create checkpoint manager for testing
 | 
					 | 
				
			||||||
	cpm, err := checkpointmanager.NewCheckpointManager(testingDir)
 | 
					 | 
				
			||||||
	assert.NoError(t, err, "could not create testing checkpoint manager")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	for _, tc := range testCases {
 | 
					 | 
				
			||||||
		t.Run(tc.description, func(t *testing.T) {
 | 
					 | 
				
			||||||
			// ensure there is no previous checkpoint
 | 
					 | 
				
			||||||
			assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			// prepare checkpoint for testing
 | 
					 | 
				
			||||||
			if strings.TrimSpace(tc.checkpointContent) != "" {
 | 
					 | 
				
			||||||
				checkpoint := &testutil.MockCheckpoint{Content: tc.checkpointContent}
 | 
					 | 
				
			||||||
				assert.NoError(t, cpm.CreateCheckpoint(testingCheckpoint, checkpoint), "could not create testing checkpoint")
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			var state ClaimInfoStateList
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			checkpointState, err := NewCheckpointState(testingDir, testingCheckpoint)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
			if err == nil {
 | 
					 | 
				
			||||||
				state, err = checkpointState.GetOrCreate()
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
			if strings.TrimSpace(tc.expectedError) != "" {
 | 
					 | 
				
			||||||
				assert.ErrorContains(t, err, tc.expectedError)
 | 
					 | 
				
			||||||
			} else {
 | 
					 | 
				
			||||||
				requireNoCheckpointError(t, err)
 | 
					 | 
				
			||||||
				// compare state after restoration with the one expected
 | 
					 | 
				
			||||||
				assertStateEqual(t, state, tc.expectedState)
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		})
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func TestCheckpointStateStore(t *testing.T) {
 | 
					 | 
				
			||||||
	claimInfoStateList := ClaimInfoStateList{
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			DriverState: map[string]DriverState{
 | 
					 | 
				
			||||||
				"test-driver.cdi.k8s.io": {
 | 
					 | 
				
			||||||
					Devices: []Device{{
 | 
					 | 
				
			||||||
						PoolName:     "worker-1",
 | 
					 | 
				
			||||||
						DeviceName:   "dev-1",
 | 
					 | 
				
			||||||
						RequestNames: []string{"test request"},
 | 
					 | 
				
			||||||
						CDIDeviceIDs: []string{"example.com/example=cdi-example"},
 | 
					 | 
				
			||||||
					}},
 | 
					 | 
				
			||||||
				},
 | 
					 | 
				
			||||||
			},
 | 
					 | 
				
			||||||
			ClaimUID:  "067798be-454e-4be4-9047-1aa06aea63f7",
 | 
					 | 
				
			||||||
			ClaimName: "example-1",
 | 
					 | 
				
			||||||
			Namespace: "default",
 | 
					 | 
				
			||||||
			PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
		{
 | 
					 | 
				
			||||||
			DriverState: map[string]DriverState{
 | 
					 | 
				
			||||||
				"test-driver.cdi.k8s.io": {
 | 
					 | 
				
			||||||
					Devices: []Device{{
 | 
					 | 
				
			||||||
						PoolName:   "worker-1",
 | 
					 | 
				
			||||||
						DeviceName: "dev-2",
 | 
					 | 
				
			||||||
					}},
 | 
					 | 
				
			||||||
				},
 | 
					 | 
				
			||||||
			},
 | 
					 | 
				
			||||||
			ClaimUID:  "4cf8db2d-06c0-7d70-1a51-e59b25b2c16c",
 | 
					 | 
				
			||||||
			ClaimName: "example-2",
 | 
					 | 
				
			||||||
			Namespace: "default",
 | 
					 | 
				
			||||||
			PodUIDs:   sets.New("139cdb46-f989-4f17-9561-ca10cfb509a6"),
 | 
					 | 
				
			||||||
		},
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	expectedCheckpoint := `{"version":"v1","entries":[{"ClaimUID":"067798be-454e-4be4-9047-1aa06aea63f7","ClaimName":"example-1","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"DriverState":{"test-driver.cdi.k8s.io":{"Devices":[{"PoolName":"worker-1","DeviceName":"dev-1","RequestNames":["test request"],"CDIDeviceIDs":["example.com/example=cdi-example"]}]}}},{"ClaimUID":"4cf8db2d-06c0-7d70-1a51-e59b25b2c16c","ClaimName":"example-2","Namespace":"default","PodUIDs":{"139cdb46-f989-4f17-9561-ca10cfb509a6":{}},"DriverState":{"test-driver.cdi.k8s.io":{"Devices":[{"PoolName":"worker-1","DeviceName":"dev-2","RequestNames":null,"CDIDeviceIDs":null}]}}}],"checksum":1191151426}`
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Should return an error, stateDir cannot be an empty string
 | 
					 | 
				
			||||||
	if _, err := NewCheckpointState("", testingCheckpoint); err == nil {
 | 
					 | 
				
			||||||
		t.Fatal("expected error but got nil")
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// create temp dir
 | 
					 | 
				
			||||||
	testingDir, err := os.MkdirTemp("", "dramanager_state_test")
 | 
					 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		t.Fatal(err)
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	defer os.RemoveAll(testingDir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	cpm, err := checkpointmanager.NewCheckpointManager(testingDir)
 | 
					 | 
				
			||||||
	assert.NoError(t, err, "could not create testing checkpoint manager")
 | 
					 | 
				
			||||||
	assert.NoError(t, cpm.RemoveCheckpoint(testingCheckpoint), "could not remove testing checkpoint")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	cs, err := NewCheckpointState(testingDir, testingCheckpoint)
 | 
					 | 
				
			||||||
	assert.NoError(t, err, "could not create testing checkpointState instance")
 | 
					 | 
				
			||||||
	err = cs.Store(claimInfoStateList)
 | 
					 | 
				
			||||||
	assert.NoError(t, err, "could not store ClaimInfoState")
 | 
					 | 
				
			||||||
	checkpoint := NewDRAManagerCheckpoint()
 | 
					 | 
				
			||||||
	cpm.GetCheckpoint(testingCheckpoint, checkpoint)
 | 
					 | 
				
			||||||
	checkpointData, err := checkpoint.MarshalCheckpoint()
 | 
					 | 
				
			||||||
	assert.NoError(t, err, "could not Marshal Checkpoint")
 | 
					 | 
				
			||||||
	assert.Equal(t, expectedCheckpoint, string(checkpointData), "expected ClaimInfoState does not equal to restored one")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// NewCheckpointState with an empty checkpointName should return an error
 | 
					 | 
				
			||||||
	if _, err = NewCheckpointState(testingDir, ""); err == nil {
 | 
					 | 
				
			||||||
		t.Fatal("expected error but got nil")
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func requireNoCheckpointError(t *testing.T, err error) {
 | 
					 | 
				
			||||||
	t.Helper()
 | 
					 | 
				
			||||||
	var cksumErr *cmerrors.CorruptCheckpointError
 | 
					 | 
				
			||||||
	if errors.As(err, &cksumErr) {
 | 
					 | 
				
			||||||
		t.Fatalf("unexpected corrupt checkpoint, expected checksum %d, got %d", cksumErr.ExpectedCS, cksumErr.ActualCS)
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		require.NoError(t, err, "could not restore checkpoint")
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
		Reference in New Issue
	
	Block a user