mirror of
https://github.com/optim-enterprises-bv/kubernetes.git
synced 2025-11-01 18:58:18 +00:00
Add sync state loop in master's volume reconciler
At master volume reconciler, the information about which volumes are attached to nodes is cached in actual state of world. However, this information might be out of date in case that node is terminated (volume is detached automatically). In this situation, reconciler assume volume is still attached and will not issue attach operation when node comes back. Pods created on those nodes will fail to mount. This PR adds the logic to periodically sync up the truth for attached volumes kept in the actual state cache. If the volume is no longer attached to the node, the actual state will be updated to reflect the truth. In turn, reconciler will take actions if needed. To avoid issuing many concurrent operations on cloud provider, this PR tries to add batch operation to check whether a list of volumes are attached to the node instead of one request per volume. More details are explained in PR #33760
This commit is contained in:
@@ -64,6 +64,13 @@ type OperationExecutor interface {
|
||||
// It then updates the actual state of the world to reflect that.
|
||||
AttachVolume(volumeToAttach VolumeToAttach, actualStateOfWorld ActualStateOfWorldAttacherUpdater) error
|
||||
|
||||
// VerifyVolumesAreAttached verifies the given list of volumes to see whether they are still attached to the node.
|
||||
// If any volume is not attached right now, it will update the actual state of the world to reflect that.
|
||||
// Note that this operation could be operated concurrently with other attach/detach operations.
|
||||
// In theory (but very unlikely in practise), race condition among these operations might mark volume as detached
|
||||
// even if it is attached. But reconciler can correct this in a short period of time.
|
||||
VerifyVolumesAreAttached(AttachedVolumes []AttachedVolume, nodeName types.NodeName, actualStateOfWorld ActualStateOfWorldAttacherUpdater) error
|
||||
|
||||
// DetachVolume detaches the volume from the node specified in
|
||||
// volumeToDetach, and updates the actual state of the world to reflect
|
||||
// that. If verifySafeToDetach is set, a call is made to the fetch the node
|
||||
@@ -397,6 +404,19 @@ func (oe *operationExecutor) DetachVolume(
|
||||
volumeToDetach.VolumeName, "" /* podName */, detachFunc)
|
||||
}
|
||||
|
||||
func (oe *operationExecutor) VerifyVolumesAreAttached(
|
||||
attachedVolumes []AttachedVolume,
|
||||
nodeName types.NodeName,
|
||||
actualStateOfWorld ActualStateOfWorldAttacherUpdater) error {
|
||||
volumesAreAttachedFunc, err :=
|
||||
oe.generateVolumesAreAttachedFunc(attachedVolumes, nodeName, actualStateOfWorld)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Give an empty UniqueVolumeName so that this operation could be executed concurrently.
|
||||
return oe.pendingOperations.Run("" /* volumeName */, "" /* podName */, volumesAreAttachedFunc)
|
||||
}
|
||||
|
||||
func (oe *operationExecutor) MountVolume(
|
||||
waitForAttachTimeout time.Duration,
|
||||
volumeToMount VolumeToMount,
|
||||
@@ -465,6 +485,83 @@ func (oe *operationExecutor) VerifyControllerAttachedVolume(
|
||||
volumeToMount.VolumeName, "" /* podName */, verifyControllerAttachedVolumeFunc)
|
||||
}
|
||||
|
||||
func (oe *operationExecutor) generateVolumesAreAttachedFunc(
|
||||
attachedVolumes []AttachedVolume,
|
||||
nodeName types.NodeName,
|
||||
actualStateOfWorld ActualStateOfWorldAttacherUpdater) (func() error, error) {
|
||||
|
||||
// volumesPerPlugin maps from a volume plugin to a list of volume specs which belong
|
||||
// to this type of plugin
|
||||
volumesPerPlugin := make(map[string][]*volume.Spec)
|
||||
// volumeSpecMap maps from a volume spec to its unique volumeName which will be used
|
||||
// when calling MarkVolumeAsDetached
|
||||
volumeSpecMap := make(map[*volume.Spec]api.UniqueVolumeName)
|
||||
// Iterate each volume spec and put them into a map index by the pluginName
|
||||
for _, volumeAttached := range attachedVolumes {
|
||||
volumePlugin, err :=
|
||||
oe.volumePluginMgr.FindPluginBySpec(volumeAttached.VolumeSpec)
|
||||
if err != nil || volumePlugin == nil {
|
||||
glog.Errorf(
|
||||
"VolumesAreAttached.FindPluginBySpec failed for volume %q (spec.Name: %q) on node %q with error: %v",
|
||||
volumeAttached.VolumeName,
|
||||
volumeAttached.VolumeSpec.Name(),
|
||||
volumeAttached.NodeName,
|
||||
err)
|
||||
}
|
||||
volumeSpecList, pluginExists := volumesPerPlugin[volumePlugin.GetPluginName()]
|
||||
if !pluginExists {
|
||||
volumeSpecList = []*volume.Spec{}
|
||||
}
|
||||
volumeSpecList = append(volumeSpecList, volumeAttached.VolumeSpec)
|
||||
volumesPerPlugin[volumePlugin.GetPluginName()] = volumeSpecList
|
||||
volumeSpecMap[volumeAttached.VolumeSpec] = volumeAttached.VolumeName
|
||||
}
|
||||
|
||||
return func() error {
|
||||
|
||||
// For each volume plugin, pass the list of volume specs to VolumesAreAttached to check
|
||||
// whether the volumes are still attached.
|
||||
for pluginName, volumesSpecs := range volumesPerPlugin {
|
||||
attachableVolumePlugin, err :=
|
||||
oe.volumePluginMgr.FindAttachablePluginByName(pluginName)
|
||||
if err != nil || attachableVolumePlugin == nil {
|
||||
glog.Errorf(
|
||||
"VolumeAreAttached.FindAttachablePluginBySpec failed for plugin %q with: %v",
|
||||
pluginName,
|
||||
err)
|
||||
continue
|
||||
}
|
||||
|
||||
volumeAttacher, newAttacherErr := attachableVolumePlugin.NewAttacher()
|
||||
if newAttacherErr != nil {
|
||||
glog.Errorf(
|
||||
"VolumesAreAttached failed for getting plugin %q with: %v",
|
||||
pluginName,
|
||||
newAttacherErr)
|
||||
continue
|
||||
}
|
||||
|
||||
attached, areAttachedErr := volumeAttacher.VolumesAreAttached(volumesSpecs, nodeName)
|
||||
if areAttachedErr != nil {
|
||||
glog.Errorf(
|
||||
"VolumesAreAttached failed for checking on node %q with: %v",
|
||||
nodeName,
|
||||
areAttachedErr)
|
||||
continue
|
||||
}
|
||||
|
||||
for spec, check := range attached {
|
||||
if !check {
|
||||
actualStateOfWorld.MarkVolumeAsDetached(volumeSpecMap[spec], nodeName)
|
||||
glog.V(1).Infof("VerifyVolumesAreAttached determined volume %q (spec.Name: %q) is no longer attached to node %q, therefore it was marked as detached.",
|
||||
volumeSpecMap[spec], spec.Name())
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (oe *operationExecutor) generateAttachVolumeFunc(
|
||||
volumeToAttach VolumeToAttach,
|
||||
actualStateOfWorld ActualStateOfWorldAttacherUpdater) (func() error, error) {
|
||||
|
||||
Reference in New Issue
Block a user