mirror of
https://github.com/outbackdingo/debos.git
synced 2026-01-28 02:18:45 +00:00
Fix partition races while partitioning
Every so often we've seen races where seemingly the partition device disappeared when trying to use it. These became a bit more frequent after some speedups in fakemachine. Digging deeper it turns out that udev helpfully ask the kernel to rescan the partition table when it gets told via inotify that something might have written to it. As we're using parted, which already tells the kernel about partitions it created, this gets rather confusion. The new partition device would pop up (triggered by parted), parted would close the disk device node; This then would notify udev, which asked the kernel to rescan the partition table on the device *again*, which in turn causes all the partition device nodes to disappear and re-appear, leaving a nice window for things to go horribly horribly wrong. Ofcourse this all happens asynchronously, so there is really no way to know everything settled down.. Luckily there is one upside to this depressing story, udev only does this when it can get an exclusive lock on the disk device (e.g. none of the partitions are mounted and nobody else holds an exclusive lock). So we can avoid all this madness by simply holding an exclusive device lock and only releasing it after mounting has happened. As one last unexpected side-effect, while we do hold the exclusive lock it turns out udev also does not setup the various symlinks. So instead for the partition device use the canonical device names rather then the aliases. On the bright side those device nodes are generated directly by the kernel with devtmpfs, so there is no need to wait for udev to have done its processing anymore. Closes #50 Signed-off-by: Sjoerd Simons <sjoerd.simons@collabora.co.uk>
This commit is contained in:
@@ -116,6 +116,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
@@ -183,19 +184,23 @@ func (i *ImagePartitionAction) generateKernelRoot(context *debos.DebosContext) e
|
||||
}
|
||||
|
||||
func (i ImagePartitionAction) getPartitionDevice(number int, context debos.DebosContext) string {
|
||||
/* Always look up canonical device as udev might not generate the by-id
|
||||
* symlinks while there is an flock on /dev/vda */
|
||||
device, _ := filepath.EvalSymlinks(context.Image)
|
||||
|
||||
suffix := "p"
|
||||
/* Check partition naming first: if used 'by-id'i naming convention */
|
||||
if strings.Contains(context.Image, "/disk/by-id/") {
|
||||
if strings.Contains(device, "/disk/by-id/") {
|
||||
suffix = "-part"
|
||||
}
|
||||
|
||||
/* If the iamge device has a digit as the last character, the partition
|
||||
* suffix is p<number> else it's just <number> */
|
||||
last := context.Image[len(context.Image)-1]
|
||||
last := device[len(device)-1]
|
||||
if last >= '0' && last <= '9' {
|
||||
return fmt.Sprintf("%s%s%d", context.Image, suffix, number)
|
||||
return fmt.Sprintf("%s%s%d", device, suffix, number)
|
||||
} else {
|
||||
return fmt.Sprintf("%s%d", context.Image, number)
|
||||
return fmt.Sprintf("%s%d", device, number)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,11 +279,28 @@ func (i *ImagePartitionAction) PreNoMachine(context *debos.DebosContext) error {
|
||||
func (i ImagePartitionAction) Run(context *debos.DebosContext) error {
|
||||
i.LogStart()
|
||||
|
||||
/* Exclusively Lock image device file to prevent udev from triggering
|
||||
* partition rescans, which cause confusion as some time asynchronously the
|
||||
* partition device might disappear and reappear due to that! */
|
||||
imageFD, err := os.Open(context.Image)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
/* Defer will keep the fd open until the function returns, at which points
|
||||
* the filesystems will have been mounted protecting from more udev funnyness
|
||||
*/
|
||||
defer imageFD.Close()
|
||||
|
||||
err = syscall.Flock(int(imageFD.Fd()), syscall.LOCK_EX)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
command := []string{"parted", "-s", context.Image, "mklabel", i.PartitionType}
|
||||
if len(i.GptGap) > 0 {
|
||||
command = append(command, i.GptGap)
|
||||
}
|
||||
err := debos.Command{}.Run("parted", command...)
|
||||
err = debos.Command{}.Run("parted", command...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -317,12 +339,6 @@ func (i ImagePartitionAction) Run(context *debos.DebosContext) error {
|
||||
}
|
||||
|
||||
devicePath := i.getPartitionDevice(p.number, *context)
|
||||
// Give a chance for udevd to create proper symlinks
|
||||
err = debos.Command{}.Run("udevadm", "udevadm", "settle", "-t", "5",
|
||||
"-E", devicePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = i.formatPartition(p, *context)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user