From 0bc5983e8a5c371e6fbd87b578b5b65bec54b5af Mon Sep 17 00:00:00 2001
From: Jakob Unterwurzacher
Date: Mon, 2 Dec 2024 10:55:04 +0100
Subject: [PATCH 1/2] ImagePartitionAction: retry losetup.Attach()

losetup.Attach() can fail due to concurrent attaches in other processes,
as seen in https://github.com/go-debos/debos/issues/522 .

The problem is a race condition between finding a free loop device and
attaching the image.

Now that we have go-losetup v2, which reports the error, we can do what
util-linux does (
https://github.com/util-linux/util-linux/blob/4c4b248c68149089c8be2f830214bb2be693307e/sys-utils/losetup.c#L662
) and retry on failure.

I only sleep for 200 ms, as opposed to 1 second as in
https://github.com/go-debos/debos/blob/78aad24dc068ec2aac0355c165f760b953379b8f/actions/image_partition_action.go#L668
, because the race condition should resolve immediately, without any
waiting. I still sleep for 200 ms, as this is what util-linux does to
prevent spinning (
https://github.com/util-linux/util-linux/commit/3ff6fb802de1efafbd90af228f91461691ac190c
).

Fixes: https://github.com/go-debos/debos/issues/522 --- actions/image_partition_action.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/actions/image_partition_action.go b/actions/image_partition_action.go index bee910e..180971b 100644 --- a/actions/image_partition_action.go +++ b/actions/image_partition_action.go @@ -459,7 +459,17 @@ func (i *ImagePartitionAction) PreNoMachine(context *debos.DebosContext) error { img.Close() - i.loopDev, err = losetup.Attach(imagePath, 0, false) + // losetup.Attach() can fail due to concurrent attaches in other processes + retries := 60 + for t := 1; t <= retries; t++ { + i.loopDev, err = losetup.Attach(imagePath, 0, false) + if err == nil { + break + } + log.Printf("Setup loop device: try %d/%d failed: %v", t, retries, err) + time.Sleep(200 * time.Millisecond) + } + if err != nil { return fmt.Errorf("Failed to setup loop device") } From db6b23c20a965aa8e91b9e1fd04c5d59b555d1bf Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Mon, 2 Dec 2024 11:35:00 +0100 Subject: [PATCH 2/2] ImagePartitionAction: return specific error on failure go-losetup v2 now returns a meaningful error. Add it to the returned error message. --- actions/image_partition_action.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/actions/image_partition_action.go b/actions/image_partition_action.go index 180971b..0054c34 100644 --- a/actions/image_partition_action.go +++ b/actions/image_partition_action.go @@ -471,7 +471,7 @@ func (i *ImagePartitionAction) PreNoMachine(context *debos.DebosContext) error { } if err != nil { - return fmt.Errorf("Failed to setup loop device") + return fmt.Errorf("Failed to setup loop device: %v", err) } context.Image = i.loopDev.Path() i.usingLoop = true