Merge pull request #46697 from thaJeztah/24.0_backport_restart_nocancel

[24.0 backport] daemon: daemon.containerRestart: don't cancel restart on context cancel
diff --git a/daemon/restart.go b/daemon/restart.go
index 38828ef..d56ce08 100644
--- a/daemon/restart.go
+++ b/daemon/restart.go
@@ -6,6 +6,7 @@
 
 	containertypes "github.com/docker/docker/api/types/container"
 	"github.com/docker/docker/container"
+	"github.com/docker/docker/internal/compatcontext"
 )
 
 // ContainerRestart stops and starts a container. It attempts to
@@ -31,6 +32,10 @@
 // gracefully stop, before forcefully terminating the container. If
 // given a negative duration, wait forever for a graceful stop.
 func (daemon *Daemon) containerRestart(ctx context.Context, container *container.Container, options containertypes.StopOptions) error {
+	// Restarting is expected to be an atomic operation, and cancelling
+	// the request should not cancel the stop -> start sequence.
+	ctx = compatcontext.WithoutCancel(ctx)
+
 	// Determine isolation. If not specified in the hostconfig, use daemon default.
 	actualIsolation := container.HostConfig.Isolation
 	if containertypes.Isolation.IsDefault(actualIsolation) {
diff --git a/integration/container/restart_test.go b/integration/container/restart_test.go
index 3a61550..a7aab25 100644
--- a/integration/container/restart_test.go
+++ b/integration/container/restart_test.go
@@ -3,15 +3,18 @@
 import (
 	"context"
 	"fmt"
+	"runtime"
 	"testing"
 	"time"
 
 	"github.com/docker/docker/api/types"
 	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/api/types/filters"
 	"github.com/docker/docker/client"
 	testContainer "github.com/docker/docker/integration/internal/container"
 	"github.com/docker/docker/testutil/daemon"
 	"gotest.tools/v3/assert"
+	is "gotest.tools/v3/assert/cmp"
 	"gotest.tools/v3/poll"
 	"gotest.tools/v3/skip"
 )
@@ -208,3 +211,72 @@
 		})
 	}
 }
+
+// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
+// request does not cancel the restart operation, and still starts the container
+// after it was stopped.
+//
+// Regression test for https://github.com/moby/moby/discussions/46682
+func TestContainerRestartWithCancelledRequest(t *testing.T) {
+	ctx := context.TODO()
+	defer setupTest(t)()
+	apiClient := testEnv.APIClient()
+
+	// Create a container that ignores SIGTERM and doesn't stop immediately,
+	// giving us time to cancel the request.
+	//
+	// Restarting a container is "stop" (and, if needed, "kill"), then "start"
+	// the container. We're trying to create the scenario where the "stop" is
+	// handled, but the request was cancelled and therefore the "start" not
+	// taking place.
+	cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
+	defer func() {
+		err := apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true})
+		if t.Failed() && err != nil {
+			t.Logf("Cleaning up test container failed with error: %v", err)
+		}
+	}()
+
+	// Start listening for events.
+	messages, errs := apiClient.Events(ctx, types.EventsOptions{
+		Filters: filters.NewArgs(
+			filters.Arg("container", cID),
+			filters.Arg("event", "restart"),
+		),
+	})
+
+	// Make restart request, but cancel the request before the container
+	// is (forcibly) killed.
+	ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
+	stopTimeout := 1
+	err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
+		Timeout: &stopTimeout,
+	})
+	assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
+	cancel()
+
+	// Validate that the restart event occurred, which is emitted
+	// after the restart (stop (kill) start) finished.
+	//
+	// Note that we cannot use RestartCount for this, as that's only
+	// used for restart-policies.
+	restartTimeout := 2 * time.Second
+	if runtime.GOOS == "windows" {
+		// hcs can sometimes take a long time to stop container.
+		restartTimeout = StopContainerWindowsPollTimeout
+	}
+	select {
+	case m := <-messages:
+		assert.Check(t, is.Equal(m.Actor.ID, cID))
+		assert.Check(t, is.Equal(m.Action, "restart"))
+	case err := <-errs:
+		assert.NilError(t, err)
+	case <-time.After(restartTimeout):
+		t.Errorf("timeout waiting for restart event")
+	}
+
+	// Container should be restarted (running).
+	inspect, err := apiClient.ContainerInspect(ctx, cID)
+	assert.NilError(t, err)
+	assert.Check(t, is.Equal(inspect.State.Status, "running"))
+}