Merge pull request #46697 from thaJeztah/24.0_backport_restart_nocancel
[24.0 backport] daemon: daemon.containerRestart: don't cancel restart on context cancel
diff --git a/daemon/restart.go b/daemon/restart.go
index 38828ef..d56ce08 100644
--- a/daemon/restart.go
+++ b/daemon/restart.go
@@ -6,6 +6,7 @@
containertypes "github.com/docker/docker/api/types/container"
"github.com/docker/docker/container"
+ "github.com/docker/docker/internal/compatcontext"
)
// ContainerRestart stops and starts a container. It attempts to
@@ -31,6 +32,10 @@
// gracefully stop, before forcefully terminating the container. If
// given a negative duration, wait forever for a graceful stop.
func (daemon *Daemon) containerRestart(ctx context.Context, container *container.Container, options containertypes.StopOptions) error {
+ // Restarting is expected to be an atomic operation, and cancelling
+ // the request should not cancel the stop -> start sequence.
+ ctx = compatcontext.WithoutCancel(ctx)
+
// Determine isolation. If not specified in the hostconfig, use daemon default.
actualIsolation := container.HostConfig.Isolation
if containertypes.Isolation.IsDefault(actualIsolation) {
diff --git a/integration/container/restart_test.go b/integration/container/restart_test.go
index 3a61550..a7aab25 100644
--- a/integration/container/restart_test.go
+++ b/integration/container/restart_test.go
@@ -3,15 +3,18 @@
import (
"context"
"fmt"
+ "runtime"
"testing"
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
+ "github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
testContainer "github.com/docker/docker/integration/internal/container"
"github.com/docker/docker/testutil/daemon"
"gotest.tools/v3/assert"
+ is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/poll"
"gotest.tools/v3/skip"
)
@@ -208,3 +211,72 @@
})
}
}
+
+// TestContainerRestartWithCancelledRequest verifies that cancelling a restart
+// request does not cancel the restart operation, and still starts the container
+// after it was stopped.
+//
+// Regression test for https://github.com/moby/moby/discussions/46682
+func TestContainerRestartWithCancelledRequest(t *testing.T) {
+ ctx := context.TODO()
+ defer setupTest(t)()
+ apiClient := testEnv.APIClient()
+
+ // Create a container that ignores SIGTERM and doesn't stop immediately,
+ // giving us time to cancel the request.
+ //
+ // Restarting a container is "stop" (and, if needed, "kill"), then "start"
+ // the container. We're trying to create the scenario where the "stop" is
+ // handled, but the request was cancelled and therefore the "start" not
+ // taking place.
+ cID := testContainer.Run(ctx, t, apiClient, testContainer.WithCmd("sh", "-c", "trap 'echo received TERM' TERM; while true; do usleep 10; done"))
+ defer func() {
+ err := apiClient.ContainerRemove(ctx, cID, types.ContainerRemoveOptions{Force: true})
+ if t.Failed() && err != nil {
+ t.Logf("Cleaning up test container failed with error: %v", err)
+ }
+ }()
+
+ // Start listening for events.
+ messages, errs := apiClient.Events(ctx, types.EventsOptions{
+ Filters: filters.NewArgs(
+ filters.Arg("container", cID),
+ filters.Arg("event", "restart"),
+ ),
+ })
+
+ // Make restart request, but cancel the request before the container
+ // is (forcibly) killed.
+ ctx2, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
+ stopTimeout := 1
+ err := apiClient.ContainerRestart(ctx2, cID, container.StopOptions{
+ Timeout: &stopTimeout,
+ })
+ assert.Check(t, is.ErrorIs(err, context.DeadlineExceeded))
+ cancel()
+
+ // Validate that the restart event occurred, which is emitted
+ // after the restart (stop (kill) start) finished.
+ //
+ // Note that we cannot use RestartCount for this, as that's only
+ // used for restart-policies.
+ restartTimeout := 2 * time.Second
+ if runtime.GOOS == "windows" {
+ // hcs can sometimes take a long time to stop container.
+ restartTimeout = StopContainerWindowsPollTimeout
+ }
+ select {
+ case m := <-messages:
+ assert.Check(t, is.Equal(m.Actor.ID, cID))
+ assert.Check(t, is.Equal(m.Action, "restart"))
+ case err := <-errs:
+ assert.NilError(t, err)
+ case <-time.After(restartTimeout):
+ t.Errorf("timeout waiting for restart event")
+ }
+
+ // Container should be restarted (running).
+ inspect, err := apiClient.ContainerInspect(ctx, cID)
+ assert.NilError(t, err)
+ assert.Check(t, is.Equal(inspect.State.Status, "running"))
+}