Merge pull request #35805 from cpuguy83/fix_kill_containers_on_restart

Ensure containers are stopped on daemon startup
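
Prior to this change, if the daemon exited without live-restore enabled
while containers were still running (for example when the daemon process
was killed rather than stopped), those containers were left running
unsupervised after the daemon came back up. On startup the daemon now
sends such containers their configured stop signal, and the
restart-policy / auto-remove bookkeeping runs for every container rather
than only for containers that were not live-restored.

A condensed sketch of the resulting per-container restore flow (this is
a simplification of the patched daemon/daemon.go below; locking,
logging, and the containerd-cleanup branch are omitted):

	if !daemon.configStore.LiveRestoreEnabled {
		// Nothing supervises this container anymore: stop it
		// with its configured stop signal.
		if err := daemon.kill(c, c.StopSignal()); err != nil && !errdefs.IsNotFound(err) {
			return
		}
	}
	// Queue restart-policy containers and auto-remove containers;
	// this now applies to all containers, not only the ones that
	// were not live-restored.
	if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
		restartContainers[c] = make(chan struct{})
	} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
		removeContainers[c.ID] = c
	}
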
diff --git a/daemon/daemon.go b/daemon/daemon.go
index e63e209..7de6580 100644
--- a/daemon/daemon.go
+++ b/daemon/daemon.go
@@ -247,6 +247,14 @@
 					logrus.WithError(err).Errorf("Failed to delete container %s from containerd", c.ID)
 					return
 				}
+			} else if !daemon.configStore.LiveRestoreEnabled {
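+				// Without live-restore, containers are expected to stop
+				// when the daemon does, so shut down any container that
+				// is still running from the previous daemon.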
+				if err := daemon.kill(c, c.StopSignal()); err != nil && !errdefs.IsNotFound(err) {
+					logrus.WithError(err).WithField("container", c.ID).Error("error shutting down container")
+					return
+				}
 			}
 
 			if c.IsRunning() || c.IsPaused() {
@@ -317,24 +322,24 @@
 					activeSandboxes[c.NetworkSettings.SandboxID] = options
 					mapLock.Unlock()
 				}
-			} else {
-				// get list of containers we need to restart
+			}
 
-				// Do not autostart containers which
-				// has endpoints in a swarm scope
-				// network yet since the cluster is
-				// not initialized yet. We will start
-				// it after the cluster is
-				// initialized.
-				if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
-					mapLock.Lock()
-					restartContainers[c] = make(chan struct{})
-					mapLock.Unlock()
-				} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
-					mapLock.Lock()
-					removeContainers[c.ID] = c
-					mapLock.Unlock()
-				}
+			// Get the list of containers we need to restart.
+
+			// Do not autostart containers that
+			// have endpoints in a swarm-scope
+			// network yet, since the cluster is
+			// not initialized yet. We will start
+			// them after the cluster is
+			// initialized.
+			if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
+				mapLock.Lock()
+				restartContainers[c] = make(chan struct{})
+				mapLock.Unlock()
+			} else if c.HostConfig != nil && c.HostConfig.AutoRemove {
+				mapLock.Lock()
+				removeContainers[c.ID] = c
+				mapLock.Unlock()
 			}
 
 			c.Lock()
diff --git a/integration/container/restart_test.go b/integration/container/restart_test.go
new file mode 100644
index 0000000..fe80f09
--- /dev/null
+++ b/integration/container/restart_test.go
@@ -0,0 +1,113 @@
+package container
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/integration-cli/daemon"
+)
+
+func TestDaemonRestartKillContainers(t *testing.T) {
+	type testCase struct {
+		desc       string
+		config     *container.Config
+		hostConfig *container.HostConfig
+
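+		// Expected running state after the daemon is restarted,
+		// without and with live-restore enabled respectively.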
+		xRunning            bool
+		xRunningLiveRestore bool
+	}
+
+	for _, c := range []testCase{
+		{
+			desc:                "container without restart policy",
+			config:              &container.Config{Image: "busybox", Cmd: []string{"top"}},
+			xRunningLiveRestore: true,
+		},
+		{
+			desc:                "container with restart=always",
+			config:              &container.Config{Image: "busybox", Cmd: []string{"top"}},
+			hostConfig:          &container.HostConfig{RestartPolicy: container.RestartPolicy{Name: "always"}},
+			xRunning:            true,
+			xRunningLiveRestore: true,
+		},
+	} {
+		for _, liveRestoreEnabled := range []bool{false, true} {
+			for fnName, stopDaemon := range map[string]func(*testing.T, *daemon.Daemon){
+				"kill-daemon": func(t *testing.T, d *daemon.Daemon) {
+					if err := d.Kill(); err != nil {
+						t.Fatal(err)
+					}
+				},
+				"stop-daemon": func(t *testing.T, d *daemon.Daemon) {
+					d.Stop(t)
+				},
+			} {
+				t.Run(fmt.Sprintf("live-restore=%v/%s/%s", liveRestoreEnabled, c.desc, fnName), func(t *testing.T) {
+					c := c
+					liveRestoreEnabled := liveRestoreEnabled
+					stopDaemon := stopDaemon
+
+					t.Parallel()
+
+					d := daemon.New(t, "", "dockerd", daemon.Config{})
+					client, err := d.NewClient()
+					if err != nil {
+						t.Fatal(err)
+					}
+
+					var args []string
+					if liveRestoreEnabled {
+						args = []string{"--live-restore"}
+					}
+
+					d.StartWithBusybox(t, args...)
+					defer d.Stop(t)
+					ctx := context.Background()
+
+					resp, err := client.ContainerCreate(ctx, c.config, c.hostConfig, nil, "")
+					if err != nil {
+						t.Fatal(err)
+					}
+					defer client.ContainerRemove(ctx, resp.ID, types.ContainerRemoveOptions{Force: true})
+
+					if err := client.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil {
+						t.Fatal(err)
+					}
+
+					stopDaemon(t, d)
+					d.Start(t, args...)
+
+					expected := c.xRunning
+					if liveRestoreEnabled {
+						expected = c.xRunningLiveRestore
+					}
+
+					var running bool
+					for i := 0; i < 30; i++ {
+						inspect, err := client.ContainerInspect(ctx, resp.ID)
+						if err != nil {
+							t.Fatal(err)
+						}
+
+						running = inspect.State.Running
+						if running == expected {
+							break
+						}
+						time.Sleep(2 * time.Second)
+					}
+
+					if running != expected {
+						t.Fatalf("expected running state %v, got %v", expected, running)
+					}
+					// TODO(cpuguy83): test pause states... this seems to be rather undefined currently
+				})
+			}
+		}
+	}
+}
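
To run only this test against a locally built daemon, a plain Go test
invocation along these lines should work (the exact environment setup
depends on the Moby integration-test harness, so treat this as a
sketch):

	go test -v -run TestDaemonRestartKillContainers ./integration/container/

The matrix exercises both live-restore settings and both a clean daemon
stop and a hard kill; without live-restore the container must be stopped
after the daemon comes back unless restart=always brings it back up.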