Merge pull request #42957 from thaJeztah/20.10_backport_rootless_quotes

[20.10 backport] rootless script fixes
diff --git a/builder/builder-next/adapters/containerimage/pull.go b/builder/builder-next/adapters/containerimage/pull.go
index 6b791f1..7a5a4e1 100644
--- a/builder/builder-next/adapters/containerimage/pull.go
+++ b/builder/builder-next/adapters/containerimage/pull.go
@@ -22,6 +22,7 @@
 	"github.com/containerd/containerd/remotes/docker"
 	"github.com/containerd/containerd/remotes/docker/schema1"
 	distreference "github.com/docker/distribution/reference"
+	dimages "github.com/docker/docker/daemon/images"
 	"github.com/docker/docker/distribution"
 	"github.com/docker/docker/distribution/metadata"
 	"github.com/docker/docker/distribution/xfer"
@@ -854,11 +855,11 @@
 }
 
 func platformMatches(img *image.Image, p *ocispec.Platform) bool {
-	if img.Architecture != p.Architecture {
-		return false
-	}
-	if img.Variant != "" && img.Variant != p.Variant {
-		return false
-	}
-	return img.OS == p.OS
+	return dimages.OnlyPlatformWithFallback(*p).Match(ocispec.Platform{
+		Architecture: img.Architecture,
+		OS:           img.OS,
+		OSVersion:    img.OSVersion,
+		OSFeatures:   img.OSFeatures,
+		Variant:      img.Variant,
+	})
 }
diff --git a/daemon/container_operations_unix.go b/daemon/container_operations_unix.go
index 1647df0..6a50b99 100644
--- a/daemon/container_operations_unix.go
+++ b/daemon/container_operations_unix.go
@@ -3,13 +3,11 @@
 package daemon // import "github.com/docker/docker/daemon"
 
 import (
-	"context"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
-	"time"
 
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/daemon/links"
@@ -336,38 +334,32 @@
 	}
 }
 
-func killProcessDirectly(cntr *container.Container) error {
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
+func killProcessDirectly(container *container.Container) error {
+	pid := container.GetPID()
+	// Ensure that we don't kill ourselves
+	if pid == 0 {
+		return nil
+	}
 
-	// Block until the container to stops or timeout.
-	status := <-cntr.Wait(ctx, container.WaitConditionNotRunning)
-	if status.Err() != nil {
-		// Ensure that we don't kill ourselves
-		if pid := cntr.GetPID(); pid != 0 {
-			logrus.Infof("Container %s failed to exit within 10 seconds of kill - trying direct SIGKILL", stringid.TruncateID(cntr.ID))
-			if err := unix.Kill(pid, 9); err != nil {
-				if err != unix.ESRCH {
-					return err
-				}
-				e := errNoSuchProcess{pid, 9}
-				logrus.Debug(e)
-				return e
-			}
+	if err := unix.Kill(pid, 9); err != nil {
+		if err != unix.ESRCH {
+			return err
+		}
+		e := errNoSuchProcess{pid, 9}
+		logrus.WithError(e).WithField("container", container.ID).Debug("no such process")
+		return e
+	}
 
-			// In case there were some exceptions(e.g., state of zombie and D)
-			if system.IsProcessAlive(pid) {
-
-				// Since we can not kill a zombie pid, add zombie check here
-				isZombie, err := system.IsProcessZombie(pid)
-				if err != nil {
-					logrus.Warnf("Container %s state is invalid", stringid.TruncateID(cntr.ID))
-					return err
-				}
-				if isZombie {
-					return errdefs.System(errors.Errorf("container %s PID %d is zombie and can not be killed. Use the --init option when creating containers to run an init inside the container that forwards signals and reaps processes", stringid.TruncateID(cntr.ID), pid))
-				}
-			}
+	// In case there were some exceptions(e.g., state of zombie and D)
+	if system.IsProcessAlive(pid) {
+		// Since we can not kill a zombie pid, add zombie check here
+		isZombie, err := system.IsProcessZombie(pid)
+		if err != nil {
+			logrus.WithError(err).WithField("container", container.ID).Warn("Container state is invalid")
+			return err
+		}
+		if isZombie {
+			return errdefs.System(errors.Errorf("container %s PID %d is zombie and can not be killed. Use the --init option when creating containers to run an init inside the container that forwards signals and reaps processes", stringid.TruncateID(container.ID), pid))
 		}
 	}
 	return nil
diff --git a/daemon/kill.go b/daemon/kill.go
index da8e723..cb374b2 100644
--- a/daemon/kill.go
+++ b/daemon/kill.go
@@ -139,29 +139,22 @@
 
 	// 1. Send SIGKILL
 	if err := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL)); err != nil {
-		// While normally we might "return err" here we're not going to
-		// because if we can't stop the container by this point then
-		// it's probably because it's already stopped. Meaning, between
-		// the time of the IsRunning() call above and now it stopped.
-		// Also, since the err return will be environment specific we can't
-		// look for any particular (common) error that would indicate
-		// that the process is already dead vs something else going wrong.
-		// So, instead we'll give it up to 2 more seconds to complete and if
-		// by that time the container is still running, then the error
-		// we got is probably valid and so we return it to the caller.
+		// kill failed, check if process is no longer running.
 		if isErrNoSuchProcess(err) {
 			return nil
 		}
-
-		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-		defer cancel()
-
-		if status := <-container.Wait(ctx, containerpkg.WaitConditionNotRunning); status.Err() != nil {
-			return err
-		}
 	}
 
-	// 2. Wait for the process to die, in last resort, try to kill the process directly
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	status := <-container.Wait(ctx, containerpkg.WaitConditionNotRunning)
+	if status.Err() == nil {
+		return nil
+	}
+
+	logrus.WithError(status.Err()).WithField("container", container.ID).Error("Container failed to exit within 10 seconds of kill - trying direct SIGKILL")
+
 	if err := killProcessDirectly(container); err != nil {
 		if isErrNoSuchProcess(err) {
 			return nil
@@ -169,10 +162,13 @@
 		return err
 	}
 
-	// Wait for exit with no timeout.
-	// Ignore returned status.
-	<-container.Wait(context.Background(), containerpkg.WaitConditionNotRunning)
+	// wait for container to exit one last time, if it doesn't then kill didnt work, so return error
+	ctx2, cancel2 := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel2()
 
+	if status := <-container.Wait(ctx2, containerpkg.WaitConditionNotRunning); status.Err() != nil {
+		return errors.New("tried to kill container, but did not receive an exit event")
+	}
 	return nil
 }
 
diff --git a/daemon/stop.go b/daemon/stop.go
index c3ac090..5d4c214 100644
--- a/daemon/stop.go
+++ b/daemon/stop.go
@@ -38,52 +38,64 @@
 
 // containerStop sends a stop signal, waits, sends a kill signal.
 func (daemon *Daemon) containerStop(container *containerpkg.Container, seconds int) error {
+	// TODO propagate a context down to this function
+	ctx := context.TODO()
 	if !container.IsRunning() {
 		return nil
 	}
-
-	stopSignal := container.StopSignal()
-	// 1. Send a stop signal
-	if err := daemon.killPossiblyDeadProcess(container, stopSignal); err != nil {
-		// While normally we might "return err" here we're not going to
-		// because if we can't stop the container by this point then
-		// it's probably because it's already stopped. Meaning, between
-		// the time of the IsRunning() call above and now it stopped.
-		// Also, since the err return will be environment specific we can't
-		// look for any particular (common) error that would indicate
-		// that the process is already dead vs something else going wrong.
-		// So, instead we'll give it up to 2 more seconds to complete and if
-		// by that time the container is still running, then the error
-		// we got is probably valid and so we force kill it.
-		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-		defer cancel()
-
-		if status := <-container.Wait(ctx, containerpkg.WaitConditionNotRunning); status.Err() != nil {
-			logrus.Infof("Container failed to stop after sending signal %d to the process, force killing", stopSignal)
-			if err := daemon.killPossiblyDeadProcess(container, 9); err != nil {
-				return err
-			}
-		}
-	}
-
-	// 2. Wait for the process to exit on its own
-	ctx := context.Background()
+	var wait time.Duration
 	if seconds >= 0 {
-		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, time.Duration(seconds)*time.Second)
+		wait = time.Duration(seconds) * time.Second
+	}
+	success := func() error {
+		daemon.LogContainerEvent(container, "stop")
+		return nil
+	}
+	stopSignal := container.StopSignal()
+
+	// 1. Send a stop signal
+	err := daemon.killPossiblyDeadProcess(container, stopSignal)
+	if err != nil {
+		wait = 2 * time.Second
+	}
+
+	var subCtx context.Context
+	var cancel context.CancelFunc
+	if seconds >= 0 {
+		subCtx, cancel = context.WithTimeout(ctx, wait)
+	} else {
+		subCtx, cancel = context.WithCancel(ctx)
+	}
+	defer cancel()
+
+	if status := <-container.Wait(subCtx, containerpkg.WaitConditionNotRunning); status.Err() == nil {
+		// container did exit, so ignore any previous errors and return
+		return success()
+	}
+
+	if err != nil {
+		// the container has still not exited, and the kill function errored, so log the error here:
+		logrus.WithError(err).WithField("container", container.ID).Errorf("Error sending stop (signal %d) to container", stopSignal)
+	}
+	if seconds < 0 {
+		// if the client requested that we never kill / wait forever, but container.Wait was still
+		// interrupted (parent context cancelled, for example), we should propagate the signal failure
+		return err
+	}
+
+	logrus.WithField("container", container.ID).Infof("Container failed to exit within %s of signal %d - using the force", wait, stopSignal)
+	// Stop either failed or container didnt exit, so fallback to kill.
+	if err := daemon.Kill(container); err != nil {
+		// got a kill error, but give container 2 more seconds to exit just in case
+		subCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
 		defer cancel()
-	}
-
-	if status := <-container.Wait(ctx, containerpkg.WaitConditionNotRunning); status.Err() != nil {
-		logrus.Infof("Container %v failed to exit within %d seconds of signal %d - using the force", container.ID, seconds, stopSignal)
-		// 3. If it doesn't, then send SIGKILL
-		if err := daemon.Kill(container); err != nil {
-			// Wait without a timeout, ignore result.
-			<-container.Wait(context.Background(), containerpkg.WaitConditionNotRunning)
-			logrus.Warn(err) // Don't return error because we only care that container is stopped, not what function stopped it
+		if status := <-container.Wait(subCtx, containerpkg.WaitConditionNotRunning); status.Err() == nil {
+			// container did exit, so ignore error and return
+			return success()
 		}
+		logrus.WithError(err).WithField("container", container.ID).Error("Error killing the container")
+		return err
 	}
 
-	daemon.LogContainerEvent(container, "stop")
-	return nil
+	return success()
 }