Merge pull request #34999 from kolyshkin/wait-on-rm

ContainerWait on remove: don't stuck on rm fail
diff --git a/api/server/router/container/container_routes.go b/api/server/router/container/container_routes.go
index 95bbe0e..106a708 100644
--- a/api/server/router/container/container_routes.go
+++ b/api/server/router/container/container_routes.go
@@ -280,11 +280,12 @@
 	// Behavior changed in version 1.30 to handle wait condition and to
 	// return headers immediately.
 	version := httputils.VersionFromContext(ctx)
-	legacyBehavior := versions.LessThan(version, "1.30")
+	legacyBehaviorPre130 := versions.LessThan(version, "1.30")
+	legacyRemovalWaitPre134 := false
 
 	// The wait condition defaults to "not-running".
 	waitCondition := containerpkg.WaitConditionNotRunning
-	if !legacyBehavior {
+	if !legacyBehaviorPre130 {
 		if err := httputils.ParseForm(r); err != nil {
 			return err
 		}
@@ -293,6 +294,7 @@
 			waitCondition = containerpkg.WaitConditionNextExit
 		case container.WaitConditionRemoved:
 			waitCondition = containerpkg.WaitConditionRemoved
+			legacyRemovalWaitPre134 = versions.LessThan(version, "1.34")
 		}
 	}
 
@@ -306,7 +308,7 @@
 
 	w.Header().Set("Content-Type", "application/json")
 
-	if !legacyBehavior {
+	if !legacyBehaviorPre130 {
 		// Write response header immediately.
 		w.WriteHeader(http.StatusOK)
 		if flusher, ok := w.(http.Flusher); ok {
@@ -317,8 +319,22 @@
 	// Block on the result of the wait operation.
 	status := <-waitC
 
+	// With API < 1.34, wait on WaitConditionRemoved did not return
+	// in case container removal failed. The only way to report an
+	// error back to the client is to not write anything (i.e. send
+	// an empty response which will be treated as an error).
+	if legacyRemovalWaitPre134 && status.Err() != nil {
+		return nil
+	}
+
+	var waitError *container.ContainerWaitOKBodyError
+	if status.Err() != nil {
+		waitError = &container.ContainerWaitOKBodyError{Message: status.Err().Error()}
+	}
+
 	return json.NewEncoder(w).Encode(&container.ContainerWaitOKBody{
 		StatusCode: int64(status.ExitCode()),
+		Error:      waitError,
 	})
 }
 
diff --git a/api/swagger.yaml b/api/swagger.yaml
index c3b9c29..b0c0575 100644
--- a/api/swagger.yaml
+++ b/api/swagger.yaml
@@ -5723,6 +5723,13 @@
                 description: "Exit code of the container"
                 type: "integer"
                 x-nullable: false
+              Error:
+                description: "container waiting error, if any"
+                type: "object"
+                properties:
+                  Message:
+                    description: "Details of an error"
+                    type: "string"
         404:
           description: "no such container"
           schema:
diff --git a/api/types/container/container_wait.go b/api/types/container/container_wait.go
index 77ecdba..47fb175 100644
--- a/api/types/container/container_wait.go
+++ b/api/types/container/container_wait.go
@@ -7,10 +7,22 @@
 // See hack/generate-swagger-api.sh
 // ----------------------------------------------------------------------------
 
+// ContainerWaitOKBodyError container waiting error, if any
+// swagger:model ContainerWaitOKBodyError
+type ContainerWaitOKBodyError struct {
+
+	// Details of an error
+	Message string `json:"Message,omitempty"`
+}
+
 // ContainerWaitOKBody container wait o k body
 // swagger:model ContainerWaitOKBody
 type ContainerWaitOKBody struct {
 
+	// error
+	// Required: true
+	Error *ContainerWaitOKBodyError `json:"Error"`
+
 	// Exit code of the container
 	// Required: true
 	StatusCode int64 `json:"StatusCode"`
diff --git a/container/state.go b/container/state.go
index 1a4c45c..3af4015 100644
--- a/container/state.go
+++ b/container/state.go
@@ -29,7 +29,7 @@
 	Dead              bool
 	Pid               int
 	ExitCodeValue     int    `json:"ExitCode"`
-	ErrorMsg          string `json:"Error"` // contains last known error when starting the container
+	ErrorMsg          string `json:"Error"` // contains last known error during container start or remove
 	StartedAt         time.Time
 	FinishedAt        time.Time
 	Health            *Health
@@ -326,7 +326,10 @@
 // know the error that occurred when container transits to another state
 // when inspecting it
 func (s *State) SetError(err error) {
-	s.ErrorMsg = err.Error()
+	s.ErrorMsg = ""
+	if err != nil {
+		s.ErrorMsg = err.Error()
+	}
 }
 
 // IsPaused returns whether the container is paused or not.
@@ -392,8 +395,18 @@
 // closes the internal waitRemove channel to unblock callers waiting for a
 // container to be removed.
 func (s *State) SetRemoved() {
+	s.SetRemovalError(nil)
+}
+
+// SetRemovalError is to be called in case a container remove failed.
+// It sets an error and closes the internal waitRemove channel to unblock
+// callers waiting for the container to be removed.
+func (s *State) SetRemovalError(err error) {
+	s.SetError(err)
 	s.Lock()
 	close(s.waitRemove) // Unblock those waiting on remove.
+	// Recreate the channel so next ContainerWait will work
+	s.waitRemove = make(chan struct{})
 	s.Unlock()
 }
 
diff --git a/daemon/delete.go b/daemon/delete.go
index 6db08f3..4d56d14 100644
--- a/daemon/delete.go
+++ b/daemon/delete.go
@@ -120,12 +120,16 @@
 		metadata, err := daemon.stores[container.OS].layerStore.ReleaseRWLayer(container.RWLayer)
 		layer.LogReleaseMetadata(metadata)
 		if err != nil && err != layer.ErrMountDoesNotExist && !os.IsNotExist(errors.Cause(err)) {
-			return errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID)
+			e := errors.Wrapf(err, "driver %q failed to remove root filesystem for %s", daemon.GraphDriverName(container.OS), container.ID)
+			container.SetRemovalError(e)
+			return e
 		}
 	}
 
 	if err := system.EnsureRemoveAll(container.Root); err != nil {
-		return errors.Wrapf(err, "unable to remove filesystem for %s", container.ID)
+		e := errors.Wrapf(err, "unable to remove filesystem for %s", container.ID)
+		container.SetRemovalError(e)
+		return e
 	}
 
 	linkNames := daemon.linkIndex.delete(container)
diff --git a/docs/api/version-history.md b/docs/api/version-history.md
index 9ac31dc..77b8545 100644
--- a/docs/api/version-history.md
+++ b/docs/api/version-history.md
@@ -17,12 +17,19 @@
 
 [Docker Engine API v1.34](https://docs.docker.com/engine/api/v1.34/) documentation
 
+* `POST /containers/(name)/wait?condition=removed` now also also returns
+  in case of container removal failure. A pointer to a structure named
+  `Error` added to the response JSON in order to indicate a failure.
+  If `Error` is `null`, container removal has succeeded, otherwise
+  the test of an error message indicating why container removal has failed
+  is available from `Error.Message` field.
+
 ## v1.33 API changes
 
 [Docker Engine API v1.33](https://docs.docker.com/engine/api/v1.33/) documentation
 
 * `GET /events` now supports filtering 4 more kinds of events: `config`, `node`,
-`secret` and `service`. 
+`secret` and `service`.
 
 ## v1.32 API changes