package daemon

import (
	"context"
	"time"

	containerd "github.com/containerd/containerd/v2/client"
	"github.com/containerd/containerd/v2/core/containers"
	"github.com/containerd/log"
	"github.com/moby/moby/api/types/events"
	"github.com/moby/moby/v2/daemon/container"
	mobyc8dstore "github.com/moby/moby/v2/daemon/containerd"
	"github.com/moby/moby/v2/daemon/internal/libcontainerd"
	"github.com/moby/moby/v2/daemon/internal/metrics"
	"github.com/moby/moby/v2/daemon/internal/otelutil"
	"github.com/moby/moby/v2/daemon/server/backend"
	"github.com/moby/moby/v2/errdefs"
	"github.com/pkg/errors"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// validateState verifies that the container is in a state that allows it
// to be started.
func validateState(ctr *container.Container) error {
	ctr.Lock()
	defer ctr.Unlock()

	// Intentionally checking paused first, because a container can be
	// BOTH running AND paused. To start a paused (but running) container,
	// it must be thawed ("un-paused").
	if ctr.State.Paused {
		return errdefs.Conflict(errors.New("cannot start a paused container, try unpause instead"))
	} else if ctr.State.Running {
		// This is not an actual error, but produces a 304 "not modified"
		// when returned through the API to indicate the container is
		// already in the desired state. It's implemented as an error
		// to make the code calling this function terminate early (as
		// no further processing is needed).
		return errdefs.NotModified(errors.New("container is already running"))
	}
	if ctr.State.RemovalInProgress || ctr.State.Dead {
		return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
	}
	return nil
}

// ContainerStart starts the container with the given name. A non-empty
// checkpoint restores the container from that checkpoint, and is only
// allowed when the daemon runs in experimental mode.
func (daemon *Daemon) ContainerStart(ctx context.Context, name string, checkpoint string, checkpointDir string) error {
	daemonCfg := daemon.config()
	if checkpoint != "" && !daemonCfg.Experimental {
		return errdefs.InvalidParameter(errors.New("checkpoint is only supported in experimental mode"))
	}

	ctr, err := daemon.GetContainer(name)
	if err != nil {
		return err
	}
	if err := validateState(ctr); err != nil {
		return err
	}

	// Check whether the hostConfig is still in line with the current system
	// settings; it may be that cgroups were unmounted or the like.
	if _, err = daemon.verifyContainerSettings(daemonCfg, ctr.HostConfig, nil, false); err != nil {
		return errdefs.InvalidParameter(err)
	}

	return daemon.containerStart(ctx, daemonCfg, ctr, checkpoint, checkpointDir, true)
}

// containerStart prepares the container to run by setting up everything the
// container needs, such as storage and networking, as well as links
// between containers, then creates the container's task in containerd and
// starts it.
func (daemon *Daemon) containerStart(ctx context.Context, daemonCfg *configStore, container *container.Container, checkpoint string, checkpointDir string, resetRestartManager bool) (retErr error) {
	ctx, span := otel.Tracer("").Start(ctx, "daemon.containerStart", trace.WithAttributes(append(
		labelsAsOTelAttributes(container.Config.Labels),
		attribute.String("container.ID", container.ID),
		attribute.String("container.Name", container.Name),
	)...))
	defer func() {
		otelutil.RecordStatus(span, retErr)
		span.End()
	}()

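	// Track when we started, so the container-action metric recorded at the
	// end of this function reflects the full start duration.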
	start := time.Now()
	container.Lock()
	defer container.Unlock()

	if resetRestartManager && container.State.Running { // skip this check when called from the restart path (resetRestartManager == false)
		return nil
	}

	if container.State.RemovalInProgress || container.State.Dead {
		return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
	}

	if checkpointDir != "" {
		// TODO(mlaventure): how would we support that?
		return errdefs.Forbidden(errors.New("custom checkpointdir is not supported"))
	}

	// if we encounter an error during start we need to ensure that any other
	// setup has been cleaned up properly
	defer func() {
		if retErr != nil {
			container.State.SetError(retErr)
			// if no one else has set it, make sure we don't leave it at zero
			if container.State.ExitCode == 0 {
				container.State.SetExitCode(exitUnknown)
			}
			if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil {
				log.G(ctx).Errorf("%s: failed saving state on start failure: %v", container.ID, err)
			}
			container.Reset(false)

			daemon.Cleanup(context.WithoutCancel(ctx), container)
			// if the container's AutoRemove flag is set, remove it after cleanup
			if container.HostConfig.AutoRemove {
				container.Unlock()
				if err := daemon.containerRm(&daemonCfg.Config, container.ID, &backend.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
					log.G(ctx).Errorf("can't remove container %s: %v", container.ID, err)
				}
				container.Lock()
			}
		}
	}()

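	// Mount the container's root filesystem if the platform requires it to
	// be mounted before the container is started.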
	if err := daemon.conditionalMountOnStart(container); err != nil {
		return err
	}

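	// Set up the container's networking and create its network sandbox; the
	// sandbox is removed again below if a later step fails.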
	newSandbox, err := daemon.initializeNetworking(ctx, &daemonCfg.Config, container)
	if err != nil {
		return err
	}
	defer func() {
		if retErr != nil && newSandbox != nil {
			if err := newSandbox.Delete(ctx); err != nil {
				log.G(ctx).WithFields(log.Fields{
					"error":     err,
					"container": container.ID,
				}).Warn("After failure in networking initialisation, failed to remove sandbox")
			}
		}
	}()

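	// Prepare the container's per-container directories and collect the
	// mounts they contribute; these are combined with the regular mounts
	// below and passed to the OCI spec.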
	mnts, err := daemon.setupContainerDirs(container)
	if err != nil {
		return err
	}

	m, cleanup, err := daemon.setupMounts(ctx, container)
	if err != nil {
		return err
	}
	mnts = append(mnts, m...)
	defer cleanup(context.WithoutCancel(ctx))

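	// Generate the OCI runtime spec from the container's configuration and
	// the mounts collected above.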
	spec, err := daemon.createSpec(ctx, daemonCfg, container, mnts)
	if err != nil {
		// Any error that occurs while creating the spec, even if it's the
		// result of an invalid container config, must be considered a System
		// error (internal server error), as it's not an error with the request
		// to start the container.
		//
		// Invalid configuration in the config itself must be validated when
		// creating the container (creating its config), but some errors are
		// dependent on the current state, for example when starting a container
		// that shares a namespace with another container, and that container
		// is not running (or missing).
		return errdefs.System(err)
	}

	if resetRestartManager {
		container.ResetRestartManager(true)
		container.HasBeenManuallyStopped = false
	}

	if err := daemon.saveAppArmorConfig(container); err != nil {
		return err
	}

	if checkpoint != "" {
		checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
		if err != nil {
			return err
		}
	}

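	// Resolve which runtime shim to use for this container and the options
	// to pass to containerd when creating it.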
	shim, createOptions, err := daemon.getLibcontainerdCreateOptions(daemonCfg, container)
	if err != nil {
		return err
	}

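	// Create the container object in containerd, replacing any stale object
	// that may be left over from a previous run with the same ID.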
	ctr, err := libcontainerd.ReplaceContainer(ctx, daemon.containerd, container.ID, spec, shim, createOptions, func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
		// Only set the image if we are using containerd for image storage.
		// This is for metadata purposes only.
		// Other lower-level components may make use of this information.
		is, ok := daemon.imageService.(*mobyc8dstore.ImageService)
		if !ok {
			return nil
		}
		img, err := is.ResolveImage(ctx, container.Config.Image)
		if err != nil {
			log.G(ctx).WithError(err).WithField("container", container.ID).Warn("Failed to resolve containerd image reference")
			return nil
		}
		c.Image = img.Name
		return nil
	})
	if err != nil {
		return setExitCodeFromError(container.State.SetExitCode, err)
	}
	defer func() {
		if retErr != nil {
			if err := ctr.Delete(context.WithoutCancel(ctx)); err != nil {
				log.G(ctx).WithError(err).WithField("container", container.ID).
					Error("failed to delete container after failed start")
			}
		}
	}()

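	// Create the containerd task for the container; the task represents the
	// container's init process and is started further below.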
	startupTime := time.Now()
	// TODO(mlaventure): we need to specify checkpoint options here
	tsk, err := ctr.NewTask(context.WithoutCancel(ctx), // passing a cancelable ctx caused integration tests to be stuck in the cleanup phase
		checkpointDir, container.StreamConfig.Stdin() != nil || container.Config.Tty,
		container.InitializeStdio)
	if err != nil {
		return setExitCodeFromError(container.State.SetExitCode, err)
	}
	defer func() {
		if retErr != nil {
			if err := tsk.ForceDelete(context.WithoutCancel(ctx)); err != nil {
				log.G(ctx).WithError(err).WithField("container", container.ID).
					Error("failed to delete task after failed start")
			}
		}
	}()

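	// Run any remaining setup that has to happen after the task exists but
	// before it is started.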
	if err := daemon.initializeCreatedTask(ctx, &daemonCfg.Config, tsk, container, spec); err != nil {
		return err
	}

	if err := tsk.Start(context.WithoutCancel(ctx)); err != nil { // passing a cancelable ctx caused integration tests to be stuck in the cleanup phase
		return setExitCodeFromError(container.State.SetExitCode, err)
	}

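	// The task is running: record the new state, start health monitoring,
	// and persist the updated container state to disk.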
	container.HasBeenManuallyRestarted = false
	container.State.SetRunning(ctr, tsk, startupTime)
	container.HasBeenStartedBefore = true
	daemon.setStateCounter(container)

	daemon.initHealthMonitor(container)

	if err := container.CheckpointTo(context.WithoutCancel(ctx), daemon.containersReplica); err != nil {
		log.G(ctx).WithError(err).WithField("container", container.ID).
			Error("failed to store container")
	}

	daemon.LogContainerEvent(container, events.ActionStart)
	metrics.ContainerActions.WithValues("start").UpdateSince(start)

	return nil
}

// Cleanup releases any network resources allocated to the container along with any rules
// around how containers are linked together. It also unmounts the container's root filesystem.
func (daemon *Daemon) Cleanup(ctx context.Context, container *container.Container) {
	// Delete the container from containerd before releasing any host
	// resources: Microsoft HCS containers get into a bad state if host
	// resources are released while the container still exists.
	if ctr, ok := container.State.C8dContainer(); ok {
		if err := ctr.Delete(context.Background()); err != nil {
			log.G(ctx).Errorf("%s cleanup: failed to delete container from containerd: %v", container.ID, err)
		}
	}

	daemon.releaseNetwork(ctx, container)

	if err := container.UnmountIpcMount(); err != nil {
		log.G(ctx).Warnf("%s cleanup: failed to unmount IPC: %s", container.ID, err)
	}

	if err := daemon.conditionalUnmountOnCleanup(container); err != nil {
		// FIXME: remove once reference counting for graphdrivers has been refactored
		// Ensure that all the mounts are gone
		if mountid, err := daemon.imageService.GetLayerMountID(container.ID); err == nil {
			daemon.cleanupMountsByID(mountid)
		}
	}

	if err := container.UnmountSecrets(); err != nil {
		log.G(ctx).Warnf("%s cleanup: failed to unmount secrets: %s", container.ID, err)
	}

	if err := recursiveUnmount(container.Root); err != nil {
		log.G(ctx).WithError(err).WithField("container", container.ID).Warn("Error while cleaning up container resource mounts.")
	}

	for _, eConfig := range container.ExecCommands.Commands() {
		daemon.unregisterExecCommand(container, eConfig)
	}

	if container.BaseFS != "" {
		if err := container.UnmountVolumes(ctx, daemon.LogVolumeEvent); err != nil {
			log.G(ctx).Warnf("%s cleanup: Failed to umount volumes: %v", container.ID, err)
		}
	}

	container.CancelAttachContext()
}