| package daemon |
| |
| import ( |
| "context" |
| "errors" |
| "fmt" |
| "os" |
| "strings" |
| "sync" |
| "time" |
| |
| "github.com/containerd/log" |
| "github.com/containerd/platforms" |
| containertypes "github.com/moby/moby/api/types/container" |
| "github.com/moby/moby/api/types/events" |
| networktypes "github.com/moby/moby/api/types/network" |
| "github.com/moby/moby/v2/daemon/config" |
| "github.com/moby/moby/v2/daemon/container" |
| "github.com/moby/moby/v2/daemon/images" |
| "github.com/moby/moby/v2/daemon/internal/image" |
| "github.com/moby/moby/v2/daemon/internal/metrics" |
| "github.com/moby/moby/v2/daemon/internal/multierror" |
| "github.com/moby/moby/v2/daemon/internal/otelutil" |
| "github.com/moby/moby/v2/daemon/server/backend" |
| "github.com/moby/moby/v2/daemon/server/imagebackend" |
| "github.com/moby/moby/v2/errdefs" |
| "github.com/moby/sys/user" |
| ocispec "github.com/opencontainers/image-spec/specs-go/v1" |
| "github.com/opencontainers/selinux/go-selinux" |
| "github.com/tonistiigi/go-archvariant" |
| "go.opentelemetry.io/otel" |
| "go.opentelemetry.io/otel/attribute" |
| "go.opentelemetry.io/otel/trace" |
| ) |
| |
| type createOpts struct { |
| params backend.ContainerCreateConfig |
| managed bool |
| ignoreImagesArgsEscaped bool |
| } |
| |
| // CreateManagedContainer creates a container that is managed by a Service |
| func (daemon *Daemon) CreateManagedContainer(ctx context.Context, params backend.ContainerCreateConfig) (containertypes.CreateResponse, error) { |
| return daemon.containerCreate(ctx, daemon.config(), createOpts{ |
| params: params, |
| managed: true, |
| }) |
| } |
| |
| // ContainerCreate creates a regular container |
| func (daemon *Daemon) ContainerCreate(ctx context.Context, params backend.ContainerCreateConfig) (containertypes.CreateResponse, error) { |
| return daemon.containerCreate(ctx, daemon.config(), createOpts{ |
| params: params, |
| }) |
| } |
| |
| // ContainerCreateIgnoreImagesArgsEscaped creates a regular container. This is called from the builder RUN case |
| // and ensures that we do not take the images ArgsEscaped |
| func (daemon *Daemon) ContainerCreateIgnoreImagesArgsEscaped(ctx context.Context, params backend.ContainerCreateConfig) (containertypes.CreateResponse, error) { |
| return daemon.containerCreate(ctx, daemon.config(), createOpts{ |
| params: params, |
| ignoreImagesArgsEscaped: true, |
| }) |
| } |
| |
| func (daemon *Daemon) containerCreate(ctx context.Context, daemonCfg *configStore, opts createOpts) (_ containertypes.CreateResponse, retErr error) { |
| ctx, span := otel.Tracer("").Start(ctx, "daemon.containerCreate", trace.WithAttributes( |
| labelsAsOTelAttributes(opts.params.Config.Labels)..., |
| )) |
| defer func() { |
| otelutil.RecordStatus(span, retErr) |
| span.End() |
| }() |
| |
| start := time.Now() |
| if opts.params.Config == nil { |
| return containertypes.CreateResponse{}, errdefs.InvalidParameter(errors.New("config cannot be empty in order to create a container")) |
| } |
| |
| // Normalize some defaults. Doing this "ad-hoc" here for now, as there's |
| // only one field to migrate, but we should consider having a better |
| // location for this (and decide where in the flow would be most appropriate). |
| // |
| // TODO(thaJeztah): we should have a more visible, more canonical location for this. |
| if opts.params.HostConfig != nil && opts.params.HostConfig.RestartPolicy.Name == "" { |
| // Set the default restart-policy ("none") if no restart-policy was set. |
| opts.params.HostConfig.RestartPolicy.Name = containertypes.RestartPolicyDisabled |
| } |
| |
| warnings, err := daemon.verifyContainerSettings(daemonCfg, opts.params.HostConfig, opts.params.Config, false) |
| if err != nil { |
| return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err) |
| } |
| |
| if opts.params.Platform == nil && opts.params.Config.Image != "" { |
| img, err := daemon.imageService.GetImage(ctx, opts.params.Config.Image, imagebackend.GetImageOpts{}) |
| if err != nil { |
| return containertypes.CreateResponse{}, err |
| } |
| if img != nil { |
| p := maximumSpec() |
| imgPlat := ocispec.Platform{ |
| OS: img.OS, |
| Architecture: img.Architecture, |
| Variant: img.Variant, |
| } |
| |
| if !images.OnlyPlatformWithFallback(p).Match(imgPlat) { |
| warnings = append(warnings, fmt.Sprintf("The requested image's platform (%s) does not match the detected host platform (%s) and no specific platform was requested", platforms.FormatAll(imgPlat), platforms.FormatAll(p))) |
| } |
| } |
| } |
| |
| err = daemon.validateNetworkingConfig(opts.params.NetworkingConfig) |
| if err != nil { |
| return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err) |
| } |
| |
| if opts.params.HostConfig == nil { |
| opts.params.HostConfig = &containertypes.HostConfig{} |
| } |
| err = daemon.adaptContainerSettings(&daemonCfg.Config, opts.params.HostConfig) |
| if err != nil { |
| return containertypes.CreateResponse{Warnings: warnings}, errdefs.InvalidParameter(err) |
| } |
| |
| ctr, err := daemon.create(ctx, &daemonCfg.Config, opts) |
| if err != nil { |
| return containertypes.CreateResponse{Warnings: warnings}, err |
| } |
| metrics.ContainerActions.WithValues("create").UpdateSince(start) |
| |
| if warnings == nil { |
| warnings = make([]string, 0) // Create an empty slice to avoid https://github.com/moby/moby/issues/38222 |
| } |
| |
| return containertypes.CreateResponse{ID: ctr.ID, Warnings: warnings}, nil |
| } |
| |
var (
	// containerLabelsFilter holds the label keys read from the
	// DOCKER_CONTAINER_LABELS_FILTER environment variable by
	// labelsAsOTelAttributes.
	containerLabelsFilter []string
	// containerLabelsFilterOnce guards the one-time initialization of
	// containerLabelsFilter.
	containerLabelsFilterOnce sync.Once
)
| |
| func labelsAsOTelAttributes(labels map[string]string) []attribute.KeyValue { |
| containerLabelsFilterOnce.Do(func() { |
| containerLabelsFilter = strings.Split(os.Getenv("DOCKER_CONTAINER_LABELS_FILTER"), ",") |
| }) |
| |
| // This env var is a comma-separated list of labels to be included in the |
| // OTel span attributes. The labels are prefixed with "label." to avoid |
| // collision with other attributes. |
| // |
| // Note that, this is an experimental env var that might be removed |
| // unceremoniously at any point in time. |
| attrs := make([]attribute.KeyValue, 0, len(containerLabelsFilter)) |
| for _, k := range containerLabelsFilter { |
| if v, ok := labels[k]; ok { |
| attrs = append(attrs, attribute.String("label."+k, v)) |
| } |
| } |
| return attrs |
| } |
| |
| // Create creates a new container from the given configuration with a given name. |
| func (daemon *Daemon) create(ctx context.Context, daemonCfg *config.Config, opts createOpts) (retC *container.Container, retErr error) { |
| var ( |
| ctr *container.Container |
| img *image.Image |
| imgManifest *ocispec.Descriptor |
| imgID image.ID |
| err error |
| platform = platforms.DefaultSpec() |
| ) |
| |
| if opts.params.Config.Image != "" { |
| img, err = daemon.imageService.GetImage(ctx, opts.params.Config.Image, imagebackend.GetImageOpts{Platform: opts.params.Platform}) |
| if err != nil { |
| return nil, err |
| } |
| if img.Details != nil { |
| imgManifest = img.Details.ManifestDescriptor |
| } |
| platform = img.Platform() |
| imgID = img.ID() |
| } else if isWindows { |
| platform.OS = "linux" // 'scratch' case. |
| } |
| |
| // On WCOW, if are not being invoked by the builder to create this container (where |
| // ignoreImagesArgEscaped will be true) - if the image already has its arguments escaped, |
| // ensure that this is replicated across to the created container to avoid double-escaping |
| // of the arguments/command line when the runtime attempts to run the container. |
| if platform.OS == "windows" && !opts.ignoreImagesArgsEscaped && img != nil && img.RunConfig().ArgsEscaped { |
| opts.params.Config.ArgsEscaped = true |
| } |
| |
| if err := daemon.mergeAndVerifyConfig(opts.params.Config, img); err != nil { |
| return nil, errdefs.InvalidParameter(err) |
| } |
| |
| if err := daemon.mergeAndVerifyLogConfig(&opts.params.HostConfig.LogConfig); err != nil { |
| return nil, errdefs.InvalidParameter(err) |
| } |
| |
| if ctr, err = daemon.newContainer(opts.params.Name, platform, opts.params.Config, opts.params.HostConfig, imgID, opts.managed); err != nil { |
| return nil, err |
| } |
| defer func() { |
| if retErr != nil { |
| err = daemon.cleanupContainer(ctr, backend.ContainerRmConfig{ |
| ForceRemove: true, |
| RemoveVolume: true, |
| }) |
| if err != nil { |
| log.G(ctx).WithFields(log.Fields{ |
| "error": err, |
| "container": ctr.ID, |
| }).Errorf("failed to cleanup container on create error") |
| } |
| } |
| }() |
| |
| if err := daemon.setSecurityOptions(daemonCfg, ctr, opts.params.HostConfig); err != nil { |
| return nil, err |
| } |
| |
| ctr.HostConfig.StorageOpt = opts.params.HostConfig.StorageOpt |
| ctr.ImageManifest = imgManifest |
| |
| // Set RWLayer for container after mount labels have been set |
| rwLayer, err := daemon.imageService.CreateLayer(ctr, setupInitLayer(daemon.idMapping.RootPair())) |
| if err != nil { |
| return nil, errdefs.System(err) |
| } |
| ctr.RWLayer = rwLayer |
| |
| cuid := os.Getuid() |
| _, gid := daemon.IdentityMapping().RootPair() |
| if err := user.MkdirAndChown(ctr.Root, 0o710, cuid, gid); err != nil { |
| return nil, err |
| } |
| if err := user.MkdirAndChown(ctr.CheckpointDir(), 0o700, cuid, os.Getegid()); err != nil { |
| return nil, err |
| } |
| |
| if err := daemon.setHostConfig(ctr, opts.params.HostConfig, opts.params.DefaultReadOnlyNonRecursive); err != nil { |
| return nil, err |
| } |
| |
| if err := daemon.createContainerOSSpecificSettings(ctx, ctr, opts.params.Config, opts.params.HostConfig); err != nil { |
| return nil, err |
| } |
| |
| var endpointsConfigs map[string]*networktypes.EndpointSettings |
| if opts.params.NetworkingConfig != nil { |
| endpointsConfigs = opts.params.NetworkingConfig.EndpointsConfig |
| } |
| // Make sure NetworkMode has an acceptable value. We do this to ensure |
| // backwards API compatibility. |
| if ctr.HostConfig != nil && ctr.HostConfig.NetworkMode == "" { |
| ctr.HostConfig.NetworkMode = networktypes.NetworkDefault |
| } |
| |
| daemon.updateContainerNetworkSettings(ctr, endpointsConfigs) |
| if err := daemon.register(ctx, ctr); err != nil { |
| return nil, err |
| } |
| metrics.StateCtr.Set(ctr.ID, "stopped") |
| daemon.LogContainerEvent(ctr, events.ActionCreate) |
| return ctr, nil |
| } |
| |
// toHostConfigSelinuxLabels prefixes every entry of labels with "label=",
// turning SELinux label options into their HostConfig security-opt form.
// The slice is modified in place and returned.
func toHostConfigSelinuxLabels(labels []string) []string {
	const prefix = "label="
	for idx := range labels {
		labels[idx] = prefix + labels[idx]
	}
	return labels
}
| |
| func (daemon *Daemon) generateSecurityOpt(hostConfig *containertypes.HostConfig) ([]string, error) { |
| for _, opt := range hostConfig.SecurityOpt { |
| con := strings.Split(opt, "=") |
| if con[0] == "label" { |
| // Caller overrode SecurityOpts |
| return nil, nil |
| } |
| } |
| ipcMode := hostConfig.IpcMode |
| pidMode := hostConfig.PidMode |
| privileged := hostConfig.Privileged |
| if ipcMode.IsHost() || pidMode.IsHost() || privileged { |
| return toHostConfigSelinuxLabels(selinux.DisableSecOpt()), nil |
| } |
| |
| var ipcLabel []string |
| var pidLabel []string |
| ipcContainer := ipcMode.Container() |
| pidContainer := pidMode.Container() |
| if ipcContainer != "" { |
| c, err := daemon.GetContainer(ipcContainer) |
| if err != nil { |
| return nil, err |
| } |
| ipcLabel, err = selinux.DupSecOpt(c.ProcessLabel) |
| if err != nil { |
| return nil, err |
| } |
| if pidContainer == "" { |
| return toHostConfigSelinuxLabels(ipcLabel), err |
| } |
| } |
| if pidContainer != "" { |
| c, err := daemon.GetContainer(pidContainer) |
| if err != nil { |
| return nil, err |
| } |
| |
| pidLabel, err = selinux.DupSecOpt(c.ProcessLabel) |
| if err != nil { |
| return nil, err |
| } |
| if ipcContainer == "" { |
| return toHostConfigSelinuxLabels(pidLabel), err |
| } |
| } |
| |
| if pidLabel != nil && ipcLabel != nil { |
| for i := 0; i < len(pidLabel); i++ { |
| if pidLabel[i] != ipcLabel[i] { |
| return nil, errors.New("--ipc and --pid containers SELinux labels aren't the same") |
| } |
| } |
| return toHostConfigSelinuxLabels(pidLabel), nil |
| } |
| return nil, nil |
| } |
| |
| func (daemon *Daemon) mergeAndVerifyConfig(config *containertypes.Config, img *image.Image) error { |
| if img != nil && img.Config != nil { |
| if err := merge(config, img.Config); err != nil { |
| return err |
| } |
| } |
| // Reset the Entrypoint if it is [""] |
| if len(config.Entrypoint) == 1 && config.Entrypoint[0] == "" { |
| config.Entrypoint = nil |
| } |
| if len(config.Entrypoint) == 0 && len(config.Cmd) == 0 { |
| return errors.New("no command specified") |
| } |
| return nil |
| } |
| |
| // validateNetworkingConfig checks whether a container's NetworkingConfig is valid. |
| func (daemon *Daemon) validateNetworkingConfig(nwConfig *networktypes.NetworkingConfig) error { |
| if nwConfig == nil { |
| return nil |
| } |
| |
| var errs []error |
| for k, v := range nwConfig.EndpointsConfig { |
| if v == nil { |
| errs = append(errs, fmt.Errorf("invalid config for network %s: EndpointsConfig is nil", k)) |
| continue |
| } |
| |
| // The referenced network k might not exist when the container is created, so just ignore the error in that case. |
| nw, _ := daemon.FindNetwork(k) |
| if err := validateEndpointSettings(nw, k, v); err != nil { |
| errs = append(errs, fmt.Errorf("invalid config for network %s: %w", k, err)) |
| } |
| } |
| |
| if len(errs) > 0 { |
| return errdefs.InvalidParameter(multierror.Join(errs...)) |
| } |
| |
| return nil |
| } |
| |
| // maximumSpec returns the distribution platform with maximum compatibility for the current node. |
| func maximumSpec() ocispec.Platform { |
| p := platforms.DefaultSpec() |
| if p.Architecture == "amd64" { |
| p.Variant = archvariant.AMD64Variant() |
| } |
| return p |
| } |