| /* |
| Copyright The containerd Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package oci |
| |
| import ( |
| "bufio" |
| "context" |
| "encoding/json" |
| "errors" |
| "fmt" |
| "os" |
| "path/filepath" |
| "runtime" |
| "strconv" |
| "strings" |
| |
| "github.com/containerd/containerd/containers" |
| "github.com/containerd/containerd/content" |
| "github.com/containerd/containerd/images" |
| "github.com/containerd/containerd/mount" |
| "github.com/containerd/containerd/namespaces" |
| "github.com/containerd/containerd/platforms" |
| "github.com/containerd/continuity/fs" |
| v1 "github.com/opencontainers/image-spec/specs-go/v1" |
| "github.com/opencontainers/runc/libcontainer/user" |
| "github.com/opencontainers/runtime-spec/specs-go" |
| ) |
| |
| // SpecOpts sets spec specific information to a newly generated OCI spec |
| type SpecOpts func(context.Context, Client, *containers.Container, *Spec) error |
| |
| // Compose converts a sequence of spec operations into a single operation |
| func Compose(opts ...SpecOpts) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { |
| for _, o := range opts { |
| if err := o(ctx, client, c, s); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| } |
| |
| // setProcess sets Process to empty if unset |
| func setProcess(s *Spec) { |
| if s.Process == nil { |
| s.Process = &specs.Process{} |
| } |
| } |
| |
| // setRoot sets Root to empty if unset |
| func setRoot(s *Spec) { |
| if s.Root == nil { |
| s.Root = &specs.Root{} |
| } |
| } |
| |
| // setLinux sets Linux to empty if unset |
| func setLinux(s *Spec) { |
| if s.Linux == nil { |
| s.Linux = &specs.Linux{} |
| } |
| } |
| |
| // nolint |
| func setResources(s *Spec) { |
| if s.Linux != nil { |
| if s.Linux.Resources == nil { |
| s.Linux.Resources = &specs.LinuxResources{} |
| } |
| } |
| if s.Windows != nil { |
| if s.Windows.Resources == nil { |
| s.Windows.Resources = &specs.WindowsResources{} |
| } |
| } |
| } |
| |
| // nolint |
| func setCPU(s *Spec) { |
| setResources(s) |
| if s.Linux != nil { |
| if s.Linux.Resources.CPU == nil { |
| s.Linux.Resources.CPU = &specs.LinuxCPU{} |
| } |
| } |
| if s.Windows != nil { |
| if s.Windows.Resources.CPU == nil { |
| s.Windows.Resources.CPU = &specs.WindowsCPUResources{} |
| } |
| } |
| } |
| |
| // setCapabilities sets Linux Capabilities to empty if unset |
| func setCapabilities(s *Spec) { |
| setProcess(s) |
| if s.Process.Capabilities == nil { |
| s.Process.Capabilities = &specs.LinuxCapabilities{} |
| } |
| } |
| |
| // ensureAdditionalGids ensures that the primary GID is also included in the additional GID list. |
| func ensureAdditionalGids(s *Spec) { |
| setProcess(s) |
| for _, f := range s.Process.User.AdditionalGids { |
| if f == s.Process.User.GID { |
| return |
| } |
| } |
| s.Process.User.AdditionalGids = append([]uint32{s.Process.User.GID}, s.Process.User.AdditionalGids...) |
| } |
| |
| // WithDefaultSpec returns a SpecOpts that will populate the spec with default |
| // values. |
| // |
| // Use as the first option to clear the spec, then apply options afterwards. |
| func WithDefaultSpec() SpecOpts { |
| return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error { |
| return generateDefaultSpecWithPlatform(ctx, platforms.DefaultString(), c.ID, s) |
| } |
| } |
| |
| // WithDefaultSpecForPlatform returns a SpecOpts that will populate the spec |
| // with default values for a given platform. |
| // |
| // Use as the first option to clear the spec, then apply options afterwards. |
| func WithDefaultSpecForPlatform(platform string) SpecOpts { |
| return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error { |
| return generateDefaultSpecWithPlatform(ctx, platform, c.ID, s) |
| } |
| } |
| |
| // WithSpecFromBytes loads the spec from the provided byte slice. |
| func WithSpecFromBytes(p []byte) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| *s = Spec{} // make sure spec is cleared. |
| if err := json.Unmarshal(p, s); err != nil { |
| return fmt.Errorf("decoding spec config file failed, current supported OCI runtime-spec : v%s: %w", specs.Version, err) |
| } |
| return nil |
| } |
| } |
| |
| // WithSpecFromFile loads the specification from the provided filename. |
| func WithSpecFromFile(filename string) SpecOpts { |
| return func(ctx context.Context, c Client, container *containers.Container, s *Spec) error { |
| p, err := os.ReadFile(filename) |
| if err != nil { |
| return fmt.Errorf("cannot load spec config file: %w", err) |
| } |
| return WithSpecFromBytes(p)(ctx, c, container, s) |
| } |
| } |
| |
| // WithEnv appends environment variables |
| func WithEnv(environmentVariables []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| if len(environmentVariables) > 0 { |
| setProcess(s) |
| s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, environmentVariables) |
| } |
| return nil |
| } |
| } |
| |
| // WithDefaultPathEnv sets the $PATH environment variable to the |
| // default PATH defined in this package. |
| func WithDefaultPathEnv(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| s.Process.Env = replaceOrAppendEnvValues(s.Process.Env, defaultUnixEnv) |
| return nil |
| } |
| |
| // replaceOrAppendEnvValues returns the defaults with the overrides either |
| // replaced by env key or appended to the list |
| func replaceOrAppendEnvValues(defaults, overrides []string) []string { |
| cache := make(map[string]int, len(defaults)) |
| results := make([]string, 0, len(defaults)) |
| for i, e := range defaults { |
| parts := strings.SplitN(e, "=", 2) |
| results = append(results, e) |
| cache[parts[0]] = i |
| } |
| |
| for _, value := range overrides { |
| // Values w/o = means they want this env to be removed/unset. |
| if !strings.Contains(value, "=") { |
| if i, exists := cache[value]; exists { |
| results[i] = "" // Used to indicate it should be removed |
| } |
| continue |
| } |
| |
| // Just do a normal set/update |
| parts := strings.SplitN(value, "=", 2) |
| if i, exists := cache[parts[0]]; exists { |
| results[i] = value |
| } else { |
| results = append(results, value) |
| } |
| } |
| |
| // Now remove all entries that we want to "unset" |
| for i := 0; i < len(results); i++ { |
| if results[i] == "" { |
| results = append(results[:i], results[i+1:]...) |
| i-- |
| } |
| } |
| |
| return results |
| } |
| |
| // WithProcessArgs replaces the args on the generated spec |
| func WithProcessArgs(args ...string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.Args = args |
| return nil |
| } |
| } |
| |
| // WithProcessCwd replaces the current working directory on the generated spec |
| func WithProcessCwd(cwd string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.Cwd = cwd |
| return nil |
| } |
| } |
| |
| // WithTTY sets the information on the spec as well as the environment variables for |
| // using a TTY |
| func WithTTY(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.Terminal = true |
| if s.Linux != nil { |
| s.Process.Env = append(s.Process.Env, "TERM=xterm") |
| } |
| |
| return nil |
| } |
| |
| // WithTTYSize sets the information on the spec as well as the environment variables for |
| // using a TTY |
| func WithTTYSize(width, height int) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| if s.Process.ConsoleSize == nil { |
| s.Process.ConsoleSize = &specs.Box{} |
| } |
| s.Process.ConsoleSize.Width = uint(width) |
| s.Process.ConsoleSize.Height = uint(height) |
| return nil |
| } |
| } |
| |
| // WithHostname sets the container's hostname |
| func WithHostname(name string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| s.Hostname = name |
| return nil |
| } |
| } |
| |
| // WithMounts appends mounts |
| func WithMounts(mounts []specs.Mount) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| s.Mounts = append(s.Mounts, mounts...) |
| return nil |
| } |
| } |
| |
| // WithoutMounts removes mounts |
| func WithoutMounts(dests ...string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| var ( |
| mounts []specs.Mount |
| current = s.Mounts |
| ) |
| mLoop: |
| for _, m := range current { |
| mDestination := filepath.Clean(m.Destination) |
| for _, dest := range dests { |
| if mDestination == dest { |
| continue mLoop |
| } |
| } |
| mounts = append(mounts, m) |
| } |
| s.Mounts = mounts |
| return nil |
| } |
| } |
| |
| // WithHostNamespace allows a task to run inside the host's linux namespace |
| func WithHostNamespace(ns specs.LinuxNamespaceType) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| for i, n := range s.Linux.Namespaces { |
| if n.Type == ns { |
| s.Linux.Namespaces = append(s.Linux.Namespaces[:i], s.Linux.Namespaces[i+1:]...) |
| return nil |
| } |
| } |
| return nil |
| } |
| } |
| |
| // WithLinuxNamespace uses the passed in namespace for the spec. If a namespace of the same type already exists in the |
| // spec, the existing namespace is replaced by the one provided. |
| func WithLinuxNamespace(ns specs.LinuxNamespace) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| for i, n := range s.Linux.Namespaces { |
| if n.Type == ns.Type { |
| s.Linux.Namespaces[i] = ns |
| return nil |
| } |
| } |
| s.Linux.Namespaces = append(s.Linux.Namespaces, ns) |
| return nil |
| } |
| } |
| |
| // WithNewPrivileges turns off the NoNewPrivileges feature flag in the spec |
| func WithNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.NoNewPrivileges = false |
| |
| return nil |
| } |
| |
| // WithImageConfig configures the spec to from the configuration of an Image |
| func WithImageConfig(image Image) SpecOpts { |
| return WithImageConfigArgs(image, nil) |
| } |
| |
| // WithImageConfigArgs configures the spec to from the configuration of an Image with additional args that |
| // replaces the CMD of the image |
| func WithImageConfigArgs(image Image, args []string) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { |
| ic, err := image.Config(ctx) |
| if err != nil { |
| return err |
| } |
| var ( |
| ociimage v1.Image |
| config v1.ImageConfig |
| ) |
| switch ic.MediaType { |
| case v1.MediaTypeImageConfig, images.MediaTypeDockerSchema2Config: |
| p, err := content.ReadBlob(ctx, image.ContentStore(), ic) |
| if err != nil { |
| return err |
| } |
| |
| if err := json.Unmarshal(p, &ociimage); err != nil { |
| return err |
| } |
| config = ociimage.Config |
| default: |
| return fmt.Errorf("unknown image config media type %s", ic.MediaType) |
| } |
| |
| setProcess(s) |
| if s.Linux != nil { |
| defaults := config.Env |
| if len(defaults) == 0 { |
| defaults = defaultUnixEnv |
| } |
| s.Process.Env = replaceOrAppendEnvValues(defaults, s.Process.Env) |
| cmd := config.Cmd |
| if len(args) > 0 { |
| cmd = args |
| } |
| s.Process.Args = append(config.Entrypoint, cmd...) |
| |
| cwd := config.WorkingDir |
| if cwd == "" { |
| cwd = "/" |
| } |
| s.Process.Cwd = cwd |
| if config.User != "" { |
| if err := WithUser(config.User)(ctx, client, c, s); err != nil { |
| return err |
| } |
| return WithAdditionalGIDs(fmt.Sprintf("%d", s.Process.User.UID))(ctx, client, c, s) |
| } |
| // we should query the image's /etc/group for additional GIDs |
| // even if there is no specified user in the image config |
| return WithAdditionalGIDs("root")(ctx, client, c, s) |
| } else if s.Windows != nil { |
| s.Process.Env = replaceOrAppendEnvValues(config.Env, s.Process.Env) |
| cmd := config.Cmd |
| if len(args) > 0 { |
| cmd = args |
| } |
| s.Process.Args = append(config.Entrypoint, cmd...) |
| |
| s.Process.Cwd = config.WorkingDir |
| s.Process.User = specs.User{ |
| Username: config.User, |
| } |
| } else { |
| return errors.New("spec does not contain Linux or Windows section") |
| } |
| return nil |
| } |
| } |
| |
| // WithRootFSPath specifies unmanaged rootfs path. |
| func WithRootFSPath(path string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setRoot(s) |
| s.Root.Path = path |
| // Entrypoint is not set here (it's up to caller) |
| return nil |
| } |
| } |
| |
| // WithRootFSReadonly sets specs.Root.Readonly to true |
| func WithRootFSReadonly() SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setRoot(s) |
| s.Root.Readonly = true |
| return nil |
| } |
| } |
| |
| // WithNoNewPrivileges sets no_new_privileges on the process for the container |
| func WithNoNewPrivileges(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.NoNewPrivileges = true |
| return nil |
| } |
| |
| // WithHostHostsFile bind-mounts the host's /etc/hosts into the container as readonly |
| func WithHostHostsFile(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| s.Mounts = append(s.Mounts, specs.Mount{ |
| Destination: "/etc/hosts", |
| Type: "bind", |
| Source: "/etc/hosts", |
| Options: []string{"rbind", "ro"}, |
| }) |
| return nil |
| } |
| |
| // WithHostResolvconf bind-mounts the host's /etc/resolv.conf into the container as readonly |
| func WithHostResolvconf(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| s.Mounts = append(s.Mounts, specs.Mount{ |
| Destination: "/etc/resolv.conf", |
| Type: "bind", |
| Source: "/etc/resolv.conf", |
| Options: []string{"rbind", "ro"}, |
| }) |
| return nil |
| } |
| |
| // WithHostLocaltime bind-mounts the host's /etc/localtime into the container as readonly |
| func WithHostLocaltime(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| s.Mounts = append(s.Mounts, specs.Mount{ |
| Destination: "/etc/localtime", |
| Type: "bind", |
| Source: "/etc/localtime", |
| Options: []string{"rbind", "ro"}, |
| }) |
| return nil |
| } |
| |
| // WithUserNamespace sets the uid and gid mappings for the task |
| // this can be called multiple times to add more mappings to the generated spec |
| func WithUserNamespace(uidMap, gidMap []specs.LinuxIDMapping) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| var hasUserns bool |
| setLinux(s) |
| for _, ns := range s.Linux.Namespaces { |
| if ns.Type == specs.UserNamespace { |
| hasUserns = true |
| break |
| } |
| } |
| if !hasUserns { |
| s.Linux.Namespaces = append(s.Linux.Namespaces, specs.LinuxNamespace{ |
| Type: specs.UserNamespace, |
| }) |
| } |
| s.Linux.UIDMappings = append(s.Linux.UIDMappings, uidMap...) |
| s.Linux.GIDMappings = append(s.Linux.GIDMappings, gidMap...) |
| return nil |
| } |
| } |
| |
| // WithCgroup sets the container's cgroup path |
| func WithCgroup(path string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| s.Linux.CgroupsPath = path |
| return nil |
| } |
| } |
| |
| // WithNamespacedCgroup uses the namespace set on the context to create a |
| // root directory for containers in the cgroup with the id as the subcgroup |
| func WithNamespacedCgroup() SpecOpts { |
| return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error { |
| namespace, err := namespaces.NamespaceRequired(ctx) |
| if err != nil { |
| return err |
| } |
| setLinux(s) |
| s.Linux.CgroupsPath = filepath.Join("/", namespace, c.ID) |
| return nil |
| } |
| } |
| |
| // WithUser sets the user to be used within the container. |
| // It accepts a valid user string in OCI Image Spec v1.0.0: |
| // |
| // user, uid, user:group, uid:gid, uid:group, user:gid |
| func WithUser(userstr string) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) error { |
| defer ensureAdditionalGids(s) |
| setProcess(s) |
| s.Process.User.AdditionalGids = nil |
| |
| // For LCOW it's a bit harder to confirm that the user actually exists on the host as a rootfs isn't |
| // mounted on the host and shared into the guest, but rather the rootfs is constructed entirely in the |
| // guest itself. To accommodate this, a spot to place the user string provided by a client as-is is needed. |
| // The `Username` field on the runtime spec is marked by Platform as only for Windows, and in this case it |
| // *is* being set on a Windows host at least, but will be used as a temporary holding spot until the guest |
| // can use the string to perform these same operations to grab the uid:gid inside. |
| if s.Windows != nil && s.Linux != nil { |
| s.Process.User.Username = userstr |
| return nil |
| } |
| |
| parts := strings.Split(userstr, ":") |
| switch len(parts) { |
| case 1: |
| v, err := strconv.Atoi(parts[0]) |
| if err != nil { |
| // if we cannot parse as a uint they try to see if it is a username |
| return WithUsername(userstr)(ctx, client, c, s) |
| } |
| return WithUserID(uint32(v))(ctx, client, c, s) |
| case 2: |
| var ( |
| username string |
| groupname string |
| ) |
| var uid, gid uint32 |
| v, err := strconv.Atoi(parts[0]) |
| if err != nil { |
| username = parts[0] |
| } else { |
| uid = uint32(v) |
| } |
| if v, err = strconv.Atoi(parts[1]); err != nil { |
| groupname = parts[1] |
| } else { |
| gid = uint32(v) |
| } |
| if username == "" && groupname == "" { |
| s.Process.User.UID, s.Process.User.GID = uid, gid |
| return nil |
| } |
| f := func(root string) error { |
| if username != "" { |
| user, err := UserFromPath(root, func(u user.User) bool { |
| return u.Name == username |
| }) |
| if err != nil { |
| return err |
| } |
| uid = uint32(user.Uid) |
| } |
| if groupname != "" { |
| gid, err = GIDFromPath(root, func(g user.Group) bool { |
| return g.Name == groupname |
| }) |
| if err != nil { |
| return err |
| } |
| } |
| s.Process.User.UID, s.Process.User.GID = uid, gid |
| return nil |
| } |
| if c.Snapshotter == "" && c.SnapshotKey == "" { |
| if !isRootfsAbs(s.Root.Path) { |
| return errors.New("rootfs absolute path is required") |
| } |
| return f(s.Root.Path) |
| } |
| if c.Snapshotter == "" { |
| return errors.New("no snapshotter set for container") |
| } |
| if c.SnapshotKey == "" { |
| return errors.New("rootfs snapshot not created for container") |
| } |
| snapshotter := client.SnapshotService(c.Snapshotter) |
| mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey) |
| if err != nil { |
| return err |
| } |
| |
| mounts = tryReadonlyMounts(mounts) |
| return mount.WithTempMount(ctx, mounts, f) |
| default: |
| return fmt.Errorf("invalid USER value %s", userstr) |
| } |
| } |
| } |
| |
| // WithUIDGID allows the UID and GID for the Process to be set |
| func WithUIDGID(uid, gid uint32) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| defer ensureAdditionalGids(s) |
| setProcess(s) |
| s.Process.User.AdditionalGids = nil |
| s.Process.User.UID = uid |
| s.Process.User.GID = gid |
| return nil |
| } |
| } |
| |
| // WithUserID sets the correct UID and GID for the container based |
| // on the image's /etc/passwd contents. If /etc/passwd does not exist, |
| // or uid is not found in /etc/passwd, it sets the requested uid, |
| // additionally sets the gid to 0, and does not return an error. |
| func WithUserID(uid uint32) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) { |
| defer ensureAdditionalGids(s) |
| setProcess(s) |
| s.Process.User.AdditionalGids = nil |
| setUser := func(root string) error { |
| user, err := UserFromPath(root, func(u user.User) bool { |
| return u.Uid == int(uid) |
| }) |
| if err != nil { |
| if os.IsNotExist(err) || err == ErrNoUsersFound { |
| s.Process.User.UID, s.Process.User.GID = uid, 0 |
| return nil |
| } |
| return err |
| } |
| s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid) |
| return nil |
| } |
| if c.Snapshotter == "" && c.SnapshotKey == "" { |
| if !isRootfsAbs(s.Root.Path) { |
| return errors.New("rootfs absolute path is required") |
| } |
| return setUser(s.Root.Path) |
| } |
| if c.Snapshotter == "" { |
| return errors.New("no snapshotter set for container") |
| } |
| if c.SnapshotKey == "" { |
| return errors.New("rootfs snapshot not created for container") |
| } |
| snapshotter := client.SnapshotService(c.Snapshotter) |
| mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey) |
| if err != nil { |
| return err |
| } |
| |
| mounts = tryReadonlyMounts(mounts) |
| return mount.WithTempMount(ctx, mounts, setUser) |
| } |
| } |
| |
| // WithUsername sets the correct UID and GID for the container |
| // based on the image's /etc/passwd contents. If /etc/passwd |
| // does not exist, or the username is not found in /etc/passwd, |
| // it returns error. On Windows this sets the username as provided, |
| // the operating system will validate the user when going to run |
| // the container. |
| func WithUsername(username string) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) { |
| defer ensureAdditionalGids(s) |
| setProcess(s) |
| s.Process.User.AdditionalGids = nil |
| if s.Linux != nil { |
| setUser := func(root string) error { |
| user, err := UserFromPath(root, func(u user.User) bool { |
| return u.Name == username |
| }) |
| if err != nil { |
| return err |
| } |
| s.Process.User.UID, s.Process.User.GID = uint32(user.Uid), uint32(user.Gid) |
| return nil |
| } |
| if c.Snapshotter == "" && c.SnapshotKey == "" { |
| if !isRootfsAbs(s.Root.Path) { |
| return errors.New("rootfs absolute path is required") |
| } |
| return setUser(s.Root.Path) |
| } |
| if c.Snapshotter == "" { |
| return errors.New("no snapshotter set for container") |
| } |
| if c.SnapshotKey == "" { |
| return errors.New("rootfs snapshot not created for container") |
| } |
| snapshotter := client.SnapshotService(c.Snapshotter) |
| mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey) |
| if err != nil { |
| return err |
| } |
| |
| mounts = tryReadonlyMounts(mounts) |
| return mount.WithTempMount(ctx, mounts, setUser) |
| } else if s.Windows != nil { |
| s.Process.User.Username = username |
| } else { |
| return errors.New("spec does not contain Linux or Windows section") |
| } |
| return nil |
| } |
| } |
| |
| // WithAdditionalGIDs sets the OCI spec's additionalGids array to any additional groups listed |
| // for a particular user in the /etc/group file of the image's root filesystem |
| // The passed in user can be either a uid or a username. |
| func WithAdditionalGIDs(userstr string) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) { |
| // For LCOW or on Darwin additional GID's not supported |
| if s.Windows != nil || runtime.GOOS == "darwin" { |
| return nil |
| } |
| setProcess(s) |
| s.Process.User.AdditionalGids = nil |
| setAdditionalGids := func(root string) error { |
| defer ensureAdditionalGids(s) |
| var username string |
| uid, err := strconv.Atoi(userstr) |
| if err == nil { |
| user, err := UserFromPath(root, func(u user.User) bool { |
| return u.Uid == uid |
| }) |
| if err != nil { |
| if os.IsNotExist(err) || err == ErrNoUsersFound { |
| return nil |
| } |
| return err |
| } |
| username = user.Name |
| } else { |
| username = userstr |
| } |
| gids, err := getSupplementalGroupsFromPath(root, func(g user.Group) bool { |
| // we only want supplemental groups |
| if g.Name == username { |
| return false |
| } |
| for _, entry := range g.List { |
| if entry == username { |
| return true |
| } |
| } |
| return false |
| }) |
| if err != nil { |
| if os.IsNotExist(err) { |
| return nil |
| } |
| return err |
| } |
| s.Process.User.AdditionalGids = gids |
| return nil |
| } |
| if c.Snapshotter == "" && c.SnapshotKey == "" { |
| if !isRootfsAbs(s.Root.Path) { |
| return errors.New("rootfs absolute path is required") |
| } |
| return setAdditionalGids(s.Root.Path) |
| } |
| if c.Snapshotter == "" { |
| return errors.New("no snapshotter set for container") |
| } |
| if c.SnapshotKey == "" { |
| return errors.New("rootfs snapshot not created for container") |
| } |
| snapshotter := client.SnapshotService(c.Snapshotter) |
| mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey) |
| if err != nil { |
| return err |
| } |
| |
| mounts = tryReadonlyMounts(mounts) |
| return mount.WithTempMount(ctx, mounts, setAdditionalGids) |
| } |
| } |
| |
| // WithAppendAdditionalGroups append additional groups within the container. |
| // The passed in groups can be either a gid or a groupname. |
| func WithAppendAdditionalGroups(groups ...string) SpecOpts { |
| return func(ctx context.Context, client Client, c *containers.Container, s *Spec) (err error) { |
| // For LCOW or on Darwin additional GID's are not supported |
| if s.Windows != nil || runtime.GOOS == "darwin" { |
| return nil |
| } |
| setProcess(s) |
| setAdditionalGids := func(root string) error { |
| defer ensureAdditionalGids(s) |
| gpath, err := fs.RootPath(root, "/etc/group") |
| if err != nil { |
| return err |
| } |
| ugroups, err := user.ParseGroupFile(gpath) |
| if err != nil { |
| return err |
| } |
| groupMap := make(map[string]user.Group) |
| for _, group := range ugroups { |
| groupMap[group.Name] = group |
| } |
| var gids []uint32 |
| for _, group := range groups { |
| gid, err := strconv.ParseUint(group, 10, 32) |
| if err == nil { |
| gids = append(gids, uint32(gid)) |
| } else { |
| g, ok := groupMap[group] |
| if !ok { |
| return fmt.Errorf("unable to find group %s", group) |
| } |
| gids = append(gids, uint32(g.Gid)) |
| } |
| } |
| s.Process.User.AdditionalGids = append(s.Process.User.AdditionalGids, gids...) |
| return nil |
| } |
| if c.Snapshotter == "" && c.SnapshotKey == "" { |
| if !filepath.IsAbs(s.Root.Path) { |
| return errors.New("rootfs absolute path is required") |
| } |
| return setAdditionalGids(s.Root.Path) |
| } |
| if c.Snapshotter == "" { |
| return errors.New("no snapshotter set for container") |
| } |
| if c.SnapshotKey == "" { |
| return errors.New("rootfs snapshot not created for container") |
| } |
| snapshotter := client.SnapshotService(c.Snapshotter) |
| mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey) |
| if err != nil { |
| return err |
| } |
| |
| mounts = tryReadonlyMounts(mounts) |
| return mount.WithTempMount(ctx, mounts, setAdditionalGids) |
| } |
| } |
| |
| // WithCapabilities sets Linux capabilities on the process |
| func WithCapabilities(caps []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setCapabilities(s) |
| |
| s.Process.Capabilities.Bounding = caps |
| s.Process.Capabilities.Effective = caps |
| s.Process.Capabilities.Permitted = caps |
| |
| return nil |
| } |
| } |
| |
| func capsContain(caps []string, s string) bool { |
| for _, c := range caps { |
| if c == s { |
| return true |
| } |
| } |
| return false |
| } |
| |
| func removeCap(caps *[]string, s string) { |
| var newcaps []string |
| for _, c := range *caps { |
| if c == s { |
| continue |
| } |
| newcaps = append(newcaps, c) |
| } |
| *caps = newcaps |
| } |
| |
| // WithAddedCapabilities adds the provided capabilities |
| func WithAddedCapabilities(caps []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setCapabilities(s) |
| for _, c := range caps { |
| for _, cl := range []*[]string{ |
| &s.Process.Capabilities.Bounding, |
| &s.Process.Capabilities.Effective, |
| &s.Process.Capabilities.Permitted, |
| } { |
| if !capsContain(*cl, c) { |
| *cl = append(*cl, c) |
| } |
| } |
| } |
| return nil |
| } |
| } |
| |
| // WithDroppedCapabilities removes the provided capabilities |
| func WithDroppedCapabilities(caps []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setCapabilities(s) |
| for _, c := range caps { |
| for _, cl := range []*[]string{ |
| &s.Process.Capabilities.Bounding, |
| &s.Process.Capabilities.Effective, |
| &s.Process.Capabilities.Permitted, |
| } { |
| removeCap(cl, c) |
| } |
| } |
| return nil |
| } |
| } |
| |
| // WithAmbientCapabilities set the Linux ambient capabilities for the process |
| // Ambient capabilities should only be set for non-root users or the caller should |
| // understand how these capabilities are used and set |
| func WithAmbientCapabilities(caps []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setCapabilities(s) |
| s.Process.Capabilities.Inheritable = caps |
| s.Process.Capabilities.Ambient = caps |
| return nil |
| } |
| } |
| |
| // ErrNoUsersFound can be returned from UserFromPath |
| var ErrNoUsersFound = errors.New("no users found") |
| |
| // UserFromPath inspects the user object using /etc/passwd in the specified rootfs. |
| // filter can be nil. |
| func UserFromPath(root string, filter func(user.User) bool) (user.User, error) { |
| ppath, err := fs.RootPath(root, "/etc/passwd") |
| if err != nil { |
| return user.User{}, err |
| } |
| users, err := user.ParsePasswdFileFilter(ppath, filter) |
| if err != nil { |
| return user.User{}, err |
| } |
| if len(users) == 0 { |
| return user.User{}, ErrNoUsersFound |
| } |
| return users[0], nil |
| } |
| |
| // ErrNoGroupsFound can be returned from GIDFromPath |
| var ErrNoGroupsFound = errors.New("no groups found") |
| |
| // GIDFromPath inspects the GID using /etc/group in the specified rootfs. |
| // filter can be nil. |
| func GIDFromPath(root string, filter func(user.Group) bool) (gid uint32, err error) { |
| gpath, err := fs.RootPath(root, "/etc/group") |
| if err != nil { |
| return 0, err |
| } |
| groups, err := user.ParseGroupFileFilter(gpath, filter) |
| if err != nil { |
| return 0, err |
| } |
| if len(groups) == 0 { |
| return 0, ErrNoGroupsFound |
| } |
| g := groups[0] |
| return uint32(g.Gid), nil |
| } |
| |
| func getSupplementalGroupsFromPath(root string, filter func(user.Group) bool) ([]uint32, error) { |
| gpath, err := fs.RootPath(root, "/etc/group") |
| if err != nil { |
| return []uint32{}, err |
| } |
| groups, err := user.ParseGroupFileFilter(gpath, filter) |
| if err != nil { |
| return []uint32{}, err |
| } |
| if len(groups) == 0 { |
| // if there are no additional groups; just return an empty set |
| return []uint32{}, nil |
| } |
| addlGids := []uint32{} |
| for _, grp := range groups { |
| addlGids = append(addlGids, uint32(grp.Gid)) |
| } |
| return addlGids, nil |
| } |
| |
| func isRootfsAbs(root string) bool { |
| return filepath.IsAbs(root) |
| } |
| |
| // WithMaskedPaths sets the masked paths option |
| func WithMaskedPaths(paths []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| s.Linux.MaskedPaths = paths |
| return nil |
| } |
| } |
| |
| // WithReadonlyPaths sets the read only paths option |
| func WithReadonlyPaths(paths []string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| s.Linux.ReadonlyPaths = paths |
| return nil |
| } |
| } |
| |
| // WithWriteableSysfs makes any sysfs mounts writeable |
| func WithWriteableSysfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| for _, m := range s.Mounts { |
| if m.Type == "sysfs" { |
| for i, o := range m.Options { |
| if o == "ro" { |
| m.Options[i] = "rw" |
| } |
| } |
| } |
| } |
| return nil |
| } |
| |
| // WithWriteableCgroupfs makes any cgroup mounts writeable |
| func WithWriteableCgroupfs(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| for _, m := range s.Mounts { |
| if m.Type == "cgroup" { |
| for i, o := range m.Options { |
| if o == "ro" { |
| m.Options[i] = "rw" |
| } |
| } |
| } |
| } |
| return nil |
| } |
| |
| // WithSelinuxLabel sets the process SELinux label |
| func WithSelinuxLabel(label string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.SelinuxLabel = label |
| return nil |
| } |
| } |
| |
| // WithApparmorProfile sets the Apparmor profile for the process |
| func WithApparmorProfile(profile string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setProcess(s) |
| s.Process.ApparmorProfile = profile |
| return nil |
| } |
| } |
| |
| // WithSeccompUnconfined clears the seccomp profile |
| func WithSeccompUnconfined(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| s.Linux.Seccomp = nil |
| return nil |
| } |
| |
| // WithParentCgroupDevices uses the default cgroup setup to inherit the container's parent cgroup's |
| // allowed and denied devices |
| func WithParentCgroupDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| if s.Linux.Resources == nil { |
| s.Linux.Resources = &specs.LinuxResources{} |
| } |
| s.Linux.Resources.Devices = nil |
| return nil |
| } |
| |
| // WithAllDevicesAllowed permits READ WRITE MKNOD on all devices nodes for the container |
| func WithAllDevicesAllowed(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| if s.Linux.Resources == nil { |
| s.Linux.Resources = &specs.LinuxResources{} |
| } |
| s.Linux.Resources.Devices = []specs.LinuxDeviceCgroup{ |
| { |
| Allow: true, |
| Access: rwm, |
| }, |
| } |
| return nil |
| } |
| |
| // WithDefaultUnixDevices adds the default devices for unix such as /dev/null, /dev/random to |
| // the container's resource cgroup spec |
| func WithDefaultUnixDevices(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| if s.Linux.Resources == nil { |
| s.Linux.Resources = &specs.LinuxResources{} |
| } |
| intptr := func(i int64) *int64 { |
| return &i |
| } |
| s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, []specs.LinuxDeviceCgroup{ |
| { |
| // "/dev/null", |
| Type: "c", |
| Major: intptr(1), |
| Minor: intptr(3), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "/dev/random", |
| Type: "c", |
| Major: intptr(1), |
| Minor: intptr(8), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "/dev/full", |
| Type: "c", |
| Major: intptr(1), |
| Minor: intptr(7), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "/dev/tty", |
| Type: "c", |
| Major: intptr(5), |
| Minor: intptr(0), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "/dev/zero", |
| Type: "c", |
| Major: intptr(1), |
| Minor: intptr(5), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "/dev/urandom", |
| Type: "c", |
| Major: intptr(1), |
| Minor: intptr(9), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "/dev/console", |
| Type: "c", |
| Major: intptr(5), |
| Minor: intptr(1), |
| Access: rwm, |
| Allow: true, |
| }, |
| // /dev/pts/ - pts namespaces are "coming soon" |
| { |
| Type: "c", |
| Major: intptr(136), |
| Access: rwm, |
| Allow: true, |
| }, |
| { |
| // "dev/ptmx" |
| Type: "c", |
| Major: intptr(5), |
| Minor: intptr(2), |
| Access: rwm, |
| Allow: true, |
| }, |
| }...) |
| return nil |
| } |
| |
| // WithPrivileged sets up options for a privileged container |
| var WithPrivileged = Compose( |
| WithAllCurrentCapabilities, |
| WithMaskedPaths(nil), |
| WithReadonlyPaths(nil), |
| WithWriteableSysfs, |
| WithWriteableCgroupfs, |
| WithSelinuxLabel(""), |
| WithApparmorProfile(""), |
| WithSeccompUnconfined, |
| ) |
| |
| // WithWindowsHyperV sets the Windows.HyperV section for HyperV isolation of containers. |
| func WithWindowsHyperV(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| if s.Windows == nil { |
| s.Windows = &specs.Windows{} |
| } |
| if s.Windows.HyperV == nil { |
| s.Windows.HyperV = &specs.WindowsHyperV{} |
| } |
| return nil |
| } |
| |
| // WithMemoryLimit sets the `Linux.LinuxResources.Memory.Limit` section to the |
| // `limit` specified if the `Linux` section is not `nil`. Additionally sets the |
| // `Windows.WindowsResources.Memory.Limit` section if the `Windows` section is |
| // not `nil`. |
| func WithMemoryLimit(limit uint64) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| if s.Linux != nil { |
| if s.Linux.Resources == nil { |
| s.Linux.Resources = &specs.LinuxResources{} |
| } |
| if s.Linux.Resources.Memory == nil { |
| s.Linux.Resources.Memory = &specs.LinuxMemory{} |
| } |
| l := int64(limit) |
| s.Linux.Resources.Memory.Limit = &l |
| } |
| if s.Windows != nil { |
| if s.Windows.Resources == nil { |
| s.Windows.Resources = &specs.WindowsResources{} |
| } |
| if s.Windows.Resources.Memory == nil { |
| s.Windows.Resources.Memory = &specs.WindowsMemoryResources{} |
| } |
| s.Windows.Resources.Memory.Limit = &limit |
| } |
| return nil |
| } |
| } |
| |
| // WithAnnotations appends or replaces the annotations on the spec with the |
| // provided annotations |
| func WithAnnotations(annotations map[string]string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| if s.Annotations == nil { |
| s.Annotations = make(map[string]string) |
| } |
| for k, v := range annotations { |
| s.Annotations[k] = v |
| } |
| return nil |
| } |
| } |
| |
| // WithLinuxDevices adds the provided linux devices to the spec |
| func WithLinuxDevices(devices []specs.LinuxDevice) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| s.Linux.Devices = append(s.Linux.Devices, devices...) |
| return nil |
| } |
| } |
| |
| // WithLinuxDevice adds the device specified by path to the spec |
| func WithLinuxDevice(path, permissions string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| setLinux(s) |
| setResources(s) |
| |
| dev, err := DeviceFromPath(path) |
| if err != nil { |
| return err |
| } |
| |
| s.Linux.Devices = append(s.Linux.Devices, *dev) |
| |
| s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, specs.LinuxDeviceCgroup{ |
| Type: dev.Type, |
| Allow: true, |
| Major: &dev.Major, |
| Minor: &dev.Minor, |
| Access: permissions, |
| }) |
| |
| return nil |
| } |
| } |
| |
| // WithEnvFile adds environment variables from a file to the container's spec |
| func WithEnvFile(path string) SpecOpts { |
| return func(_ context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| var vars []string |
| f, err := os.Open(path) |
| if err != nil { |
| return err |
| } |
| defer f.Close() |
| |
| sc := bufio.NewScanner(f) |
| for sc.Scan() { |
| vars = append(vars, sc.Text()) |
| } |
| if err = sc.Err(); err != nil { |
| return err |
| } |
| return WithEnv(vars)(nil, nil, nil, s) |
| } |
| } |
| |
| // ErrNoShmMount is returned when there is no /dev/shm mount specified in the config |
| // and an Opts was trying to set a configuration value on the mount. |
| var ErrNoShmMount = errors.New("no /dev/shm mount specified") |
| |
| // WithDevShmSize sets the size of the /dev/shm mount for the container. |
| // |
| // The size value is specified in kb, kilobytes. |
| func WithDevShmSize(kb int64) SpecOpts { |
| return func(ctx context.Context, _ Client, _ *containers.Container, s *Spec) error { |
| for i, m := range s.Mounts { |
| if filepath.Clean(m.Destination) == "/dev/shm" && m.Source == "shm" && m.Type == "tmpfs" { |
| for i := 0; i < len(m.Options); i++ { |
| if strings.HasPrefix(m.Options[i], "size=") { |
| m.Options = append(m.Options[:i], m.Options[i+1:]...) |
| i-- |
| } |
| } |
| s.Mounts[i].Options = append(m.Options, fmt.Sprintf("size=%dk", kb)) |
| return nil |
| } |
| } |
| return ErrNoShmMount |
| } |
| } |
| |
| // tryReadonlyMounts is used by the options which are trying to get user/group |
| // information from container's rootfs. Since the option does read operation |
| // only, this helper will append ReadOnly mount option to prevent linux kernel |
| // from syncing whole filesystem in umount syscall. |
| // |
| // TODO(fuweid): |
| // |
| // Currently, it only works for overlayfs. I think we can apply it to other |
| // kinds of filesystem. Maybe we can return `ro` option by `snapshotter.Mount` |
| // API, when the caller passes that experimental annotation |
| // `containerd.io/snapshot/readonly.mount` something like that. |
| func tryReadonlyMounts(mounts []mount.Mount) []mount.Mount { |
| if len(mounts) == 1 && mounts[0].Type == "overlay" { |
| mounts[0].Options = append(mounts[0].Options, "ro") |
| } |
| return mounts |
| } |