blob: ce15ca2b9a336b0ce5aba4fc5b167f4b9940d42c [file] [log] [blame]
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cgroups
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall"
"golang.org/x/sys/unix"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
func NewMemory(root string) *memoryController {
return &memoryController{
root: filepath.Join(root, string(Memory)),
}
}
type memoryController struct {
root string
}
func (m *memoryController) Name() Name {
return Memory
}
func (m *memoryController) Path(path string) string {
return filepath.Join(m.root, path)
}
func (m *memoryController) Create(path string, resources *specs.LinuxResources) error {
if err := os.MkdirAll(m.Path(path), defaultDirPerm); err != nil {
return err
}
if resources.Memory == nil {
return nil
}
if resources.Memory.Kernel != nil {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := ioutil.WriteFile(
filepath.Join(m.Path(path), "memory.kmem.limit_in_bytes"),
[]byte(strconv.FormatInt(i, 10)),
defaultFilePerm,
); err != nil {
return checkEBUSY(err)
}
}
}
return m.set(path, getMemorySettings(resources))
}
func (m *memoryController) Update(path string, resources *specs.LinuxResources) error {
if resources.Memory == nil {
return nil
}
g := func(v *int64) bool {
return v != nil && *v > 0
}
settings := getMemorySettings(resources)
if g(resources.Memory.Limit) && g(resources.Memory.Swap) {
// if the updated swap value is larger than the current memory limit set the swap changes first
// then set the memory limit as swap must always be larger than the current limit
current, err := readUint(filepath.Join(m.Path(path), "memory.limit_in_bytes"))
if err != nil {
return err
}
if current < uint64(*resources.Memory.Swap) {
settings[0], settings[1] = settings[1], settings[0]
}
}
return m.set(path, settings)
}
func (m *memoryController) Stat(path string, stats *Metrics) error {
f, err := os.Open(filepath.Join(m.Path(path), "memory.stat"))
if err != nil {
return err
}
defer f.Close()
stats.Memory = &MemoryStat{
Usage: &MemoryEntry{},
Swap: &MemoryEntry{},
Kernel: &MemoryEntry{},
KernelTCP: &MemoryEntry{},
}
if err := m.parseStats(f, stats.Memory); err != nil {
return err
}
for _, t := range []struct {
module string
entry *MemoryEntry
}{
{
module: "",
entry: stats.Memory.Usage,
},
{
module: "memsw",
entry: stats.Memory.Swap,
},
{
module: "kmem",
entry: stats.Memory.Kernel,
},
{
module: "kmem.tcp",
entry: stats.Memory.KernelTCP,
},
} {
for _, tt := range []struct {
name string
value *uint64
}{
{
name: "usage_in_bytes",
value: &t.entry.Usage,
},
{
name: "max_usage_in_bytes",
value: &t.entry.Max,
},
{
name: "failcnt",
value: &t.entry.Failcnt,
},
{
name: "limit_in_bytes",
value: &t.entry.Limit,
},
} {
parts := []string{"memory"}
if t.module != "" {
parts = append(parts, t.module)
}
parts = append(parts, tt.name)
v, err := readUint(filepath.Join(m.Path(path), strings.Join(parts, ".")))
if err != nil {
return err
}
*tt.value = v
}
}
return nil
}
func (m *memoryController) OOMEventFD(path string) (uintptr, error) {
root := m.Path(path)
f, err := os.Open(filepath.Join(root, "memory.oom_control"))
if err != nil {
return 0, err
}
defer f.Close()
fd, _, serr := unix.RawSyscall(unix.SYS_EVENTFD2, 0, unix.EFD_CLOEXEC, 0)
if serr != 0 {
return 0, serr
}
if err := writeEventFD(root, f.Fd(), fd); err != nil {
unix.Close(int(fd))
return 0, err
}
return fd, nil
}
func writeEventFD(root string, cfd, efd uintptr) error {
f, err := os.OpenFile(filepath.Join(root, "cgroup.event_control"), os.O_WRONLY, 0)
if err != nil {
return err
}
_, err = f.WriteString(fmt.Sprintf("%d %d", efd, cfd))
f.Close()
return err
}
func (m *memoryController) parseStats(r io.Reader, stat *MemoryStat) error {
var (
raw = make(map[string]uint64)
sc = bufio.NewScanner(r)
line int
)
for sc.Scan() {
if err := sc.Err(); err != nil {
return err
}
key, v, err := parseKV(sc.Text())
if err != nil {
return fmt.Errorf("%d: %v", line, err)
}
raw[key] = v
line++
}
stat.Cache = raw["cache"]
stat.RSS = raw["rss"]
stat.RSSHuge = raw["rss_huge"]
stat.MappedFile = raw["mapped_file"]
stat.Dirty = raw["dirty"]
stat.Writeback = raw["writeback"]
stat.PgPgIn = raw["pgpgin"]
stat.PgPgOut = raw["pgpgout"]
stat.PgFault = raw["pgfault"]
stat.PgMajFault = raw["pgmajfault"]
stat.InactiveAnon = raw["inactive_anon"]
stat.ActiveAnon = raw["active_anon"]
stat.InactiveFile = raw["inactive_file"]
stat.ActiveFile = raw["active_file"]
stat.Unevictable = raw["unevictable"]
stat.HierarchicalMemoryLimit = raw["hierarchical_memory_limit"]
stat.HierarchicalSwapLimit = raw["hierarchical_memsw_limit"]
stat.TotalCache = raw["total_cache"]
stat.TotalRSS = raw["total_rss"]
stat.TotalRSSHuge = raw["total_rss_huge"]
stat.TotalMappedFile = raw["total_mapped_file"]
stat.TotalDirty = raw["total_dirty"]
stat.TotalWriteback = raw["total_writeback"]
stat.TotalPgPgIn = raw["total_pgpgin"]
stat.TotalPgPgOut = raw["total_pgpgout"]
stat.TotalPgFault = raw["total_pgfault"]
stat.TotalPgMajFault = raw["total_pgmajfault"]
stat.TotalInactiveAnon = raw["total_inactive_anon"]
stat.TotalActiveAnon = raw["total_active_anon"]
stat.TotalInactiveFile = raw["total_inactive_file"]
stat.TotalActiveFile = raw["total_active_file"]
stat.TotalUnevictable = raw["total_unevictable"]
return nil
}
func (m *memoryController) set(path string, settings []memorySettings) error {
for _, t := range settings {
if t.value != nil {
if err := ioutil.WriteFile(
filepath.Join(m.Path(path), fmt.Sprintf("memory.%s", t.name)),
[]byte(strconv.FormatInt(*t.value, 10)),
defaultFilePerm,
); err != nil {
return err
}
}
}
return nil
}
type memorySettings struct {
name string
value *int64
}
func getMemorySettings(resources *specs.LinuxResources) []memorySettings {
mem := resources.Memory
var swappiness *int64
if mem.Swappiness != nil {
v := int64(*mem.Swappiness)
swappiness = &v
}
return []memorySettings{
{
name: "limit_in_bytes",
value: mem.Limit,
},
{
name: "memsw.limit_in_bytes",
value: mem.Swap,
},
{
name: "kmem.limit_in_bytes",
value: mem.Kernel,
},
{
name: "kmem.tcp.limit_in_bytes",
value: mem.KernelTCP,
},
{
name: "oom_control",
value: getOomControlValue(mem),
},
{
name: "swappiness",
value: swappiness,
},
}
}
func checkEBUSY(err error) error {
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf(
"failed to set memory.kmem.limit_in_bytes, because either tasks have already joined this cgroup or it has children")
}
}
}
return err
}
func getOomControlValue(mem *specs.LinuxMemory) *int64 {
if mem.DisableOOMKiller != nil && *mem.DisableOOMKiller {
i := int64(1)
return &i
}
return nil
}