blob: 9ff95de457d116ca0a2c4d07473408e3429de003 [file] [log] [blame]
package local
import (
"context"
"fmt"
"io"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/filters"
"github.com/containerd/containerd/log"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
)
var bufPool = sync.Pool{
New: func() interface{} {
buffer := make([]byte, 1<<20)
return &buffer
},
}
// LabelStore is used to store mutable labels for digests
type LabelStore interface {
// Get returns all the labels for the given digest
Get(digest.Digest) (map[string]string, error)
// Set sets all the labels for a given digest
Set(digest.Digest, map[string]string) error
// Update replaces the given labels for a digest,
// a key with an empty value removes a label.
Update(digest.Digest, map[string]string) (map[string]string, error)
}
// Store is digest-keyed store for content. All data written into the store is
// stored under a verifiable digest.
//
// Store can generally support multi-reader, single-writer ingest of data,
// including resumable ingest.
type store struct {
root string
ls LabelStore
}
// NewStore returns a local content store
func NewStore(root string) (content.Store, error) {
return NewLabeledStore(root, nil)
}
// NewLabeledStore returns a new content store using the provided label store
//
// Note: content stores which are used underneath a metadata store may not
// require labels and should use `NewStore`. `NewLabeledStore` is primarily
// useful for tests or standalone implementations.
func NewLabeledStore(root string, ls LabelStore) (content.Store, error) {
if err := os.MkdirAll(filepath.Join(root, "ingest"), 0777); err != nil {
return nil, err
}
return &store{
root: root,
ls: ls,
}, nil
}
func (s *store) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) {
p := s.blobPath(dgst)
fi, err := os.Stat(p)
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrapf(errdefs.ErrNotFound, "content %v", dgst)
}
return content.Info{}, err
}
var labels map[string]string
if s.ls != nil {
labels, err = s.ls.Get(dgst)
if err != nil {
return content.Info{}, err
}
}
return s.info(dgst, fi, labels), nil
}
func (s *store) info(dgst digest.Digest, fi os.FileInfo, labels map[string]string) content.Info {
return content.Info{
Digest: dgst,
Size: fi.Size(),
CreatedAt: fi.ModTime(),
UpdatedAt: getATime(fi),
Labels: labels,
}
}
// ReaderAt returns an io.ReaderAt for the blob.
func (s *store) ReaderAt(ctx context.Context, dgst digest.Digest) (content.ReaderAt, error) {
p := s.blobPath(dgst)
fi, err := os.Stat(p)
if err != nil {
if !os.IsNotExist(err) {
return nil, err
}
return nil, errors.Wrapf(errdefs.ErrNotFound, "blob %s expected at %s", dgst, p)
}
fp, err := os.Open(p)
if err != nil {
if !os.IsNotExist(err) {
return nil, err
}
return nil, errors.Wrapf(errdefs.ErrNotFound, "blob %s expected at %s", dgst, p)
}
return sizeReaderAt{size: fi.Size(), fp: fp}, nil
}
// Delete removes a blob by its digest.
//
// While this is safe to do concurrently, safe exist-removal logic must hold
// some global lock on the store.
func (s *store) Delete(ctx context.Context, dgst digest.Digest) error {
if err := os.RemoveAll(s.blobPath(dgst)); err != nil {
if !os.IsNotExist(err) {
return err
}
return errors.Wrapf(errdefs.ErrNotFound, "content %v", dgst)
}
return nil
}
func (s *store) Update(ctx context.Context, info content.Info, fieldpaths ...string) (content.Info, error) {
if s.ls == nil {
return content.Info{}, errors.Wrapf(errdefs.ErrFailedPrecondition, "update not supported on immutable content store")
}
p := s.blobPath(info.Digest)
fi, err := os.Stat(p)
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrapf(errdefs.ErrNotFound, "content %v", info.Digest)
}
return content.Info{}, err
}
var (
all bool
labels map[string]string
)
if len(fieldpaths) > 0 {
for _, path := range fieldpaths {
if strings.HasPrefix(path, "labels.") {
if labels == nil {
labels = map[string]string{}
}
key := strings.TrimPrefix(path, "labels.")
labels[key] = info.Labels[key]
continue
}
switch path {
case "labels":
all = true
labels = info.Labels
default:
return content.Info{}, errors.Wrapf(errdefs.ErrInvalidArgument, "cannot update %q field on content info %q", path, info.Digest)
}
}
} else {
all = true
labels = info.Labels
}
if all {
err = s.ls.Set(info.Digest, labels)
} else {
labels, err = s.ls.Update(info.Digest, labels)
}
if err != nil {
return content.Info{}, err
}
info = s.info(info.Digest, fi, labels)
info.UpdatedAt = time.Now()
if err := os.Chtimes(p, info.UpdatedAt, info.CreatedAt); err != nil {
log.G(ctx).WithError(err).Warnf("could not change access time for %s", info.Digest)
}
return info, nil
}
func (s *store) Walk(ctx context.Context, fn content.WalkFunc, filters ...string) error {
// TODO: Support filters
root := filepath.Join(s.root, "blobs")
var alg digest.Algorithm
return filepath.Walk(root, func(path string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
if !fi.IsDir() && !alg.Available() {
return nil
}
// TODO(stevvooe): There are few more cases with subdirs that should be
// handled in case the layout gets corrupted. This isn't strict enough
// and may spew bad data.
if path == root {
return nil
}
if filepath.Dir(path) == root {
alg = digest.Algorithm(filepath.Base(path))
if !alg.Available() {
alg = ""
return filepath.SkipDir
}
// descending into a hash directory
return nil
}
dgst := digest.NewDigestFromHex(alg.String(), filepath.Base(path))
if err := dgst.Validate(); err != nil {
// log error but don't report
log.L.WithError(err).WithField("path", path).Error("invalid digest for blob path")
// if we see this, it could mean some sort of corruption of the
// store or extra paths not expected previously.
}
var labels map[string]string
if s.ls != nil {
labels, err = s.ls.Get(dgst)
if err != nil {
return err
}
}
return fn(s.info(dgst, fi, labels))
})
}
func (s *store) Status(ctx context.Context, ref string) (content.Status, error) {
return s.status(s.ingestRoot(ref))
}
func (s *store) ListStatuses(ctx context.Context, fs ...string) ([]content.Status, error) {
fp, err := os.Open(filepath.Join(s.root, "ingest"))
if err != nil {
return nil, err
}
defer fp.Close()
fis, err := fp.Readdir(-1)
if err != nil {
return nil, err
}
filter, err := filters.ParseAll(fs...)
if err != nil {
return nil, err
}
var active []content.Status
for _, fi := range fis {
p := filepath.Join(s.root, "ingest", fi.Name())
stat, err := s.status(p)
if err != nil {
if !os.IsNotExist(err) {
return nil, err
}
// TODO(stevvooe): This is a common error if uploads are being
// completed while making this listing. Need to consider taking a
// lock on the whole store to coordinate this aspect.
//
// Another option is to cleanup downloads asynchronously and
// coordinate this method with the cleanup process.
//
// For now, we just skip them, as they really don't exist.
continue
}
if filter.Match(adaptStatus(stat)) {
active = append(active, stat)
}
}
return active, nil
}
// status works like stat above except uses the path to the ingest.
func (s *store) status(ingestPath string) (content.Status, error) {
dp := filepath.Join(ingestPath, "data")
fi, err := os.Stat(dp)
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrap(errdefs.ErrNotFound, err.Error())
}
return content.Status{}, err
}
ref, err := readFileString(filepath.Join(ingestPath, "ref"))
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrap(errdefs.ErrNotFound, err.Error())
}
return content.Status{}, err
}
startedAt, err := readFileTimestamp(filepath.Join(ingestPath, "startedat"))
if err != nil {
return content.Status{}, errors.Wrapf(err, "could not read startedat")
}
updatedAt, err := readFileTimestamp(filepath.Join(ingestPath, "updatedat"))
if err != nil {
return content.Status{}, errors.Wrapf(err, "could not read updatedat")
}
// because we don't write updatedat on every write, the mod time may
// actually be more up to date.
if fi.ModTime().After(updatedAt) {
updatedAt = fi.ModTime()
}
return content.Status{
Ref: ref,
Offset: fi.Size(),
Total: s.total(ingestPath),
UpdatedAt: updatedAt,
StartedAt: startedAt,
}, nil
}
func adaptStatus(status content.Status) filters.Adaptor {
return filters.AdapterFunc(func(fieldpath []string) (string, bool) {
if len(fieldpath) == 0 {
return "", false
}
switch fieldpath[0] {
case "ref":
return status.Ref, true
}
return "", false
})
}
// total attempts to resolve the total expected size for the write.
func (s *store) total(ingestPath string) int64 {
totalS, err := readFileString(filepath.Join(ingestPath, "total"))
if err != nil {
return 0
}
total, err := strconv.ParseInt(totalS, 10, 64)
if err != nil {
// represents a corrupted file, should probably remove.
return 0
}
return total
}
// Writer begins or resumes the active writer identified by ref. If the writer
// is already in use, an error is returned. Only one writer may be in use per
// ref at a time.
//
// The argument `ref` is used to uniquely identify a long-lived writer transaction.
func (s *store) Writer(ctx context.Context, ref string, total int64, expected digest.Digest) (content.Writer, error) {
var lockErr error
for count := uint64(0); count < 10; count++ {
time.Sleep(time.Millisecond * time.Duration(rand.Intn(1<<count)))
if err := tryLock(ref); err != nil {
if !errdefs.IsUnavailable(err) {
return nil, err
}
lockErr = err
} else {
lockErr = nil
break
}
}
if lockErr != nil {
return nil, lockErr
}
w, err := s.writer(ctx, ref, total, expected)
if err != nil {
unlock(ref)
return nil, err
}
return w, nil // lock is now held by w.
}
// writer provides the main implementation of the Writer method. The caller
// must hold the lock correctly and release on error if there is a problem.
func (s *store) writer(ctx context.Context, ref string, total int64, expected digest.Digest) (content.Writer, error) {
// TODO(stevvooe): Need to actually store expected here. We have
// code in the service that shouldn't be dealing with this.
if expected != "" {
p := s.blobPath(expected)
if _, err := os.Stat(p); err == nil {
return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "content %v", expected)
}
}
path, refp, data := s.ingestPaths(ref)
var (
digester = digest.Canonical.Digester()
offset int64
startedAt time.Time
updatedAt time.Time
)
// ensure that the ingest path has been created.
if err := os.Mkdir(path, 0755); err != nil {
if !os.IsExist(err) {
return nil, err
}
status, err := s.status(path)
if err != nil {
return nil, errors.Wrap(err, "failed reading status of resume write")
}
if ref != status.Ref {
// NOTE(stevvooe): This is fairly catastrophic. Either we have some
// layout corruption or a hash collision for the ref key.
return nil, errors.Wrapf(err, "ref key does not match: %v != %v", ref, status.Ref)
}
if total > 0 && status.Total > 0 && total != status.Total {
return nil, errors.Errorf("provided total differs from status: %v != %v", total, status.Total)
}
// TODO(stevvooe): slow slow slow!!, send to goroutine or use resumable hashes
fp, err := os.Open(data)
if err != nil {
return nil, err
}
defer fp.Close()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
offset, err = io.CopyBuffer(digester.Hash(), fp, *p)
if err != nil {
return nil, err
}
updatedAt = status.UpdatedAt
startedAt = status.StartedAt
total = status.Total
} else {
startedAt = time.Now()
updatedAt = startedAt
// the ingest is new, we need to setup the target location.
// write the ref to a file for later use
if err := ioutil.WriteFile(refp, []byte(ref), 0666); err != nil {
return nil, err
}
if writeTimestampFile(filepath.Join(path, "startedat"), startedAt); err != nil {
return nil, err
}
if writeTimestampFile(filepath.Join(path, "updatedat"), startedAt); err != nil {
return nil, err
}
if total > 0 {
if err := ioutil.WriteFile(filepath.Join(path, "total"), []byte(fmt.Sprint(total)), 0666); err != nil {
return nil, err
}
}
}
fp, err := os.OpenFile(data, os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
return nil, errors.Wrap(err, "failed to open data file")
}
if _, err := fp.Seek(offset, io.SeekStart); err != nil {
return nil, errors.Wrap(err, "could not seek to current write offset")
}
return &writer{
s: s,
fp: fp,
ref: ref,
path: path,
offset: offset,
total: total,
digester: digester,
startedAt: startedAt,
updatedAt: updatedAt,
}, nil
}
// Abort an active transaction keyed by ref. If the ingest is active, it will
// be cancelled. Any resources associated with the ingest will be cleaned.
func (s *store) Abort(ctx context.Context, ref string) error {
root := s.ingestRoot(ref)
if err := os.RemoveAll(root); err != nil {
if os.IsNotExist(err) {
return errors.Wrapf(errdefs.ErrNotFound, "ingest ref %q", ref)
}
return err
}
return nil
}
func (s *store) blobPath(dgst digest.Digest) string {
return filepath.Join(s.root, "blobs", dgst.Algorithm().String(), dgst.Hex())
}
func (s *store) ingestRoot(ref string) string {
dgst := digest.FromString(ref)
return filepath.Join(s.root, "ingest", dgst.Hex())
}
// ingestPaths are returned. The paths are the following:
//
// - root: entire ingest directory
// - ref: name of the starting ref, must be unique
// - data: file where data is written
//
func (s *store) ingestPaths(ref string) (string, string, string) {
var (
fp = s.ingestRoot(ref)
rp = filepath.Join(fp, "ref")
dp = filepath.Join(fp, "data")
)
return fp, rp, dp
}
func readFileString(path string) (string, error) {
p, err := ioutil.ReadFile(path)
return string(p), err
}
// readFileTimestamp reads a file with just a timestamp present.
func readFileTimestamp(p string) (time.Time, error) {
b, err := ioutil.ReadFile(p)
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrap(errdefs.ErrNotFound, err.Error())
}
return time.Time{}, err
}
var t time.Time
if err := t.UnmarshalText(b); err != nil {
return time.Time{}, errors.Wrapf(err, "could not parse timestamp file %v", p)
}
return t, nil
}
func writeTimestampFile(p string, t time.Time) error {
b, err := t.MarshalText()
if err != nil {
return err
}
return ioutil.WriteFile(p, b, 0666)
}