blob: 528bdb6c54dec2d9e7e01956fa7fefa389ab6d6c [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <threads.h>
#include <atomic>
#include <new>
#include <zircon/types.h>
#include <zircon/listnode.h>
#include <zircon/syscalls.h>
#include <zircon/processargs.h>
#include <zircon/device/vfs.h>
#include <fuchsia/io/c/fidl.h>
#include <lib/fdio/namespace.h>
#include <lib/fdio/util.h>
#include <lib/fdio/vfs.h>
#include <lib/zxio/null.h>
#include "private.h"
#include "private-remoteio.h"
// A fdio namespace is a simple local filesystem that consists
// of a tree of vnodes, each of which may contain child vnodes
// and a handle for a remote filesystem.
//
// They are expected to be relatively small (perhaps 10-50 total
// local vnodes, acting as roots for the remote filesystems that
// contain the actual items of interest) and as such have a simple
// locking model -- one namespace-wide lock that is held while
// doing the local directory walk part of an OPEN operation.
//
// If an OPEN path matches one of the local vnodes exactly, a
// zxio_dir object is created and returned. This object
// handles further OPEN operations, as well as READDIR and STAT.
// It favors local children over the remote -- so, for example,
// READDIR first returns the vnode's local children, then forwards
// the request to the remote, but filters the results (removing
// matches of its own children).
typedef struct zxio_dir zxio_dir_t;
typedef struct fdio_vnode mxvn_t;
// A local vnode in the namespace tree. Children hang off |child| and
// are chained through |next|; the name is stored inline as a
// NUL-terminated flexible array member with its length cached.
struct fdio_vnode {
    mxvn_t* child;       // head of this node's child list (may be NULL)
    mxvn_t* parent;      // NULL only for the namespace root
    mxvn_t* next;        // next sibling under the same parent
    zx_handle_t remote;  // remote filesystem channel, or ZX_HANDLE_INVALID
    uint32_t namelen;    // strlen(name), cached
    char name[];         // NUL-terminated name, allocated inline
};
// refcount is incremented when a fdio_dir references any of its vnodes
// when refcount is nonzero it may not be modified or destroyed
struct fdio_namespace {
    mtx_t lock;        // guards the vnode tree and refcount
    int32_t refcount;  // number of live local directory objects
    mxvn_t root;       // root vnode; its flexible-array name is ""
};
// The directory represents a local directory (either / or
// some directory between / and a mount point), so it has
// to emulate directory behavior.
struct zxio_dir {
    zxio_t io;      // must be first: fdio casts the storage to zxio_t
    mxvn_t* vn;     // local vnode this directory object exposes
    fdio_ns_t* ns;  // owning namespace (accounted via ns->refcount)
    // readdir sequence number
    // TODO: rewind support (when we have rewinddir)
    std::atomic<int32_t> seq;
};
static_assert(offsetof(zxio_dir, io) == 0,
              "zxio_dir must be castable to zxio_t");
static_assert(std::is_trivially_destructible<zxio_dir>::value,
              "zxio_dir must have trivial destructor to be freed");
static_assert(sizeof(zxio_dir_t) <= sizeof(zxio_storage_t),
              "zxio_dir_t must fit inside zxio_storage_t.");
static fdio_t* fdio_dir_create_locked(fdio_ns_t* fs, mxvn_t* vn);
// Search |dir|'s immediate children for a node whose name matches
// exactly the first |len| bytes of |name|. Returns NULL if no child
// matches. Caller must hold the namespace lock.
static mxvn_t* vn_lookup_locked(mxvn_t* dir, const char* name, size_t len) {
    mxvn_t* vn = dir->child;
    while (vn != NULL) {
        if (vn->namelen == len && memcmp(vn->name, name, len) == 0) {
            return vn;
        }
        vn = vn->next;
    }
    return NULL;
}
// Find or create the child of |dir| named by the first |len| bytes of
// |name|; on success *out receives the node. A valid |remote| makes the
// new node a mount point. It is an error to attach a remote over an
// existing local node, or to re-create a node that already has a
// remote. Caller must hold the namespace lock.
static zx_status_t vn_create_locked(mxvn_t* dir, const char* name, size_t len,
                                    zx_handle_t remote, mxvn_t** out) {
    // Reject empty, oversized, ".", and ".." names.
    if (len == 0 || len > NAME_MAX) {
        return ZX_ERR_INVALID_ARGS;
    }
    if (name[0] == '.' && (len == 1 || (len == 2 && name[1] == '.'))) {
        return ZX_ERR_INVALID_ARGS;
    }
    mxvn_t* existing = vn_lookup_locked(dir, name, len);
    if (existing != NULL) {
        // A remote may not shadow an existing local (virtual) node...
        if (remote != ZX_HANDLE_INVALID) {
            LOG(1, "VN-CREATE FAILED: SHADOWING LOCAL\n");
            return ZX_ERR_ALREADY_EXISTS;
        }
        // ...and a local node may not overlap an existing mount point.
        if (existing->remote != ZX_HANDLE_INVALID) {
            LOG(1, "VN-CREATE FAILED: SHADOWING REMOTE\n");
            return ZX_ERR_NOT_SUPPORTED;
        }
        *out = existing;
        return ZX_OK;
    }
    // Allocate the node plus inline name storage (+1 for the NUL).
    mxvn_t* vn = static_cast<mxvn_t*>(calloc(1, sizeof(*vn) + len + 1));
    if (vn == NULL) {
        return ZX_ERR_NO_MEMORY;
    }
    memcpy(vn->name, name, len);
    vn->name[len] = 0;
    vn->namelen = static_cast<uint32_t>(len);
    vn->parent = dir;
    vn->remote = remote;
    // Push onto the front of the parent's child list.
    vn->next = dir->child;
    dir->child = vn;
    *out = vn;
    return ZX_OK;
}
// vn_destroy *only* safe to be called on vnodes that have never been
// wrapped in a directory object, because we don't refcount vnodes
// (they're expected to live for the duration of the namespace).
//
// It's used by fdio_ns_bind() to delete intermediate vnodes that
// were created while the ns lock is held, to "undo" a partial mkdir
// operation that failed partway down the path. Since the lock is not
// released until the full operation completes, this is safe.
// Unlink |child| from its parent's child list and free it. Refuses to
// destroy a mount point (remote still attached) or the root vnode.
static zx_status_t vn_destroy_locked(mxvn_t* child) {
    // Can't destroy a live (remoted) node.
    if (child->remote != ZX_HANDLE_INVALID) {
        return ZX_ERR_BAD_STATE;
    }
    mxvn_t* dir = child->parent;
    // Can't destroy the root.
    if (dir == NULL) {
        return ZX_ERR_NOT_SUPPORTED;
    }
    // Walk the parent's singly-linked child list and splice |child| out.
    mxvn_t** link = &dir->child;
    while (*link != NULL) {
        if (*link == child) {
            *link = child->next;
            break;
        }
        link = &(*link)->next;
    }
    free(child);
    return ZX_OK;
}
// Recursively free the entire subtree below |parent|, closing any
// remote handles along the way. |parent| itself is left intact.
static void vn_destroy_children_locked(mxvn_t* parent) {
    mxvn_t* vn = parent->child;
    while (vn != NULL) {
        mxvn_t* next = vn->next;  // save before freeing
        if (vn->child != NULL) {
            vn_destroy_children_locked(vn);
        }
        if (vn->remote != ZX_HANDLE_INVALID) {
            zx_handle_close(vn->remote);
        }
        free(vn);
        vn = next;
    }
}
// View the zxio storage embedded in |io| as a zxio_dir_t (valid because
// zxio_dir's first member is the zxio_t, per the static_asserts above).
static zxio_dir_t* fdio_get_zxio_dir(fdio_t* io) {
    zxio_t* z = fdio_get_zxio(io);
    return reinterpret_cast<zxio_dir_t*>(z);
}
// Drop this directory object's reference on the namespace and detach
// its back-pointers. Never fails.
static zx_status_t zxio_dir_close(fdio_t* io) {
    zxio_dir_t* dir = fdio_get_zxio_dir(io);
    fdio_ns_t* ns = dir->ns;
    mtx_lock(&ns->lock);
    ns->refcount--;
    mtx_unlock(&ns->lock);
    dir->vn = NULL;
    dir->ns = NULL;
    return ZX_OK;
}
// Walk the local vnode tree from *_vn along *_path as far as it goes.
//
// On success, (*_vn, *_path) are updated to the deepest node reached:
// - if the path fully matched a local vnode, *_path becomes "." and
//   *_vn is that vnode;
// - if the walk stopped at a mount point with path left over, *_vn is
//   the mount point and *_path is the remainder (for the remote).
// Returns ZX_ERR_NOT_FOUND if a segment has no match and the current
// node is not a mount point, and ZX_ERR_BAD_PATH on an empty segment
// (e.g. "a//b"). Caller must hold the namespace lock.
static zx_status_t ns_walk_locked(mxvn_t** _vn, const char** _path) {
    mxvn_t* vn = *_vn;
    const char* path = *_path;
    // Empty path or "." matches initial node.
    if ((path[0] == 0) || ((path[0] == '.') && (path[1] == 0))) {
        return ZX_OK;
    }
    for (;;) {
        // Find the next path segment.
        const char* name = path;
        const char* next = strchr(path, '/');
        size_t len = next ? (size_t)(next - path) : strlen(path);
        // Path segments may not be empty.
        if (len == 0) {
            return ZX_ERR_BAD_PATH;
        }
        // look for a match
        mxvn_t* child = vn_lookup_locked(vn, name, len);
        if (child != NULL) {
            vn = child;
            if (next) {
                // Matched, but more path segments to walk.
                // Descend and continue.
                path = next + 1;
                continue;
            } else {
                // we've matched on the last segment
                *_vn = vn;
                *_path = ".";
                return ZX_OK;
            }
        }
        // If there's remaining path but this is not a mount point,
        // we're done.
        if (vn->remote == ZX_HANDLE_INVALID) {
            return ZX_ERR_NOT_FOUND;
        }
        // Stop at the mount point; the caller forwards the remainder.
        *_vn = vn;
        *_path = path;
        return ZX_OK;
    }
}
__BEGIN_CDECLS
// Connect channel |h| to |path| within the namespace. |h| is consumed
// in all cases, including failure. The path must be absolute; the
// local portion is resolved under the namespace lock and the remainder
// is forwarded to the mount point's remote via fdio_open_at().
__EXPORT
zx_status_t fdio_ns_connect(fdio_ns_t* ns, const char* path,
                            uint32_t flags, zx_handle_t h) {
    mxvn_t* vn = &ns->root;
    zx_status_t r = ZX_OK;
    LOG(6, "CONNECT '%s'\n", path);
    // Require that we start at /
    if (path[0] != '/') {
        r = ZX_ERR_NOT_FOUND;
        goto fail0;
    }
    path++;
    mtx_lock(&ns->lock);
    if ((r = ns_walk_locked(&vn, &path)) != ZX_OK) {
        goto fail1;
    }
    // cannot connect via non-mountpoint nodes
    if (vn->remote == ZX_HANDLE_INVALID) {
        r = ZX_ERR_NOT_SUPPORTED;
        goto fail1;
    }
    // Forward the remaining path (or ".") to the remote filesystem.
    r = fdio_open_at(vn->remote, path, flags, h);
    mtx_unlock(&ns->lock);
    return r;
fail1:
    mtx_unlock(&ns->lock);
fail0:
    // |h| is consumed even on failure.
    zx_handle_close(h);
    return r;
}
// Open |path| in the namespace, returning the client end of a new
// channel in *out. On failure *out is reset to ZX_HANDLE_INVALID.
__EXPORT
zx_status_t fdio_ns_open(fdio_ns_t* ns, const char* path, uint32_t flags, zx_handle_t* out) {
    // Create a channel pair: hand one end to the namespace, keep the other.
    zx_handle_t request;
    if (zx_channel_create(0, &request, out) != ZX_OK) {
        return ZX_ERR_INTERNAL;
    }
    // fdio_ns_connect consumes |request| regardless of outcome.
    zx_status_t status = fdio_ns_connect(ns, path, flags, request);
    if (status != ZX_OK) {
        zx_handle_close(*out);
        *out = ZX_HANDLE_INVALID;
    }
    return status;
}
// Open |path| relative to this local directory node.
// Expects a canonical path (no "..") with no leading or trailing slash.
static zx_status_t zxio_dir_open(fdio_t* io, const char* path, uint32_t flags, uint32_t mode,
                                 fdio_t** out) {
    zxio_dir_t* dir = fdio_get_zxio_dir(io);
    mxvn_t* vn = dir->vn;
    zx_status_t r = ZX_OK;
    LOG(6, "OPEN '%s'\n", path);
    mtx_lock(&dir->ns->lock);
    if ((r = ns_walk_locked(&vn, &path)) == ZX_OK) {
        if (vn->remote == ZX_HANDLE_INVALID) {
            // Landed on a purely-local node: wrap it in a new local
            // directory object.
            if ((*out = fdio_dir_create_locked(dir->ns, vn)) == NULL) {
                r = ZX_ERR_NO_MEMORY;
            } else {
                // BUGFIX: the new directory object holds a reference on
                // the namespace, which zxio_dir_close() unconditionally
                // decrements. Without this increment, opening and then
                // closing a subdirectory drives refcount negative and
                // lets fdio_ns_destroy() free a live namespace.
                dir->ns->refcount++;
            }
        } else {
            mtx_unlock(&dir->ns->lock);
            // If we're trying to mkdir over top of a mount point,
            // the correct error is EEXIST
            if ((flags & ZX_FS_FLAG_CREATE) && !strcmp(path, ".")) {
                return ZX_ERR_ALREADY_EXISTS;
            }
            // Active Namespaces are immutable, so referencing remote here
            // is safe. We don't want to do a blocking open under the ns lock.
            r = zxrio_open_handle(vn->remote, path, flags, mode, out);
            LOG(6, "OPEN REMOTE '%s': %d\n", path, r);
            return r;
        }
    }
    mtx_unlock(&dir->ns->lock);
    return r;
}
__END_CDECLS
// Serializes vdirent_t records into a caller-supplied buffer.
class DirentFiller {
public:
    explicit DirentFiller(void* buffer, size_t length)
        : start_(static_cast<char*>(buffer)),
          pos_(static_cast<char*>(buffer)),
          remaining_(length) {}

    // Append one entry. Fails without writing anything if the name is
    // too long or the remaining space cannot hold the record.
    zx_status_t Add(const char* name, size_t len, uint32_t type) {
        size_t record_size = sizeof(vdirent_t) + len;
        if (len > NAME_MAX || record_size > remaining_) {
            return ZX_ERR_INVALID_ARGS;
        }
        vdirent_t* de = reinterpret_cast<vdirent_t*>(pos_);
        de->ino = fuchsia_io_INO_UNKNOWN;
        de->size = static_cast<uint8_t>(len);
        de->type = static_cast<uint8_t>(type);
        memcpy(de->name, name, len);
        pos_ += record_size;
        remaining_ -= record_size;
        return ZX_OK;
    }

    // Total bytes written so far.
    size_t Used() const {
        return static_cast<size_t>(pos_ - start_);
    }

private:
    char* start_;      // beginning of the caller's buffer
    char* pos_;        // next write position
    size_t remaining_; // bytes still available at pos_
};
// Fill |buf| with "." followed by this node's local children, all
// reported as directories. Returns the number of bytes written (0 if
// even "." does not fit). Caller must hold the namespace lock.
static zx_status_t zxio_dir_readdir_locked(zxio_dir_t* dir, void* buf, size_t len) {
    DirentFiller filler(buf, len);
    if (filler.Add(".", 1, VTYPE_TO_DTYPE(V_TYPE_DIR)) != ZX_OK) {
        return 0;
    }
    mxvn_t* vn = dir->vn->child;
    while (vn != NULL) {
        if (filler.Add(vn->name, vn->namelen, VTYPE_TO_DTYPE(V_TYPE_DIR)) != ZX_OK) {
            break;  // out of space: report what fit
        }
        vn = vn->next;
    }
    return static_cast<zx_status_t>(filler.Used());
}
// Report fixed attributes: local namespace directories are read-only
// directories with an unknown inode and a single link.
static zx_status_t zxio_dir_get_attr(fdio_t* io, fuchsia_io_NodeAttributes* attr) {
    memset(attr, 0, sizeof(*attr));
    attr->link_count = 1;
    attr->id = fuchsia_io_INO_UNKNOWN;
    attr->mode = V_TYPE_DIR | V_IRUSR;
    return ZX_OK;
}
// Rewind is currently a no-op: the readdir sequence counter (dir->seq)
// is never reset. See the TODO on zxio_dir regarding rewinddir support.
static zx_status_t zxio_dir_rewind(fdio_t* io) {
    return ZX_OK;
}
// Return the full local listing on the first call and an empty result
// on every subsequent call (there is no seek/rewind support yet).
static zx_status_t zxio_dir_readdir(fdio_t* io, void* ptr, size_t max, size_t* actual) {
    zxio_dir_t* dir = fdio_get_zxio_dir(io);
    mtx_lock(&dir->ns->lock);
    bool first_call = (dir->seq.fetch_add(1) == 0);
    if (first_call) {
        *actual = zxio_dir_readdir_locked(dir, ptr, max);
    } else {
        *actual = 0;
    }
    mtx_unlock(&dir->ns->lock);
    return ZX_OK;
}
// Active namespaces are immutable; unlink through a local directory
// always fails.
static zx_status_t zxio_dir_unlink(fdio_t* io, const char* path, size_t len) {
    return ZX_ERR_UNAVAILABLE;
}
// Operation table for local namespace directories. Only open/close,
// attributes, readdir/rewind, and unlink are implemented here;
// everything else falls through to the fdio defaults.
constexpr fdio_ops_t dir_ops = []() {
    fdio_ops_t ops = {};
    ops.get_attr = zxio_dir_get_attr;
    ops.close = zxio_dir_close;
    ops.open = zxio_dir_open;
    ops.clone = fdio_default_clone;
    ops.ioctl = fdio_default_ioctl;
    ops.wait_begin = fdio_default_wait_begin;
    ops.wait_end = fdio_default_wait_end;
    ops.unwrap = fdio_default_unwrap;
    ops.posix_ioctl = fdio_default_posix_ioctl;
    ops.get_vmo = fdio_default_get_vmo;
    ops.get_token = fdio_default_get_token;
    ops.set_attr = fdio_default_set_attr;
    ops.readdir = zxio_dir_readdir;
    ops.rewind = zxio_dir_rewind;
    ops.unlink = zxio_dir_unlink;
    ops.truncate = fdio_default_truncate;
    ops.rename = fdio_default_rename;
    ops.link = fdio_default_link;
    ops.get_flags = fdio_default_get_flags;
    ops.set_flags = fdio_default_set_flags;
    ops.recvfrom = fdio_default_recvfrom;
    ops.sendto = fdio_default_sendto;
    ops.recvmsg = fdio_default_recvmsg;
    ops.sendmsg = fdio_default_sendmsg;
    ops.shutdown = fdio_default_shutdown;
    return ops;
}();
// Allocate an fdio object backed by the local vnode |vn|. Returns NULL
// on allocation failure. Caller must hold ns->lock and is responsible
// for the namespace refcount accounting.
static fdio_t* fdio_dir_create_locked(fdio_ns_t* ns, mxvn_t* vn) {
    fdio_t* io = fdio_alloc(&dir_ops);
    if (io == NULL) {
        return NULL;
    }
    // Placement-new a zxio_dir_t into the fdio's zxio storage. The type
    // is trivially destructible (asserted above), so no destructor call
    // is needed when the storage is released.
    void* storage = fdio_get_zxio_dir(io);
    zxio_dir_t* dir = new (storage) zxio_dir_t();
    zxio_null_init(&(fdio_get_zxio_storage(io)->io));
    dir->vn = vn;
    dir->ns = ns;
    return io;
}
__BEGIN_CDECLS
// Allocate an empty namespace with an un-remoted root vnode.
__EXPORT
zx_status_t fdio_ns_create(fdio_ns_t** out) {
    // The root vnode's flexible-array name is the empty string, so one
    // extra byte is allocated for its NUL terminator.
    fdio_ns_t* ns = static_cast<fdio_ns_t*>(calloc(1, sizeof(*ns) + 1));
    if (ns == NULL) {
        return ZX_ERR_NO_MEMORY;
    }
    mtx_init(&ns->lock, mtx_plain);
    *out = ns;
    return ZX_OK;
}
// Destroy the namespace, freeing its vnode tree and closing all remote
// handles. Fails with ZX_ERR_BAD_STATE if any local directory object
// still references the namespace (refcount != 0).
__EXPORT
zx_status_t fdio_ns_destroy(fdio_ns_t* ns) {
    mtx_lock(&ns->lock);
    if (ns->refcount != 0) {
        mtx_unlock(&ns->lock);
        return ZX_ERR_BAD_STATE;
    }
    vn_destroy_children_locked(&ns->root);
    // BUGFIX: a remote bound directly at "/" lives on the root vnode
    // itself, which vn_destroy_children_locked() does not touch — close
    // it here to avoid leaking the handle.
    if (ns->root.remote != ZX_HANDLE_INVALID) {
        zx_handle_close(ns->root.remote);
        ns->root.remote = ZX_HANDLE_INVALID;
    }
    mtx_unlock(&ns->lock);
    free(ns);
    return ZX_OK;
}
// Bind |remote| at absolute |path|, creating intermediate local vnodes
// as needed. Shadowing is disallowed in both directions: a remote may
// not be bound over an existing local subtree, and nothing may be
// bound below an existing mount point. On failure the caller retains
// ownership of |remote|.
__EXPORT
zx_status_t fdio_ns_bind(fdio_ns_t* ns, const char* path, zx_handle_t remote) {
    LOG(1, "BIND '%s' %x\n", path, remote);
    if (remote == ZX_HANDLE_INVALID) {
        return ZX_ERR_BAD_HANDLE;
    }
    if ((path == NULL) || (path[0] != '/')) {
        return ZX_ERR_INVALID_ARGS;
    }
    // skip leading slash
    path++;
    zx_status_t r = ZX_OK;
    mtx_lock(&ns->lock);
    mxvn_t* vn = &ns->root;
    if (path[0] == 0) {
        // The path was "/", so bind to the root vnode itself.
        if (vn->remote != ZX_HANDLE_INVALID) {
            r = ZX_ERR_ALREADY_EXISTS;
        } else if (vn->child) {
            // overlay remotes are disallowed
            r = ZX_ERR_NOT_SUPPORTED;
        } else {
            vn->remote = remote;
        }
        // BUGFIX: only log when the bind actually failed; the original
        // printed "BIND ROOT: FAILED" unconditionally, even on success.
        if (r != ZX_OK) {
            LOG(1, "BIND ROOT: FAILED\n");
        }
        goto done;
    }
    if (vn->remote != ZX_HANDLE_INVALID) {
        // if there's something mounted at / we can't shadow it
        r = ZX_ERR_NOT_SUPPORTED;
        LOG(1, "BIND: FAILED (root bound)\n");
        goto done;
    }
    // Walk/create intermediate vnodes for each path segment, then
    // attach |remote| to a leaf vnode for the final segment.
    for (;;) {
        const char* next = strchr(path, '/');
        if (next) {
            // not the final segment, create an intermediate vnode
            r = vn_create_locked(vn, path, next - path, ZX_HANDLE_INVALID, &vn);
            if (r < 0) {
                break;
            }
            path = next + 1;
        } else {
            // final segment. create leaf vnode and stop
            r = vn_create_locked(vn, path, strlen(path), remote, &vn);
            break;
        }
    }
    if (r < 0) {
        // we failed, so unwind, removing any intermediate vnodes
        // we created. vn_destroy_locked() will error out on any
        // vnode that has a remote, or on the root vnode, so it
        // will stop us before we remove anything that already
        // existed. (we never create leaf vnodes with no remote)
        for (;;) {
            mxvn_t* parent = vn->parent;
            if (vn_destroy_locked(vn) < 0) {
                break;
            }
            vn = parent;
        }
    }
done:
    mtx_unlock(&ns->lock);
    return r;
}
// Bind a clone of file descriptor |fd|'s underlying remote handle at
// |path| in the namespace.
__EXPORT
zx_status_t fdio_ns_bind_fd(fdio_ns_t* ns, const char* path, int fd) {
    zx_handle_t handles[FDIO_MAX_HANDLES];
    uint32_t types[FDIO_MAX_HANDLES];
    // Clone the fd's underlying handles; on success r is the count.
    zx_status_t r = fdio_clone_fd(fd, 0, handles, types);
    if (r < 0) {
        return r;
    }
    if (r == 0) {
        return ZX_ERR_INTERNAL;
    }
    if (types[0] != PA_FDIO_REMOTE) {
        // Wrong transport type: discard everything we cloned.
        zx_handle_close_many(handles, r);
        return ZX_ERR_WRONG_TYPE;
    }
    // Only the primary handle gets bound; close any auxiliary handles.
    for (int i = 1; i < r; i++) {
        zx_handle_close(handles[i]);
    }
    r = fdio_ns_bind(ns, path, handles[0]);
    if (r < 0) {
        // Bind did not take ownership; clean up the clone.
        zx_handle_close(handles[0]);
    }
    return r;
}
// Open the root of the namespace as an fdio object, or NULL on failure.
//
// If nothing is mounted directly at "/", returns a local directory
// object and takes a reference on the namespace (released by
// zxio_dir_close()). Otherwise opens the root's remote; that blocking
// open happens outside the lock.
fdio_t* fdio_ns_open_root(fdio_ns_t* ns) {
    fdio_t* io;
    mtx_lock(&ns->lock);
    if (ns->root.remote == ZX_HANDLE_INVALID) {
        io = fdio_dir_create_locked(ns, &ns->root);
        if (io != NULL) {
            // The directory object holds a namespace reference.
            ns->refcount++;
        }
        mtx_unlock(&ns->lock);
    } else {
        mtx_unlock(&ns->lock);
        // Active namespaces are immutable, so safe to access remote
        // outside of the lock, avoiding blocking while holding the lock.
        zx_status_t r = zxrio_open_handle(ns->root.remote, "", O_RDWR, 0, &io);
        if (r != ZX_OK) {
            io = NULL;
        }
    }
    return io;
}
// Open the namespace root and install it in the process fd table.
// Returns the new fd, or -1 with errno set to ENOMEM on failure.
__EXPORT
int fdio_ns_opendir(fdio_ns_t* ns) {
    fdio_t* io = fdio_ns_open_root(ns);
    int fd = -1;
    if (io != NULL) {
        fd = fdio_bind_to_fd(io, -1, 0);
        if (fd < 0) {
            // Binding failed: drop the reference on |io|.
            fdio_release(io);
        }
    }
    if (fd < 0) {
        errno = ENOMEM;
    }
    return fd;
}
// Make the namespace root the process-wide current working directory.
__EXPORT
zx_status_t fdio_ns_chdir(fdio_ns_t* ns) {
    fdio_t* io = fdio_ns_open_root(ns);
    if (io == NULL) {
        return ZX_ERR_NO_MEMORY;
    }
    // NOTE(review): assumes fdio_chdir takes ownership of |io| and that
    // its failure modes can be ignored here — confirm against private.h.
    fdio_chdir(io, "/");
    return ZX_OK;
}
// Reconstruct the full canonical path of mount point |vn| by walking
// parent links, building the string backwards from the end of a stack
// buffer, then invoke |func| with that path and vn's remote handle.
static zx_status_t ns_enum_callback(mxvn_t* vn, void* cookie,
                                    zx_status_t (*func)(void* cookie, const char* path,
                                                        size_t len, zx_handle_t h)) {
    char path[PATH_MAX];
    // |end| walks backwards from the terminating NUL at the buffer's end.
    char* end = path + sizeof(path) - 1;
    *end = 0;
    zx_handle_t h = vn->remote;
    for (;;) {
        // Ensure room for this segment plus a '/' separator.
        if ((vn->namelen + 1) > (size_t)(end - path)) {
            return ZX_ERR_BAD_PATH;
        }
        end -= vn->namelen;
        memcpy(end, vn->name, vn->namelen);
        if ((vn = vn->parent) == NULL) {
            // Reached the root; |end| now points at the complete path.
            size_t len = (sizeof(path) - 1) - (end - path);
            if (len > 0) {
                return func(cookie, end, len, h);
            } else {
                // the root vn ends up having length 0, so we
                // fake up a correct canonical name for it here
                return func(cookie, "/", 1, h);
            }
        }
        // Prepend the separator before the parent's name.
        end--;
        *end = '/';
    }
}
// Depth-first walk over |vn| and its siblings, invoking |func| once per
// mount point (any vnode with a remote handle attached). Stops and
// returns the first non-OK status from the callback.
static zx_status_t ns_enumerate(mxvn_t* vn, void* cookie,
                                zx_status_t (*func)(void* cookie, const char* path,
                                                    size_t len, zx_handle_t h)) {
    for (; vn != NULL; vn = vn->next) {
        if (vn->remote != ZX_HANDLE_INVALID) {
            // BUGFIX: propagate callback failures. These were previously
            // discarded, so e.g. a handle-clone failure in ns_export_copy
            // (ZX_ERR_BAD_STATE) never reached fdio_ns_export's
            // "status < 0" error path.
            zx_status_t status = ns_enum_callback(vn, cookie, func);
            if (status != ZX_OK) {
                return status;
            }
        }
        if (vn->child) {
            zx_status_t status = ns_enumerate(vn->child, cookie, func);
            if (status != ZX_OK) {
                return status;
            }
        }
    }
    return ZX_OK;
}
// Scratch state shared by the two ns_enumerate passes in
// fdio_ns_export(): pass 1 (ns_export_count) sizes the allocation,
// pass 2 (ns_export_copy) fills the tables.
typedef struct {
    size_t bytes;         // total allocation size needed (pass 1)
    size_t count;         // entries counted (pass 1) / copied so far (pass 2)
    char* buffer;         // next free byte for path strings (pass 2)
    zx_handle_t* handle;  // handle table (pass 2)
    uint32_t* type;       // processargs type table (pass 2)
    char** path;          // path-pointer table (pass 2)
} export_state_t;
// ns_enumerate callback (pass 1): accumulate the number of mount
// entries and the total bytes the flat namespace will require.
static zx_status_t ns_export_count(void* cookie, const char* path,
                                   size_t len, zx_handle_t h) {
    export_state_t* es = static_cast<export_state_t*>(cookie);
    // Each entry needs one slot in the handle table,
    // one slot in the type table, and one slot in the
    // path table, plus storage for the path and NUL.
    // (sizeof(char*) — not char** — is the path-table slot size;
    // the two are equal on all supported targets, but the original
    // sizeof(char**) misstated the intent.)
    es->bytes += sizeof(zx_handle_t) + sizeof(uint32_t) + sizeof(char*) + len + 1;
    es->count += 1;
    return ZX_OK;
}
// ns_enumerate callback (pass 2): clone each mount point's handle and
// copy its path into the preallocated flat-namespace tables.
static zx_status_t ns_export_copy(void* cookie, const char* path,
                                  size_t len, zx_handle_t h) {
    zx_handle_t clone = fdio_service_clone(h);
    if (clone == ZX_HANDLE_INVALID) {
        return ZX_ERR_BAD_STATE;
    }
    export_state_t* es = static_cast<export_state_t*>(cookie);
    size_t i = es->count;
    memcpy(es->buffer, path, len + 1);  // include the NUL terminator
    es->path[i] = es->buffer;
    es->handle[i] = clone;
    es->type[i] = PA_HND(PA_NS_DIR, static_cast<uint32_t>(i));
    es->buffer += len + 1;
    es->count = i + 1;
    return ZX_OK;
}
// Flatten the namespace into one heap allocation laid out as: the
// fdio_flat_namespace_t header, the handle table, the type table, the
// path-pointer table, then all path strings. Handles are clones; the
// caller frees everything with fdio_ns_free_flat_namespace().
__EXPORT
zx_status_t fdio_ns_export(fdio_ns_t* ns, fdio_flat_namespace_t** out) {
    export_state_t es;
    es.bytes = sizeof(fdio_flat_namespace_t);
    es.count = 0;
    mtx_lock(&ns->lock);
    // Pass 1: count entries and compute the total allocation size.
    ns_enumerate(&ns->root, &es, ns_export_count);
    fdio_flat_namespace_t* flat = static_cast<fdio_flat_namespace_t*>(malloc(es.bytes));
    if (flat == NULL) {
        mtx_unlock(&ns->lock);
        return ZX_ERR_NO_MEMORY;
    }
    // We've allocated enough memory for the flat struct
    // followed by count handles, followed by count types,
    // followed by count path ptrs followed by enough bytes
    // for all the path strings. Point es.* at the right
    // slices of that memory:
    es.handle = (zx_handle_t*) (flat + 1);
    es.type = (uint32_t*) (es.handle + es.count);
    es.path = (char**) (es.type + es.count);
    es.buffer = (char*) (es.path + es.count);
    es.count = 0;
    // Pass 2: clone handles and copy paths into the slices above.
    zx_status_t status = ns_enumerate(&ns->root, &es, ns_export_copy);
    mtx_unlock(&ns->lock);
    if (status < 0) {
        // Partial failure: close whatever handles were already cloned.
        for (size_t n = 0; n < es.count; n++) {
            zx_handle_close(es.handle[n]);
        }
        free(flat);
    } else {
        flat->count = es.count;
        flat->handle = es.handle;
        flat->type = es.type;
        flat->path = (const char* const*) es.path;
        *out = flat;
    }
    return status;
}
// Export the process-global root namespace, if one is installed.
__EXPORT
zx_status_t fdio_ns_export_root(fdio_flat_namespace_t** out) {
    mtx_lock(&fdio_lock);
    zx_status_t status = (fdio_root_ns == NULL)
                             ? ZX_ERR_NOT_FOUND
                             : fdio_ns_export(fdio_root_ns, out);
    mtx_unlock(&fdio_lock);
    return status;
}
// Free a flat namespace produced by fdio_ns_export(). All tables and
// strings live in the single allocation headed by |ns|, so closing the
// handles and freeing the block releases everything.
__EXPORT
void fdio_ns_free_flat_namespace(fdio_flat_namespace_t* ns) {
    zx_handle_close_many(ns->handle, ns->count);
    free(ns);
}
__END_CDECLS