// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <fuchsia/io2/llcpp/fidl.h>
#include <lib/fdio/fd.h>
#include <lib/fdio/fdio.h>
#include <lib/sync/mutex.h>
#include <lib/zx/event.h>
#include <lib/zxio/null.h>
#include <lib/zxio/ops.h>
#include <sys/eventfd.h>
#include <zircon/assert.h>
#include <zircon/syscalls.h>

#include <algorithm>
#include <utility>

#include "fdio_unistd.h"
#include "internal.h"
namespace fio2 = llcpp::fuchsia::io2;
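
// The readable and writable states of the eventfd counter are mirrored onto
// these signals of the underlying zx::event so that poll() and related
// functions can wait on the handle directly.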
#define FDIO_EVENT_READABLE static_cast<zx_signals_t>(fio2::DeviceSignal::READABLE)
#define FDIO_EVENT_WRITABLE static_cast<zx_signals_t>(fio2::DeviceSignal::WRITABLE)

// An implementation of a POSIX eventfd.
typedef struct fdio_event {
  zxio_t io;

  // The zx::event that implements the eventfd, stored as a raw handle.
  zx_handle_t handle;

  sync_mutex_t lock;

  // The 64-bit counter described in eventfd(2).
  eventfd_t value __TA_GUARDED(lock);

  // The EFD_* flags the eventfd was created with.
  int flags __TA_GUARDED(lock);
} fdio_event_t;

static_assert(sizeof(fdio_event_t) <= sizeof(zxio_storage_t),
              "fdio_event_t must fit inside zxio_storage_t.");

static zx_status_t fdio_event_close(zxio_t* io) {
  fdio_event_t* event = reinterpret_cast<fdio_event_t*>(io);
  zx_handle_t handle = event->handle;
  event->handle = ZX_HANDLE_INVALID;
  zx_handle_close(handle);
  return ZX_OK;
}
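
// Recomputes the READABLE and WRITABLE signals on the underlying event from
// the current counter value: the eventfd is readable whenever the counter is
// nonzero, and writable as long as the counter is below its maximum value of
// UINT64_MAX - 1 (i.e., a write of at least 1 could still succeed).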
static void fdio_event_update_signals(fdio_event_t* event) __TA_REQUIRES(event->lock) {
  zx_signals_t set_mask = ZX_SIGNAL_NONE;
  if (event->value > 0) {
    set_mask |= FDIO_EVENT_READABLE;
  }
  if (event->value < UINT64_MAX - 1) {
    set_mask |= FDIO_EVENT_WRITABLE;
  }
  zx_status_t status =
      zx_object_signal(event->handle, FDIO_EVENT_READABLE | FDIO_EVENT_WRITABLE, set_mask);
  ZX_ASSERT(status == ZX_OK);
}
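
// Reads the counter per eventfd(2): in EFD_SEMAPHORE mode the counter is
// decremented by one and the value 1 is returned; otherwise the entire counter
// is returned and reset to zero. A zero counter cannot satisfy a read, so we
// return ZX_ERR_SHOULD_WAIT and let the caller block or fail with EAGAIN.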
static zx_status_t fdio_event_readv(zxio_t* io, const zx_iovec_t* vector, size_t vector_count,
                                    zxio_flags_t flags, size_t* out_actual) {
  if (fdio_iovec_get_capacity(vector, vector_count) < sizeof(uint64_t)) {
    return ZX_ERR_BUFFER_TOO_SMALL;
  }
  fdio_event_t* event = reinterpret_cast<fdio_event_t*>(io);
  sync_mutex_lock(&event->lock);
  if (event->value == 0u) {
    sync_mutex_unlock(&event->lock);
    return ZX_ERR_SHOULD_WAIT;
  }
  uint64_t result = 0u;
  if (event->flags & EFD_SEMAPHORE) {
    result = 1;
    event->value -= 1;
  } else {
    result = event->value;
    event->value = 0u;
  }
  fdio_iovec_copy_to(reinterpret_cast<const uint8_t*>(&result), sizeof(result), vector,
                     vector_count, out_actual);
  fdio_event_update_signals(event);
  sync_mutex_unlock(&event->lock);
  return ZX_OK;
}
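
// Writes to the counter per eventfd(2): the caller's 8-byte value is added to
// the counter. Writing UINT64_MAX is invalid, and an increment that would push
// the counter past its maximum of UINT64_MAX - 1 cannot complete until a read
// makes room; see the comment below on how that differs from Linux.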
static zx_status_t fdio_event_writev(zxio_t* io, const zx_iovec_t* vector, size_t vector_count,
                                     zxio_flags_t flags, size_t* out_actual) {
  uint64_t increment = 0u;
  size_t actual = 0u;
  fdio_iovec_copy_from(vector, vector_count, reinterpret_cast<uint8_t*>(&increment),
                       sizeof(increment), &actual);
  if (actual != sizeof(increment)) {
    return ZX_ERR_BUFFER_TOO_SMALL;
  }
  if (increment == UINT64_MAX) {
    // UINT64_MAX is specifically disallowed, presumably to catch clients passing -1 by mistake.
    return ZX_ERR_INVALID_ARGS;
  }
  fdio_event_t* event = reinterpret_cast<fdio_event_t*>(io);
  sync_mutex_lock(&event->lock);
  uint64_t new_value = 0u;
  if (add_overflow(event->value, increment, &new_value) || new_value == UINT64_MAX) {
    // If we overflow, we need to block until the next read, which means we need to clear the
    // writable signal. The next read is not guaranteed to make enough room for this write, but
    // the documentation says we should wake up and try again regardless.
    //
    // This design has an observable difference from Linux. If you make a write()
    // that goes down this codepath, this call will clear the POLLOUT bit, which
    // is observable using select() and similar functions, regardless of whether
    // the write is blocking or non-blocking. The Linux implementation
    // differentiates between blocking and non-blocking writes. Blocking writes
    // block internally without clearing the POLLOUT bit.
    //
    // To match the Linux behavior exactly, we would need to plumb the
    // information about whether this is a blocking or non-blocking write to
    // this location. If the write is non-blocking, we should return
    // ZX_ERR_SHOULD_WAIT without clearing the POLLOUT bit. (Of course, this
    // will cause code that attempts to wait for POLLOUT to spin hot, but that's
    // true on Linux as well.) If the write is blocking, then we should block on
    // a sync_completion_t, which should be signaled in fdio_event_readv.
    //
    // We hope the behavior we have implemented here is sufficiently compatible
    // to be useful. If not, we might need to restructure how we do blocking
    // read and write operations (e.g., by including the "should block" flag in
    // zxio_flags_t).
    zx_status_t status = zx_object_signal(event->handle, FDIO_EVENT_WRITABLE, ZX_SIGNAL_NONE);
    ZX_ASSERT(status == ZX_OK);
    sync_mutex_unlock(&event->lock);
    return ZX_ERR_SHOULD_WAIT;
  }
  event->value = new_value;
  fdio_event_update_signals(event);
  sync_mutex_unlock(&event->lock);
  *out_actual = actual;
  return ZX_OK;
}
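
// Translates the zxio signals a caller wants to wait for into the zx signals
// on the underlying event handle, and back again once the wait completes.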
static void fdio_event_wait_begin(zxio_t* io, zxio_signals_t zxio_signals, zx_handle_t* out_handle,
                                  zx_signals_t* out_zx_signals) {
  fdio_event_t* event = reinterpret_cast<fdio_event_t*>(io);
  zx_signals_t zx_signals = ZX_SIGNAL_NONE;
  if (zxio_signals & ZXIO_SIGNAL_READABLE) {
    zx_signals |= FDIO_EVENT_READABLE;
  }
  if (zxio_signals & ZXIO_SIGNAL_WRITABLE) {
    zx_signals |= FDIO_EVENT_WRITABLE;
  }
  *out_handle = event->handle;
  *out_zx_signals = zx_signals;
}

static void fdio_event_wait_end(zxio_t* io, zx_signals_t zx_signals,
                                zxio_signals_t* out_zxio_signals) {
  zxio_signals_t zxio_signals = ZXIO_SIGNAL_NONE;
  if (zx_signals & FDIO_EVENT_READABLE) {
    zxio_signals |= ZXIO_SIGNAL_READABLE;
  }
  if (zx_signals & FDIO_EVENT_WRITABLE) {
    zxio_signals |= ZXIO_SIGNAL_WRITABLE;
  }
  *out_zxio_signals = zxio_signals;
}
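
// The zxio operation table for eventfd objects. The table starts as
// zxio_default_ops, so operations not overridden here keep their default
// behavior.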
static constexpr zxio_ops_t fdio_event_ops = []() {
  zxio_ops_t ops = zxio_default_ops;
  ops.close = fdio_event_close;
  ops.readv = fdio_event_readv;
  ops.writev = fdio_event_writev;
  ops.wait_begin = fdio_event_wait_begin;
  ops.wait_end = fdio_event_wait_end;
  return ops;
}();
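
// Wraps the given zx::event in an fdio object that implements the eventfd
// protocol. Takes ownership of |handle|. The lock is acquired before the
// initial value and signals are set so that the __TA_GUARDED annotations on
// |value| and |flags| are satisfied, even though no other thread can see the
// object yet.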
static fdio_t* fdio_event_create(zx::event handle, eventfd_t initial_value, int flags) {
  zxio_storage_t* storage = nullptr;
  fdio_t* io = fdio_zxio_create(&storage);
  if (io == nullptr) {
    return nullptr;
  }
  fdio_event_t* event = reinterpret_cast<fdio_event_t*>(storage);
  zxio_init(&event->io, &fdio_event_ops);
  event->handle = handle.release();
  event->lock = {};
  sync_mutex_lock(&event->lock);
  event->value = initial_value;
  event->flags = flags;
  fdio_event_update_signals(event);
  sync_mutex_unlock(&event->lock);
  return io;
}

__EXPORT
int eventfd(unsigned int initval, int flags) {
  if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) {
    return ERRNO(EINVAL);
  }
  zx::event event;
  zx_status_t status = zx::event::create(0, &event);
  if (status != ZX_OK) {
    return ERROR(status);
  }
  fdio_t* io = nullptr;
  if ((io = fdio_event_create(std::move(event), initval, flags)) == nullptr) {
    return ERROR(ZX_ERR_NO_MEMORY);
  }
  if (flags & EFD_CLOEXEC) {
    *fdio_get_ioflag(io) |= IOFLAG_CLOEXEC;
  }
  if (flags & EFD_NONBLOCK) {
    *fdio_get_ioflag(io) |= IOFLAG_NONBLOCK;
  }
  int fd = fdio_bind_to_fd(io, -1, 0);
  if (fd < 0) {
    fdio_release(io);
  }
  // fdio_bind_to_fd already sets errno.
  return fd;
}
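
// eventfd_read and eventfd_write are the thin wrappers over read() and write()
// described in eventfd(2): they return 0 only if the full 8-byte counter value
// is transferred, and -1 otherwise.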
__EXPORT
int eventfd_read(int fd, eventfd_t* value) {
  return read(fd, value, sizeof(eventfd_t)) != sizeof(eventfd_t) ? -1 : 0;
}

__EXPORT
int eventfd_write(int fd, eventfd_t value) {
  return write(fd, &value, sizeof(eventfd_t)) != sizeof(eventfd_t) ? -1 : 0;
}
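
// A minimal usage sketch (standard POSIX eventfd usage, not specific to this
// implementation):
//
//   int fd = eventfd(0, EFD_NONBLOCK);
//   if (fd < 0) {
//     // eventfd sets errno on failure.
//   }
//   eventfd_write(fd, 1);      // Counter becomes 1; fd is now readable.
//   eventfd_t value = 0;
//   eventfd_read(fd, &value);  // Reads 1 and resets the counter to 0.
//   close(fd);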