blob: ee121bec08ae60d5481448925ad7044ed113a38e [file] [log] [blame]
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <assert.h>
#include <lib/fdio/fdio.h>
#include <lib/fdio/unsafe.h>
#include <lib/fit/defer.h>
#include <lib/zxio/null.h>
#include <lib/zxio/ops.h>
#include <poll.h>
#include <sys/epoll.h>
#include <threads.h>
#include <zircon/syscalls.h>
#include <zircon/syscalls/port.h>
#include <unordered_map>
#include <vector>
#include <fbl/mutex.h>
#include "private.h"
// We use a subset of the EPOLL* macros as canonical ways to
// express types of events to wait for in fdio, and depend on the
// corresponding POLL* matching them.
static_assert(EPOLLIN == POLLIN, "");
static_assert(EPOLLPRI == POLLPRI, "");
static_assert(EPOLLOUT == POLLOUT, "");
static_assert(EPOLLRDNORM == POLLRDNORM, "");
static_assert(EPOLLRDBAND == POLLRDBAND, "");
static_assert(EPOLLWRNORM == POLLWRNORM, "");
static_assert(EPOLLWRBAND == POLLWRBAND, "");
static_assert(EPOLLMSG == POLLMSG, "");
static_assert(EPOLLERR == POLLERR, "");
static_assert(EPOLLHUP == POLLHUP, "");
static_assert(EPOLLRDHUP == POLLRDHUP, "");
struct nix_epoll_fd {
nix_epoll_fd(uint32_t events, epoll_data data, bool edge_triggered) {
handle = ZX_HANDLE_INVALID;
signals = ZXIO_SIGNAL_NONE;
event.events = events;
event.data = data;
wait_options = (edge_triggered ? ZX_WAIT_ASYNC_EDGE : 0);
}
epoll_event event;
zx_handle_t handle; // a borrowed handle from fdio_unsafe_wait_begin
zx_signals_t signals;
uint32_t wait_options;
};
struct nix_epoll {
zxio_t io;
zx_handle_t port;
mtx_t lock;
std::unordered_map<uint64_t, nix_epoll_fd>* fd_to_event;
std::vector<uint64_t>* inactive_fds;
};
static_assert(sizeof(nix_epoll) <= sizeof(zxio_storage_t),
"nix_epoll must fit inside zxio_storage_t.");
inline nix_epoll* zxio_to_epoll(zxio_t* zxio) { return reinterpret_cast<nix_epoll*>(zxio); }
bool epoll_remove_fd(zxio_t* io, int fd) {
nix_epoll* epoll = zxio_to_epoll(io);
auto pos = epoll->fd_to_event->find(fd);
if (pos == epoll->fd_to_event->end()) {
return false;
}
zx_port_cancel(epoll->port, pos->second.handle, static_cast<uint64_t>(fd));
epoll->fd_to_event->erase(pos);
return true;
}
zx_status_t epoll_close(zxio_t* io) {
nix_epoll* epoll = zxio_to_epoll(io);
for (auto it = epoll->fd_to_event->begin(); it != epoll->fd_to_event->end(); ++it) {
zx_port_cancel(epoll->port, it->second.handle, it->first);
}
delete epoll->fd_to_event;
delete epoll->inactive_fds;
zx_handle_close(epoll->port);
return ZX_OK;
}
static constexpr zxio_ops_t epoll_ops = []() {
zxio_ops_t ops = zxio_default_ops;
ops.close = epoll_close;
return ops;
}();
bool fdio_is_epoll(fdio_t* io) {
if (!io) {
return false;
}
return zxio_get_ops(fdio_get_zxio(io)) == &epoll_ops;
}
__EXPORT
int epoll_create(int size) {
if (size < 1) {
return ERRNO(EINVAL);
}
return epoll_create1(0);
}
__EXPORT
int epoll_create1(int flags) {
// |flags| is unused as the only valid value is EPOLL_CLOEXEC
// which is meaningless, since there is no exec on Fuchsia.
// Do not throw an error if specified, as exising code
// will use this.
zxio_storage_t* storage = nullptr;
fdio_t* fdio = fdio_zxio_create(&storage);
if (fdio == nullptr) {
return ERRNO(ENOMEM);
}
nix_epoll* epoll = reinterpret_cast<nix_epoll*>(storage);
zxio_init(&epoll->io, &epoll_ops);
epoll->port = ZX_HANDLE_INVALID;
epoll->fd_to_event = new std::unordered_map<uint64_t, nix_epoll_fd>;
epoll->inactive_fds = new std::vector<uint64_t>;
epoll->lock = MTX_INIT;
zx_status_t status = zx_port_create(0, &epoll->port);
if (status != ZX_OK) {
fdio_unsafe_release(fdio);
return ERRNO(ENOMEM);
}
int fd = fdio_bind_to_fd(fdio, -1, 0);
if (fd < 0) {
fdio_unsafe_release(fdio);
return ERRNO(ENOMEM);
}
return fd;
}
__EXPORT
int epoll_ctl(int epfd, int op, int fd, struct epoll_event* event) {
if (op != EPOLL_CTL_ADD && op != EPOLL_CTL_MOD && op != EPOLL_CTL_DEL) {
return ERRNO(EINVAL);
}
// These are the only flags supported in fdio_zxio_wait_begin,
// fdio_zxio_remote_wait_begin and poll_events_to_zxio_signals
const uint32_t allowed_events = EPOLLIN | EPOLLPRI | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLRDHUP;
bool edge_triggered = (event->events & EPOLLET) != 0;
if (event->events & ~(allowed_events | EPOLLET)) {
return ERRNO(ENOTSUP);
}
fdio_t* io = fdio_unsafe_fd_to_io(epfd);
auto clean_io = fit::defer([io] {
if (io) {
fdio_unsafe_release(io);
}
});
if (!fdio_is_epoll(io)) {
return ERRNO(EBADF);
}
nix_epoll* epoll = zxio_to_epoll(fdio_get_zxio(io));
mtx_lock(&epoll->lock);
fdio_t* pollio = fdio_unsafe_fd_to_io(fd);
if (pollio == nullptr) {
mtx_unlock(&epoll->lock);
return ERRNO(EBADF);
}
auto clean_io_poll = fit::defer([pollio] { fdio_unsafe_release(pollio); });
if (op == EPOLL_CTL_MOD || op == EPOLL_CTL_DEL) {
if (!epoll_remove_fd(fdio_get_zxio(io), fd)) {
mtx_unlock(&epoll->lock);
return ERRNO(ENOENT);
}
}
if (op == EPOLL_CTL_MOD || op == EPOLL_CTL_ADD) {
uint64_t key = static_cast<uint64_t>(fd);
auto res = epoll->fd_to_event->emplace(
key, nix_epoll_fd(event->events & allowed_events, event->data, edge_triggered));
if (!res.second) {
mtx_unlock(&epoll->lock);
return ERRNO(EEXIST);
}
fdio_unsafe_wait_begin(pollio, event->events, &res.first->second.handle,
&res.first->second.signals);
zx_status_t status = zx_object_wait_async(res.first->second.handle, epoll->port, key,
res.first->second.signals, 0);
if (status != ZX_OK) {
mtx_unlock(&epoll->lock);
return ERRNO(EINVAL);
}
}
mtx_unlock(&epoll->lock);
return 0;
}
__EXPORT
int epoll_wait(int epfd, struct epoll_event* events, int maxevents, int timeout) {
if (maxevents < 1) {
return ERRNO(EINVAL);
}
fdio_t* io = fdio_unsafe_fd_to_io(epfd);
auto clean_io = fit::defer([io] {
if (io) {
fdio_unsafe_release(io);
}
});
if (!fdio_is_epoll(io)) {
return ERRNO(EBADF);
}
zx_time_t deadline = ZX_TIME_INFINITE;
if (timeout >= 0) {
deadline = zx_deadline_after(zx_duration_from_msec(timeout));
} else if (timeout != -1) {
return ERRNO(EINVAL);
}
nix_epoll* epoll = zxio_to_epoll(fdio_get_zxio(io));
mtx_lock(&epoll->lock);
bool wait_err = false;
for (auto it = epoll->inactive_fds->begin(); it != epoll->inactive_fds->end(); ++it) {
uint64_t fd = *it;
auto evinfo = epoll->fd_to_event->find(fd);
if (evinfo != epoll->fd_to_event->end()) {
fdio_t* pollio = fdio_unsafe_fd_to_io(static_cast<int>(fd));
if (pollio) {
fdio_unsafe_wait_begin(pollio, evinfo->second.event.events, &evinfo->second.handle,
&evinfo->second.signals);
zx_status_t status =
zx_object_wait_async(evinfo->second.handle, epoll->port, fd, evinfo->second.signals,
evinfo->second.wait_options);
fdio_unsafe_release(pollio);
if (status != ZX_OK) {
wait_err = true;
}
}
}
}
epoll->inactive_fds->clear();
mtx_unlock(&epoll->lock);
if (wait_err) {
return ERRNO(EINVAL);
}
// Ideally we should have a means of waiting on the port that
// that can return a vector of packets that are ready.
std::vector<zx_port_packet_t> packets;
zx_port_packet_t packet;
zx_status_t status = zx_port_wait(epoll->port, deadline, &packet);
if (status == ZX_OK) {
packets.push_back(packet);
}
while (status == ZX_OK && packets.size() < static_cast<unsigned long>(maxevents)) {
status = zx_port_wait(epoll->port, ZX_TIME_INFINITE_PAST, &packet);
if (status == ZX_OK) {
packets.push_back(packet);
}
}
mtx_lock(&epoll->lock);
int ready_count = 0;
wait_err = false;
if (status == ZX_OK || status == ZX_ERR_TIMED_OUT) {
for (auto pkt : packets) {
// The packet type produced by zx_object_wait_async.
if (pkt.type == ZX_PKT_TYPE_SIGNAL_ONE) {
// |pkt.key| is the file descriptor.
int fd = static_cast<int>(pkt.key);
fdio_t* pollio = fdio_unsafe_fd_to_io(fd);
if (pollio) {
uint32_t evs;
fdio_unsafe_wait_end(pollio, pkt.signal.observed, &evs);
fdio_unsafe_release(pollio);
auto evinfo = epoll->fd_to_event->find(fd);
// This file descriptor could have been removed
// with epoll_ctl/EPOLL_CTL_DEL, so it will not
// be in the |fd_to_event| map. Ignore it.
if (evinfo != epoll->fd_to_event->end()) {
events[ready_count].events = evs;
events[ready_count].data = evinfo->second.event.data;
// If this is not edge-triggered, wait until the next
// epoll_wait to call zx_object_wait_async. If it is
// edge-triggered, start waiting immediately so that when
// the I/O returns EWOULDBLOCK, the fd is already being
// waited on.
if ((evinfo->second.wait_options & ZX_WAIT_ASYNC_EDGE) == 0) {
epoll->inactive_fds->push_back(pkt.key);
} else {
zx_status_t wait_status =
zx_object_wait_async(evinfo->second.handle, epoll->port, fd,
evinfo->second.signals, evinfo->second.wait_options);
if (wait_status != ZX_OK) {
wait_err = true;
}
}
ready_count++;
}
}
}
}
}
mtx_unlock(&epoll->lock);
if (wait_err || (status != ZX_OK && status != ZX_ERR_TIMED_OUT)) {
return ERRNO(EINVAL);
}
return ready_count;
}
__EXPORT
int epoll_pwait(int epfd, struct epoll_event* events, int maxevents, int timeout,
const sigset_t* sigmask) {
return ERRNO(ENOSYS);
}