blob: eff09f34f51ccceb781b5c38a4968813f28657d4 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use crate::bpf::attachments::SetSockOptProgramResult;
use crate::mm::{IOVecPtr, MemoryAccessor, MemoryAccessorExt};
use crate::security;
use crate::syscalls::time::TimeSpecPtr;
use crate::task::{CurrentTask, IpTables, Task, WaitCallback, Waiter};
use crate::vfs::buffers::{
AncillaryData, ControlMsg, UserBuffersInputBuffer, UserBuffersOutputBuffer,
};
use crate::vfs::socket::{
SA_FAMILY_SIZE, SA_STORAGE_SIZE, Socket, SocketAddress, SocketDomain, SocketFile,
SocketMessageFlags, SocketPeer, SocketProtocol, SocketShutdownFlags, SocketType, UnixSocket,
resolve_unix_socket_address,
};
use crate::vfs::{FdFlags, FdNumber, FileHandle, FsString, LookupContext};
use starnix_logging::{log_trace, track_stub};
use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
use starnix_types::augmented::Augmented;
use starnix_types::time::duration_from_timespec;
use starnix_types::user_buffer::{UserBuffer, UserBuffers};
use starnix_uapi::auth::CAP_NET_BIND_SERVICE;
use starnix_uapi::errors::{EEXIST, EINPROGRESS, Errno};
use starnix_uapi::file_mode::FileMode;
use starnix_uapi::math::round_up_to_increment;
use starnix_uapi::open_flags::OpenFlags;
use starnix_uapi::user_address::{
ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserRef,
};
use starnix_uapi::user_value::UserValue;
use starnix_uapi::vfs::FdEvents;
use starnix_uapi::{
MSG_CTRUNC, MSG_DONTWAIT, MSG_TRUNC, MSG_WAITFORONE, SHUT_RD, SHUT_RDWR, SHUT_WR, SOCK_CLOEXEC,
SOCK_NONBLOCK, UIO_MAXIOV, errno, error, socklen_t, uapi,
};
use std::ops::DerefMut;
uapi::check_arch_independent_layout! {
socklen_t {}
}
/// A `msghdr` can be augmented with a `UserBuffer`. In that case, the `UserBuffer` is used for
/// the I/O, instead of the `iovec` fields from the `msghdr`.
pub type WithAlternateBuffer<T> = Augmented<T, UserBuffer>;
pub type MsgHdrPtr = MappingMultiArchUserRef<MsgHdr, uapi::msghdr, uapi::arch32::msghdr>;
#[derive(Debug, Clone)]
pub struct MsgHdr {
pub name: UserAddress,
pub name_len: socklen_t,
pub iov: IOVecPtr,
pub iovlen: UserValue<usize>,
pub control: UserAddress,
pub control_len: usize,
pub flags: u32,
}
/// A reference to a `msghdr`.
///
/// This enum is used to abstract over whether the `msghdr` is in user memory (and needs to be
/// read) or has been constructed in the kernel. This is used by `io_uring` to provide a buffer
/// for `recvmsg`.
#[derive(Debug, Clone)]
pub enum MsgHdrRef {
Ptr(MsgHdrPtr),
Value(WithAlternateBuffer<MsgHdr>),
}
impl From<MsgHdrPtr> for MsgHdrRef {
fn from(ptr: MsgHdrPtr) -> Self {
Self::Ptr(ptr)
}
}
impl From<WithAlternateBuffer<MsgHdr>> for MsgHdrRef {
fn from(value: WithAlternateBuffer<MsgHdr>) -> Self {
Self::Value(value)
}
}
pub type MMsgHdrPtr = MappingMultiArchUserRef<MMsgHdr, uapi::mmsghdr, uapi::arch32::mmsghdr>;
pub struct MMsgHdr {
hdr: MsgHdr,
len: usize,
}
uapi::arch_map_data! {
BidiTryFrom<MsgHdr, msghdr> {
name = msg_name;
name_len = msg_namelen;
iov = msg_iov;
iovlen = msg_iovlen;
control = msg_control;
control_len = msg_controllen;
flags = msg_flags;
}
BidiTryFrom<MMsgHdr, mmsghdr> {
hdr = msg_hdr;
len = msg_len;
}
}
pub type CMsgHdrPtr = MultiArchUserRef<uapi::cmsghdr, uapi::arch32::cmsghdr>;
pub fn sys_socket(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
domain: u32,
socket_type: u32,
protocol: u32,
) -> Result<FdNumber, Errno> {
let flags = socket_type & (SOCK_NONBLOCK | SOCK_CLOEXEC);
let domain = parse_socket_domain(domain)?;
let socket_type = parse_socket_type(domain, socket_type)?;
// Should we use parse_socket_protocol here?
let protocol = SocketProtocol::from_raw(protocol);
let open_flags = socket_flags_to_open_flags(flags);
let socket_file = SocketFile::new_socket(
locked,
current_task,
domain,
socket_type,
open_flags,
protocol,
/*kernel_private=*/ false,
)?;
let fd_flags = socket_flags_to_fd_flags(flags);
let fd = current_task.add_file(locked, socket_file, fd_flags)?;
Ok(fd)
}
fn socket_flags_to_open_flags(flags: u32) -> OpenFlags {
OpenFlags::RDWR
| if flags & SOCK_NONBLOCK != 0 { OpenFlags::NONBLOCK } else { OpenFlags::empty() }
}
fn socket_flags_to_fd_flags(flags: u32) -> FdFlags {
if flags & SOCK_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }
}
fn parse_socket_domain(domain: u32) -> Result<SocketDomain, Errno> {
SocketDomain::from_raw(domain.try_into().map_err(|_| errno!(EAFNOSUPPORT))?).ok_or_else(|| {
track_stub!(TODO("https://fxbug.dev/322875074"), "parse socket domain", domain);
errno!(EAFNOSUPPORT)
})
}
fn parse_socket_type(domain: SocketDomain, socket_type: u32) -> Result<SocketType, Errno> {
let socket_type = SocketType::from_raw(socket_type & 0xf).ok_or_else(|| {
track_stub!(TODO("https://fxbug.dev/322875418"), "parse socket type", socket_type);
errno!(EINVAL)
})?;
// For AF_UNIX, SOCK_RAW sockets are treated as if they were SOCK_DGRAM.
Ok(if domain == SocketDomain::Unix && socket_type == SocketType::Raw {
SocketType::Datagram
} else {
socket_type
})
}
fn parse_socket_protocol(
domain: SocketDomain,
socket_type: SocketType,
protocol: u32,
) -> Result<SocketProtocol, Errno> {
let protocol = SocketProtocol::from_raw(protocol);
if domain == SocketDomain::Inet {
match (socket_type, protocol) {
(SocketType::Raw, _) => {
// Should we have different behavior error when called by root?
return error!(EPROTONOSUPPORT);
}
(SocketType::Datagram, SocketProtocol::UDP) => (),
(SocketType::Datagram, _) => return error!(EPROTONOSUPPORT),
(SocketType::Stream, SocketProtocol::TCP) => (),
(SocketType::Stream, _) => return error!(EPROTONOSUPPORT),
_ => (),
}
}
Ok(protocol)
}
fn parse_socket_address(
task: &Task,
user_socket_address: UserAddress,
user_address_length: usize,
) -> Result<SocketAddress, Errno> {
if user_address_length < SA_FAMILY_SIZE || user_address_length > SA_STORAGE_SIZE {
return error!(EINVAL);
}
let address = task.read_memory_to_vec(user_socket_address, user_address_length)?;
SocketAddress::from_bytes(address)
}
fn maybe_parse_socket_address(
task: &Task,
user_socket_address: UserAddress,
user_address_length: usize,
) -> Result<Option<SocketAddress>, Errno> {
if user_address_length > i32::MAX as usize {
return error!(EINVAL);
}
Ok(if user_socket_address.is_null() {
None
} else {
Some(parse_socket_address(task, user_socket_address, user_address_length)?)
})
}
// See "Autobind feature" section of https://man7.org/linux/man-pages/man7/unix.7.html
fn generate_autobind_address() -> FsString {
let mut bytes = [0u8; 4];
zx::cprng_draw(&mut bytes);
let value = u32::from_ne_bytes(bytes) & 0xFFFFF;
format!("\0{value:05x}").into()
}
pub fn sys_bind(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_socket_address: UserAddress,
user_address_length: usize,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
let address = parse_socket_address(current_task, user_socket_address, user_address_length)?;
if !address.valid_for_domain(socket.domain) {
return match socket.domain {
SocketDomain::Unix
| SocketDomain::Vsock
| SocketDomain::Inet6
| SocketDomain::Netlink
| SocketDomain::Key
| SocketDomain::Packet
| SocketDomain::Qipcrtr => error!(EINVAL),
SocketDomain::Inet => error!(EAFNOSUPPORT),
};
}
if let Some(port) = address.maybe_inet_port() {
// See <https://man7.org/linux/man-pages/man7/ip.7.html>:
//
// The port numbers below 1024 are called privileged ports (or
// sometimes: reserved ports). Only a privileged process (on Linux:
// a process that has the CAP_NET_BIND_SERVICE capability in the
// user namespace governing its network namespace) may bind(2) to
// these sockets.
if port != 0 && port < 1024 {
security::check_task_capable(current_task, CAP_NET_BIND_SERVICE)
.map_err(|_| errno!(EACCES))?;
}
}
security::check_socket_bind_access(current_task, socket, &address)?;
match address {
SocketAddress::Unspecified => return error!(EINVAL),
SocketAddress::Unix(mut name) => {
if name.is_empty() {
// If the name is empty, then we're supposed to generate an
// autobind address, which is always abstract.
name = generate_autobind_address();
}
// If there is a null byte at the start of the sun_path, then the
// address is abstract.
if name[0] == b'\0' {
current_task.abstract_socket_namespace.bind(locked, current_task, name, socket)?;
} else {
let mode = file.node().info().mode;
let mode = current_task.fs().apply_umask(mode).with_type(FileMode::IFSOCK);
let (parent, basename) = current_task.lookup_parent_at(
locked,
&mut LookupContext::default(),
FdNumber::AT_FDCWD,
name.as_ref(),
)?;
parent
.bind_socket(
locked,
current_task,
basename,
socket.clone(),
SocketAddress::Unix(name.clone()),
mode,
)
.map_err(|errno| if errno == EEXIST { errno!(EADDRINUSE) } else { errno })?;
}
}
SocketAddress::Vsock { port, .. } => {
current_task.abstract_vsock_namespace.bind(locked, current_task, port, socket)?;
}
SocketAddress::Inet(_)
| SocketAddress::Inet6(_)
| SocketAddress::Netlink(_)
| SocketAddress::Packet(_)
| SocketAddress::Qipcrtr(_) => socket.bind(locked, current_task, address)?,
}
Ok(())
}
pub fn sys_listen(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
backlog: i32,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
socket.listen(locked, current_task, backlog)?;
Ok(())
}
pub fn sys_accept(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_socket_address: UserAddress,
user_address_length: UserRef<socklen_t>,
) -> Result<FdNumber, Errno> {
sys_accept4(locked, current_task, fd, user_socket_address, user_address_length, 0)
}
pub fn sys_accept4(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_socket_address: UserAddress,
user_address_length: UserRef<socklen_t>,
flags: u32,
) -> Result<FdNumber, Errno> {
let file = current_task.files.get(fd)?;
let listening_socket = Socket::get_from_file(&file)?;
let accepted_socket = file.blocking_op(
locked,
current_task,
FdEvents::POLLIN | FdEvents::POLLHUP,
None,
|locked| listening_socket.accept(locked, current_task),
)?;
if !user_socket_address.is_null() {
let address_bytes = accepted_socket.getpeername(locked)?.to_bytes();
write_socket_address(
current_task,
user_socket_address,
user_address_length,
&address_bytes,
)?;
}
let open_flags = socket_flags_to_open_flags(flags);
let accepted_socket_file = SocketFile::from_socket(
locked,
current_task,
accepted_socket,
open_flags,
/* kernel_private= */ false,
)?;
let listening_socket = SocketFile::get_from_file(&file)?;
let accepted_socket = SocketFile::get_from_file(&accepted_socket_file)?;
security::socket_accept(current_task, listening_socket, accepted_socket)?;
let fd_flags = if flags & SOCK_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
let accepted_fd = current_task.add_file(locked, accepted_socket_file, fd_flags)?;
Ok(accepted_fd)
}
pub fn sys_connect(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_socket_address: UserAddress,
user_address_length: usize,
) -> Result<(), Errno> {
let client = current_task.files.get(fd)?;
let client = SocketFile::get_from_file(&client)?;
let address = parse_socket_address(current_task, user_socket_address, user_address_length)?;
let peer = match address {
SocketAddress::Unspecified => return error!(EAFNOSUPPORT),
SocketAddress::Unix(ref name) => {
log_trace!("connect to unix socket named \"{name}\"");
if name.is_empty() {
return error!(ECONNREFUSED);
}
SocketPeer::Handle(resolve_unix_socket_address(locked, current_task, name.as_ref())?)
}
// TODO(https://fxbug.dev/445433238): Connect not available for AF_VSOCK
SocketAddress::Vsock { .. } => return error!(ENOSYS),
SocketAddress::Inet(ref addr) | SocketAddress::Inet6(ref addr) => {
log_trace!("connect to inet socket named {:?}", addr);
SocketPeer::Address(address)
}
SocketAddress::Netlink(_) => SocketPeer::Address(address),
SocketAddress::Packet(ref addr) => {
log_trace!("connect to packet socket named {:?}", addr);
SocketPeer::Address(address)
}
SocketAddress::Qipcrtr(ref addr) => {
log_trace!("connect to qipcrtr socket named {:?}", addr);
SocketPeer::Address(address)
}
};
let result = client.connect(locked, current_task, peer.clone());
if client.file().is_non_blocking() {
return result;
}
match result {
// EINPROGRESS may be returned for inet sockets when `connect()` is completed
// asynchronously.
Err(errno) if errno.code == EINPROGRESS => {
let waiter = Waiter::new();
client.file().wait_async(
locked,
current_task,
&waiter,
FdEvents::POLLOUT,
WaitCallback::none(),
);
if !client.file().query_events(locked, current_task)?.contains(FdEvents::POLLOUT) {
waiter.wait(locked, current_task)?;
}
client.connect(locked, current_task, peer)
}
// TODO(tbodt): Support blocking when the UNIX domain socket queue fills up. This one's
// weird because as far as I can tell, removing a socket from the queue does not actually
// trigger FdEvents on anything.
result => result,
}
}
fn write_socket_address(
current_task: &CurrentTask,
user_socket_address: UserAddress,
user_address_length: UserRef<socklen_t>,
address_bytes: &[u8],
) -> Result<(), Errno> {
let capacity = current_task.read_object(user_address_length)?;
if capacity > i32::MAX as socklen_t {
return error!(EINVAL);
}
let length = address_bytes.len() as socklen_t;
if length > 0 {
let actual = std::cmp::min(length, capacity) as usize;
current_task.write_memory(user_socket_address, &address_bytes[..actual])?;
}
current_task.write_object(user_address_length, &length)?;
Ok(())
}
pub fn sys_getsockname(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_socket_address: UserAddress,
user_address_length: UserRef<socklen_t>,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
security::check_socket_getsockname_access(current_task, socket)?;
let address_bytes = socket.getsockname(locked)?.to_bytes();
write_socket_address(current_task, user_socket_address, user_address_length, &address_bytes)?;
Ok(())
}
pub fn sys_getpeername(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_socket_address: UserAddress,
user_address_length: UserRef<socklen_t>,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
security::check_socket_getpeername_access(current_task, socket)?;
let address_bytes = socket.getpeername(locked)?.to_bytes();
write_socket_address(current_task, user_socket_address, user_address_length, &address_bytes)?;
Ok(())
}
pub fn sys_socketpair(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
domain: u32,
socket_type: u32,
protocol: u32,
user_sockets: UserRef<[FdNumber; 2]>,
) -> Result<(), Errno> {
let flags = socket_type & (SOCK_NONBLOCK | SOCK_CLOEXEC);
let domain = parse_socket_domain(domain)?;
if !matches!(domain, SocketDomain::Unix | SocketDomain::Inet) {
return error!(EAFNOSUPPORT);
}
let socket_type = parse_socket_type(domain, socket_type)?;
let _protocol = parse_socket_protocol(domain, socket_type, protocol)?;
if domain != SocketDomain::Unix {
return error!(EOPNOTSUPP);
}
let open_flags = socket_flags_to_open_flags(flags);
let (left, right) =
UnixSocket::new_pair(locked, current_task, domain, socket_type, open_flags)?;
let fd_flags = socket_flags_to_fd_flags(flags);
// TODO: Eventually this will need to allocate two fd numbers (each of which could
// potentially fail), and only populate the fd numbers (which can't fail) if both allocations
// succeed.
let left_fd = current_task.add_file(locked, left, fd_flags)?;
let right_fd = current_task.add_file(locked, right, fd_flags)?;
let fds = [left_fd, right_fd];
log_trace!("socketpair -> [{:#x}, {:#x}]", fds[0].raw(), fds[1].raw());
current_task.write_object(user_sockets, &fds)?;
Ok(())
}
fn read_iovec_from_msghdr(
current_task: &CurrentTask,
message_header: WithAlternateBuffer<&MsgHdr>,
) -> Result<UserBuffers, Errno> {
if let WithAlternateBuffer::WithAux(_, b) = message_header {
return Ok(UserBuffers::from_buf([b]));
}
let iovec_count = message_header.iovlen;
// In `CurrentTask::read_iovec()` the same check fails with `EINVAL`. This works for all
// syscalls that use `iovec`, except `sendmsg()` and `recvmsg()`, which need to fail with
// EMSGSIZE.
if iovec_count.raw() > UIO_MAXIOV as usize {
return error!(EMSGSIZE);
}
current_task.read_iovec(message_header.iov, iovec_count)
}
fn recvmsg_internal<L>(
locked: &mut Locked<L>,
current_task: &CurrentTask,
file: &FileHandle,
user_message_header: &mut MsgHdrRef,
flags: u32,
deadline: Option<zx::MonotonicInstant>,
) -> Result<usize, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
let mut message_header = match *user_message_header {
MsgHdrRef::Ptr(ptr) => current_task.read_multi_arch_object(ptr)?.into(),
MsgHdrRef::Value(ref value) => value.clone(),
};
let result = recvmsg_internal_with_header(
locked,
current_task,
file,
message_header.as_mut(),
flags,
deadline,
)?;
match *user_message_header {
MsgHdrRef::Ptr(ptr) => {
current_task.write_multi_arch_object(ptr, message_header.extract())?;
}
MsgHdrRef::Value(ref mut value) => {
*value.deref_mut() = message_header.extract();
}
}
Ok(result)
}
fn recvmsg_internal_with_header<L>(
locked: &mut Locked<L>,
current_task: &CurrentTask,
file: &FileHandle,
mut message_header: WithAlternateBuffer<&mut MsgHdr>,
flags: u32,
deadline: Option<zx::MonotonicInstant>,
) -> Result<usize, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
let iovec = read_iovec_from_msghdr(current_task, message_header.as_unmut())?;
let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
let socket_ops = file.downcast_file::<SocketFile>().unwrap();
let info = socket_ops.recvmsg(
locked,
current_task,
file,
&mut UserBuffersOutputBuffer::unified_new(current_task, iovec)?,
flags,
deadline,
)?;
message_header.flags = 0;
let cmsg_buffer_size = message_header.control_len;
let mut cmsg_bytes_written = 0;
let header_size = CMsgHdrPtr::size_of_object_for(current_task);
for ancillary_data in info.ancillary_data {
if ancillary_data.total_size(current_task) == 0 {
// Skip zero-byte ancillary data on the receiving end. Not doing this trips this
// assert:
// https://cs.android.com/android/platform/superproject/+/master:system/libbase/cmsg.cpp;l=144;drc=15ec2c7a23cda814351a064a345a8270ed8c83ab
continue;
}
let expected_size = header_size + ancillary_data.total_size(current_task);
let message_bytes = ancillary_data.into_bytes(
locked,
current_task,
flags,
cmsg_buffer_size - cmsg_bytes_written,
)?;
// If the message is smaller than expected, set the MSG_CTRUNC flag, so the caller can tell
// some of the message is missing.
let truncated = message_bytes.len() < expected_size;
if truncated {
message_header.flags |= MSG_CTRUNC;
}
if message_bytes.len() < header_size {
// Can't fit the header, so stop trying to write.
break;
}
if !message_bytes.is_empty() {
current_task
.write_memory((message_header.control + cmsg_bytes_written)?, &message_bytes)?;
cmsg_bytes_written += message_bytes.len();
if !truncated {
cmsg_bytes_written = cmsg_align(current_task, cmsg_bytes_written)?;
}
}
}
message_header.control_len = cmsg_bytes_written;
let msg_name = message_header.name;
if !msg_name.is_null() {
if message_header.name_len > i32::MAX as u32 {
return error!(EINVAL);
}
let bytes = info.address.map(|a| a.to_bytes()).unwrap_or_else(|| vec![]);
let num_bytes = std::cmp::min(message_header.name_len as usize, bytes.len());
message_header.name_len = bytes.len() as u32;
if num_bytes > 0 {
current_task.write_memory(msg_name, &bytes[..num_bytes])?;
}
}
if info.bytes_read != info.message_length {
message_header.flags |= MSG_TRUNC;
}
if flags.contains(SocketMessageFlags::TRUNC) {
Ok(info.message_length)
} else {
Ok(info.bytes_read)
}
}
pub fn sys_recvmsg(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_message_header: MsgHdrPtr,
flags: u32,
) -> Result<usize, Errno> {
recvmsg_impl(locked, current_task, fd, &mut user_message_header.into(), flags)
}
/// Implementation of `recvmsg`.
///
/// This function is used by `sys_recvmsg`, but can also be called from other parts of the kernel
/// that need to override the `iovec` from the `msghdr`. For example, when using `io_uring` with
/// ring buffers.
pub fn recvmsg_impl(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_message_header: &mut MsgHdrRef,
flags: u32,
) -> Result<usize, Errno> {
let file = current_task.files.get(fd)?;
if !file.node().is_sock() {
return error!(ENOTSOCK);
}
recvmsg_internal(locked, current_task, &file, user_message_header, flags, None)
}
pub fn sys_recvmmsg(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_mmsgvec: MMsgHdrPtr,
vlen: u32,
mut flags: u32,
user_timeout: TimeSpecPtr,
) -> Result<usize, Errno> {
let file = current_task.files.get(fd)?;
if !file.node().is_sock() {
return error!(ENOTSOCK);
}
if vlen > UIO_MAXIOV {
return error!(EINVAL);
}
let deadline = if user_timeout.is_null() {
None
} else {
let ts = current_task.read_multi_arch_object(user_timeout)?;
Some(zx::MonotonicInstant::after(duration_from_timespec(ts)?))
};
let mut index = 0usize;
while index < vlen as usize {
let current_ptr = user_mmsgvec.at(index)?;
let mut current_mmsghdr = current_task.read_multi_arch_object(current_ptr)?;
match recvmsg_internal_with_header(
locked,
current_task,
&file,
(&mut current_mmsghdr.hdr).into(),
flags,
deadline,
) {
Err(error) => {
if index == 0 {
return Err(error);
}
break;
}
Ok(bytes_read) => {
current_mmsghdr.len = bytes_read;
current_task.write_multi_arch_object(current_ptr, current_mmsghdr)?;
}
}
index += 1;
if flags & MSG_WAITFORONE != 0 {
flags |= MSG_DONTWAIT;
}
}
Ok(index)
}
pub fn sys_recvfrom(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_buffer: UserAddress,
buffer_length: usize,
flags: u32,
user_src_address: UserAddress,
user_src_address_length: UserRef<socklen_t>,
) -> Result<usize, Errno> {
let file = current_task.files.get(fd)?;
if !file.node().is_sock() {
return error!(ENOTSOCK);
}
let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
let socket_ops = file.downcast_file::<SocketFile>().unwrap();
let info = socket_ops.recvmsg(
locked,
current_task,
&file,
&mut UserBuffersOutputBuffer::unified_new_at(current_task, user_buffer, buffer_length)?,
flags,
None,
)?;
if !user_src_address.is_null() {
let bytes = info.address.map(|a| a.to_bytes()).unwrap_or_else(|| vec![]);
write_socket_address(current_task, user_src_address, user_src_address_length, &bytes)?;
}
if flags.contains(SocketMessageFlags::TRUNC) {
Ok(info.message_length)
} else {
Ok(info.bytes_read)
}
}
fn sendmsg_internal<L>(
locked: &mut Locked<L>,
current_task: &CurrentTask,
file: &FileHandle,
user_message_header: MsgHdrPtr,
flags: u32,
) -> Result<usize, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
let message_header = current_task.read_multi_arch_object(user_message_header)?;
sendmsg_internal_with_header(locked, current_task, file, &message_header, flags)
}
fn sendmsg_internal_with_header<L>(
locked: &mut Locked<L>,
current_task: &CurrentTask,
file: &FileHandle,
message_header: &MsgHdr,
flags: u32,
) -> Result<usize, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
if message_header.name_len > i32::MAX as u32 {
return error!(EINVAL);
}
if message_header.control_len > 20480 {
return error!(ENOBUFS);
}
let dest_address = maybe_parse_socket_address(
current_task,
message_header.name,
message_header.name_len as usize,
)?;
let iovec = read_iovec_from_msghdr(current_task, message_header.into())?;
let mut next_message_offset: usize = 0;
let mut ancillary_data = Vec::new();
let header_size = CMsgHdrPtr::size_of_object_for(current_task);
loop {
let space = message_header.control_len.saturating_sub(next_message_offset);
if space < header_size {
break;
}
let cmsg_ref =
CMsgHdrPtr::new(current_task, (message_header.control + next_message_offset)?);
let cmsg = current_task.read_multi_arch_object(cmsg_ref)?;
// If the message header is not long enough to fit the required fields of the
// control data, return EINVAL.
if (cmsg.cmsg_len as usize) < header_size {
return error!(EINVAL);
}
let data_size = std::cmp::min(cmsg.cmsg_len as usize - header_size, space);
let next_data_offset = next_message_offset + header_size;
let data = current_task
.read_memory_to_vec((message_header.control + next_data_offset)?, data_size)?;
next_message_offset += cmsg_align(current_task, header_size + data.len())?;
let data = AncillaryData::from_cmsg(
current_task,
ControlMsg::new(cmsg.cmsg_level, cmsg.cmsg_type, data),
)?;
if data.total_size(current_task) == 0 {
continue;
}
ancillary_data.push(data);
}
let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EOPNOTSUPP))?;
let socket_ops = file.downcast_file::<SocketFile>().unwrap();
socket_ops.sendmsg(
locked,
current_task,
file,
&mut UserBuffersInputBuffer::unified_new(current_task, iovec)?,
dest_address,
ancillary_data,
flags,
)
}
pub fn sys_sendmsg(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_message_header: MsgHdrPtr,
flags: u32,
) -> Result<usize, Errno> {
let file = current_task.files.get(fd)?;
if !file.node().is_sock() {
return error!(ENOTSOCK);
}
sendmsg_internal(locked, current_task, &file, user_message_header, flags)
}
pub fn sys_sendmmsg(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_mmsgvec: MMsgHdrPtr,
mut vlen: u32,
flags: u32,
) -> Result<usize, Errno> {
let file = current_task.files.get(fd)?;
if !file.node().is_sock() {
return error!(ENOTSOCK);
}
// vlen is capped at UIO_MAXIOV.
if vlen > UIO_MAXIOV {
vlen = UIO_MAXIOV;
}
let mut index = 0usize;
while index < vlen as usize {
let current_ptr = user_mmsgvec.at(index)?;
let mut current_mmsghdr = current_task.read_multi_arch_object(current_ptr)?;
match sendmsg_internal_with_header(locked, current_task, &file, &current_mmsghdr.hdr, flags)
{
Err(error) => {
if index == 0 {
return Err(error);
}
break;
}
Ok(bytes_read) => {
current_mmsghdr.len = bytes_read;
current_task.write_multi_arch_object(current_ptr, current_mmsghdr)?;
}
}
index += 1;
}
Ok(index)
}
pub fn sys_sendto(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_buffer: UserAddress,
user_buffer_length: usize,
flags: u32,
user_dest_address: UserAddress,
user_dest_address_length: socklen_t,
) -> Result<usize, Errno> {
let file = current_task.files.get(fd)?;
if !file.node().is_sock() {
return error!(ENOTSOCK);
}
let dest_address = maybe_parse_socket_address(
current_task,
user_dest_address,
user_dest_address_length as usize,
)?;
let mut data =
UserBuffersInputBuffer::unified_new_at(current_task, user_buffer, user_buffer_length)?;
let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EOPNOTSUPP))?;
let socket_file = file.downcast_file::<SocketFile>().unwrap();
socket_file.sendmsg(locked, current_task, &file, &mut data, dest_address, vec![], flags)
}
pub fn sys_getsockopt(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
level: u32,
optname: u32,
user_optval: UserAddress,
user_optlen: UserRef<socklen_t>,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
let optlen = current_task.read_object(user_optlen)? as usize;
let optval_buffer_len = optlen;
let mut optval = current_task.read_memory_to_vec(user_optval, optlen as usize)?;
let result = if socket.domain.is_inet() && IpTables::can_handle_getsockopt(level, optname) {
current_task.kernel().iptables().getsockopt(
locked,
current_task,
socket,
optname,
optval.clone(),
)
} else {
socket.getsockopt(locked, current_task, level, optname, optlen as u32)
};
// Even if `getsockopt()` above returned an error we still need to run
// the eBPF program - it may handle the error.
let (optlen, error) = match result {
Ok(new_optval) if new_optval.len() > optval.len() => (optlen, Some(errno!(EINVAL))),
Ok(new_optval) => {
// Copy the returned value to the buffer, but don't truncate it yet
// - this will allow to use the whole buffer in the eBPF program.
optval[..new_optval.len()].copy_from_slice(&new_optval);
(new_optval.len(), None)
}
Err(e) => (optlen, Some(e)),
};
let root_cgroup = current_task.kernel().ebpf_state.attachments.root_cgroup();
let (optval, optlen) = root_cgroup.run_getsockopt_prog(
locked.cast_locked(),
current_task,
level,
optname,
optval,
optlen,
error,
)?;
assert!(optlen <= optval_buffer_len);
current_task.write_memory(user_optval, &optval[..optlen])?;
current_task.write_object(user_optlen, &(optlen as u32))?;
Ok(())
}
pub fn sys_setsockopt(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
level: u32,
optname: u32,
user_optval: UserAddress,
optlen: socklen_t,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
let user_opt = UserBuffer { address: user_optval, length: optlen as usize };
// Run eBPF program if any.
let root_cgroup = current_task.kernel().ebpf_state.attachments.root_cgroup();
let optval = match root_cgroup.run_setsockopt_prog(
locked.cast_locked(),
current_task,
level,
optname,
user_opt.into(),
) {
SetSockOptProgramResult::Allow(value) => value,
SetSockOptProgramResult::Fail(errno) => return Err(errno),
SetSockOptProgramResult::Bypass => return Ok(()), // The option was handled by eBPF.
};
if socket.domain.is_inet() && IpTables::can_handle_setsockopt(level, optname) {
current_task.kernel().iptables().setsockopt(locked, current_task, socket, optname, optval)
} else {
socket.setsockopt(locked, current_task, level, optname, optval)
}
}
pub fn sys_shutdown(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
how: u32,
) -> Result<(), Errno> {
let file = current_task.files.get(fd)?;
let socket = Socket::get_from_file(&file)?;
let how = match how {
SHUT_RD => SocketShutdownFlags::READ,
SHUT_WR => SocketShutdownFlags::WRITE,
SHUT_RDWR => SocketShutdownFlags::READ | SocketShutdownFlags::WRITE,
_ => return error!(EINVAL),
};
socket.shutdown(locked, current_task, how)?;
Ok(())
}
pub fn cmsg_align(current_task: &CurrentTask, value: usize) -> Result<usize, Errno> {
let alignment = if current_task.is_arch32() { 4 } else { 8 };
round_up_to_increment(value, alignment)
}
// Syscalls for arch32 usage
#[cfg(target_arch = "aarch64")]
mod arch32 {
use crate::task::CurrentTask;
use crate::vfs::FdNumber;
use starnix_sync::{Locked, Unlocked};
use starnix_uapi::errors::Errno;
use starnix_uapi::user_address::UserAddress;
pub use super::{
sys_accept as sys_arch32_accept, sys_accept4 as sys_arch32_accept4,
sys_bind as sys_arch32_bind, sys_getpeername as sys_arch32_getpeername,
sys_getsockname as sys_arch32_getsockname, sys_getsockopt as sys_arch32_getsockopt,
sys_listen as sys_arch32_listen, sys_recvfrom as sys_arch32_recvfrom,
sys_recvmmsg as sys_arch32_recvmmsg, sys_recvmsg as sys_arch32_recvmsg,
sys_sendmsg as sys_arch32_sendmsg, sys_sendto as sys_arch32_sendto,
sys_setsockopt as sys_arch32_setsockopt, sys_shutdown as sys_arch32_shutdown,
sys_socketpair as sys_arch32_socketpair,
};
pub fn sys_arch32_send(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_buffer: UserAddress,
user_buffer_length: usize,
flags: u32,
) -> Result<usize, Errno> {
super::sys_sendto(
locked,
current_task,
fd,
user_buffer,
user_buffer_length,
flags,
Default::default(),
Default::default(),
)
}
pub fn sys_arch32_recv(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fd: FdNumber,
user_buffer: UserAddress,
buffer_length: usize,
flags: u32,
) -> Result<usize, Errno> {
super::sys_recvfrom(
locked,
current_task,
fd,
user_buffer,
buffer_length,
flags,
Default::default(),
Default::default(),
)
}
}
#[cfg(target_arch = "aarch64")]
pub use arch32::*;
#[cfg(test)]
mod tests {
use super::*;
use crate::testing::spawn_kernel_and_run;
use starnix_uapi::{AF_INET, AF_UNIX, SOCK_STREAM};
#[::fuchsia::test]
async fn test_socketpair_invalid_arguments() {
spawn_kernel_and_run(async |locked, current_task| {
assert_eq!(
sys_socketpair(
locked,
current_task,
AF_INET as u32,
SOCK_STREAM,
0,
UserRef::new(UserAddress::default())
),
error!(EPROTONOSUPPORT)
);
assert_eq!(
sys_socketpair(
locked,
current_task,
AF_UNIX as u32,
7,
0,
UserRef::new(UserAddress::default())
),
error!(EINVAL)
);
assert_eq!(
sys_socketpair(
locked,
current_task,
AF_UNIX as u32,
SOCK_STREAM,
0,
UserRef::new(UserAddress::default())
),
error!(EFAULT)
);
})
.await;
}
#[::fuchsia::test]
fn test_generate_autobind_address() {
let address = generate_autobind_address();
assert_eq!(address.len(), 6);
assert_eq!(address[0], 0);
for byte in address[1..].iter() {
match byte {
b'0'..=b'9' | b'a'..=b'f' => {
// Ok.
}
bad => {
panic!("bad byte: {bad}");
}
}
}
}
}