| // Copyright 2016 syzkaller project authors. All rights reserved. |
| // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
| // This file is shared between executor and csource package. |
| |
| #ifndef _GNU_SOURCE |
| #define _GNU_SOURCE |
| #endif |
| |
| #include <endian.h> |
| #include <sys/syscall.h> |
| #include <unistd.h> |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_THREADED) || defined(SYZ_COLLIDE) |
| #include <linux/futex.h> |
| #include <pthread.h> |
| #include <stdlib.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT)) |
| #include <errno.h> |
| #include <signal.h> |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <sys/time.h> |
| #include <sys/wait.h> |
| #include <time.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT)) |
| #include <sys/prctl.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT) && defined(SYZ_USE_TMP_DIR)) |
| #include <dirent.h> |
| #include <sys/mount.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NONE) || defined(SYZ_SANDBOX_SETUID) || defined(SYZ_SANDBOX_NAMESPACE) |
| #include <errno.h> |
| #include <sched.h> |
| #include <signal.h> |
| #include <stdarg.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <sys/prctl.h> |
| #include <sys/resource.h> |
| #include <sys/time.h> |
| #include <sys/wait.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_FAULT_INJECTION) || defined(SYZ_SANDBOX_NAMESPACE) || \ |
| defined(SYZ_ENABLE_CGROUPS) |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_SETUID) |
| #include <grp.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NAMESPACE) |
| #include <linux/capability.h> |
| #include <sys/mman.h> |
| #include <sys/mount.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_TUN_ENABLE) |
| #include <arpa/inet.h> |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <linux/if.h> |
| #include <linux/if_ether.h> |
| #include <linux/if_tun.h> |
| #include <linux/ip.h> |
| #include <linux/tcp.h> |
| #include <net/if_arp.h> |
| #include <stdarg.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <sys/ioctl.h> |
| #include <sys/stat.h> |
| #include <sys/uio.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_RESET_NET_NAMESPACE) |
| #include <linux/net.h> |
| #include <netinet/in.h> |
| #include <sys/socket.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_FAULT_INJECTION) |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <stdarg.h> |
| #include <stdbool.h> |
| #include <stdio.h> |
| #include <sys/stat.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_open_dev) || defined(__NR_syz_open_procfs) |
| #include <fcntl.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <sys/stat.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_fuse_mount) || defined(__NR_syz_fuseblk_mount) |
| #include <fcntl.h> |
| #include <stdio.h> |
| #include <sys/stat.h> |
| #include <sys/sysmacros.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_open_pts) |
| #include <fcntl.h> |
| #include <stdio.h> |
| #include <sys/ioctl.h> |
| #include <sys/stat.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_kvm_setup_cpu) |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <linux/kvm.h> |
| #include <stdarg.h> |
| #include <stddef.h> |
| #include <stdio.h> |
| #include <sys/ioctl.h> |
| #include <sys/stat.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_init_net_socket) |
| #include <fcntl.h> |
| #include <sched.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #include <unistd.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_genetlink_get_family_id) |
| #include <errno.h> |
| #include <linux/genetlink.h> |
| #include <linux/netlink.h> |
| #include <sys/socket.h> |
| #include <sys/types.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| #include <sys/mount.h> |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_mount_image) || defined(__NR_syz_read_part_table) |
| #include <errno.h> |
| #include <fcntl.h> |
| #include <linux/loop.h> |
| #include <stdio.h> |
| #include <sys/ioctl.h> |
| #include <sys/mount.h> |
| #include <sys/stat.h> |
| #include <sys/types.h> |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT)) || \ |
| defined(SYZ_USE_TMP_DIR) || defined(SYZ_HANDLE_SEGV) || defined(SYZ_TUN_ENABLE) || \ |
| defined(SYZ_SANDBOX_NAMESPACE) || defined(SYZ_SANDBOX_SETUID) || \ |
| defined(SYZ_SANDBOX_NONE) || defined(SYZ_FAULT_INJECTION) || \ |
| defined(__NR_syz_kvm_setup_cpu) || defined(__NR_syz_init_net_socket) && (defined(SYZ_SANDBOX_NONE) || defined(SYZ_SANDBOX_SETUID) || defined(SYZ_SANDBOX_NAMESPACE)) |
| // One does not simply exit. |
| // _exit can in fact fail. |
| // syzkaller did manage to generate a seccomp filter that prohibits exit_group syscall. |
| // Previously, we get into infinite recursion via segv_handler in such case |
| // and corrupted output_data, which does matter in our case since it is shared |
| // with fuzzer process. Loop infinitely instead. Parent will kill us. |
| // But one does not simply loop either. Compilers are sure that _exit never returns, |
| // so they remove all code after _exit as dead. Call _exit via volatile indirection. |
| // And this does not work as well. _exit has own handling of failing exit_group |
| // in the form of HLT instruction, it will divert control flow from our loop. |
| // So call the syscall directly. |
| __attribute__((noreturn)) static void doexit(int status) |
| { |
| volatile unsigned i; |
| syscall(__NR_exit_group, status); |
| for (i = 0;; i++) { |
| } |
| } |
| #endif |
| |
| #include "common.h" |
| |
| #if defined(SYZ_EXECUTOR) |
| struct thread_t; |
| void cover_reset(thread_t* th); |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_HANDLE_SEGV) |
| static __thread int skip_segv; |
| static __thread jmp_buf segv_env; |
| |
| static void segv_handler(int sig, siginfo_t* info, void* uctx) |
| { |
| // Generated programs can contain bad (unmapped/protected) addresses, |
| // which cause SIGSEGVs during copyin/copyout. |
| // This handler ignores such crashes to allow the program to proceed. |
| // We additionally opportunistically check that the faulty address |
| // is not within executable data region, because such accesses can corrupt |
| // output region and then fuzzer will fail on corrupted data. |
| uintptr_t addr = (uintptr_t)info->si_addr; |
| const uintptr_t prog_start = 1 << 20; |
| const uintptr_t prog_end = 100 << 20; |
| if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED) && (addr < prog_start || addr > prog_end)) { |
| debug("SIGSEGV on %p, skipping\n", (void*)addr); |
| _longjmp(segv_env, 1); |
| } |
| debug("SIGSEGV on %p, exiting\n", (void*)addr); |
| doexit(sig); |
| } |
| |
| static void install_segv_handler() |
| { |
| struct sigaction sa; |
| |
| // Don't need that SIGCANCEL/SIGSETXID glibc stuff. |
| // SIGCANCEL sent to main thread causes it to exit |
| // without bringing down the whole group. |
| memset(&sa, 0, sizeof(sa)); |
| sa.sa_handler = SIG_IGN; |
| syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8); |
| syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8); |
| |
| memset(&sa, 0, sizeof(sa)); |
| sa.sa_sigaction = segv_handler; |
| sa.sa_flags = SA_NODEFER | SA_SIGINFO; |
| sigaction(SIGSEGV, &sa, NULL); |
| sigaction(SIGBUS, &sa, NULL); |
| } |
| |
| #define NONFAILING(...) \ |
| { \ |
| __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ |
| if (_setjmp(segv_env) == 0) { \ |
| __VA_ARGS__; \ |
| } \ |
| __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT)) |
| static uint64 current_time_ms() |
| { |
| struct timespec ts; |
| |
| if (clock_gettime(CLOCK_MONOTONIC, &ts)) |
| fail("clock_gettime failed"); |
| return (uint64)ts.tv_sec * 1000 + (uint64)ts.tv_nsec / 1000000; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) |
| static void sleep_ms(uint64 ms) |
| { |
| usleep(ms * 1000); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_USE_TMP_DIR) |
| static void use_temporary_dir() |
| { |
| char tmpdir_template[] = "./syzkaller.XXXXXX"; |
| char* tmpdir = mkdtemp(tmpdir_template); |
| if (!tmpdir) |
| fail("failed to mkdtemp"); |
| if (chmod(tmpdir, 0777)) |
| fail("failed to chmod"); |
| if (chdir(tmpdir)) |
| fail("failed to chdir"); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_TUN_ENABLE) |
| static void vsnprintf_check(char* str, size_t size, const char* format, va_list args) |
| { |
| int rv; |
| |
| rv = vsnprintf(str, size, format, args); |
| if (rv < 0) |
| fail("tun: snprintf failed"); |
| if ((size_t)rv >= size) |
| fail("tun: string '%s...' doesn't fit into buffer", str); |
| } |
| |
| static void snprintf_check(char* str, size_t size, const char* format, ...) |
| { |
| va_list args; |
| |
| va_start(args, format); |
| vsnprintf_check(str, size, format, args); |
| va_end(args); |
| } |
| |
| #define COMMAND_MAX_LEN 128 |
| #define PATH_PREFIX "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin " |
| #define PATH_PREFIX_LEN (sizeof(PATH_PREFIX) - 1) |
| |
| static void execute_command(bool panic, const char* format, ...) |
| { |
| va_list args; |
| char command[PATH_PREFIX_LEN + COMMAND_MAX_LEN]; |
| int rv; |
| |
| va_start(args, format); |
| // Executor process does not have any env, including PATH. |
| // On some distributions, system/shell adds a minimal PATH, on some it does not. |
| // Set own standard PATH to make it work across distributions. |
| memcpy(command, PATH_PREFIX, PATH_PREFIX_LEN); |
| vsnprintf_check(command + PATH_PREFIX_LEN, COMMAND_MAX_LEN, format, args); |
| va_end(args); |
| rv = system(command); |
| if (rv) { |
| if (panic) |
| fail("command '%s' failed: %d", &command[0], rv); |
| debug("command '%s': %d\n", &command[0], rv); |
| } |
| } |
| |
| static int tunfd = -1; |
| static int tun_frags_enabled; |
| |
| // We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp). |
| // Rest of the packet (if any) will be silently truncated which is fine. |
| #define SYZ_TUN_MAX_PACKET_SIZE 1000 |
| |
| #define TUN_IFACE "syz_tun" |
| |
| #define LOCAL_MAC "aa:aa:aa:aa:aa:aa" |
| #define REMOTE_MAC "aa:aa:aa:aa:aa:bb" |
| |
| #define LOCAL_IPV4 "172.20.20.170" |
| #define REMOTE_IPV4 "172.20.20.187" |
| |
| #define LOCAL_IPV6 "fe80::aa" |
| #define REMOTE_IPV6 "fe80::bb" |
| |
| #ifndef IFF_NAPI |
| #define IFF_NAPI 0x0010 |
| #endif |
| #ifndef IFF_NAPI_FRAGS |
| #define IFF_NAPI_FRAGS 0x0020 |
| #endif |
| |
| #ifdef SYZ_EXECUTOR |
| extern bool flag_enable_tun; |
| #endif |
| |
| static void initialize_tun(void) |
| { |
| #ifdef SYZ_EXECUTOR |
| if (!flag_enable_tun) |
| return; |
| #endif |
| tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); |
| if (tunfd == -1) { |
| #ifdef SYZ_EXECUTOR |
| fail("tun: can't open /dev/net/tun\n"); |
| #else |
| printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n"); |
| printf("otherwise fuzzing or reproducing might not work as intended\n"); |
| return; |
| #endif |
| } |
| // Remap tun onto higher fd number to hide it from fuzzer and to keep |
| // fd numbers stable regardless of whether tun is opened or not. |
| const int kTunFd = 252; |
| if (dup2(tunfd, kTunFd) < 0) |
| fail("dup2(tunfd, kTunFd) failed"); |
| close(tunfd); |
| tunfd = kTunFd; |
| |
| struct ifreq ifr; |
| memset(&ifr, 0, sizeof(ifr)); |
| strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ); |
| ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS; |
| if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) { |
| // IFF_NAPI_FRAGS requires root, so try without it. |
| ifr.ifr_flags = IFF_TAP | IFF_NO_PI; |
| if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) |
| fail("tun: ioctl(TUNSETIFF) failed"); |
| } |
| // If IFF_NAPI_FRAGS is not supported it will be silently dropped, |
| // so query the effective flags. |
| if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0) |
| fail("tun: ioctl(TUNGETIFF) failed"); |
| tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0; |
| debug("tun_frags_enabled=%d\n", tun_frags_enabled); |
| |
| // Disable IPv6 DAD, otherwise the address remains unusable until DAD completes. |
| execute_command(1, "sysctl -w net.ipv6.conf.%s.accept_dad=0", TUN_IFACE); |
| |
| // Disable IPv6 router solicitation to prevent IPv6 spam. |
| execute_command(1, "sysctl -w net.ipv6.conf.%s.router_solicitations=0", TUN_IFACE); |
| // There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam. |
| |
| execute_command(1, "ip link set dev %s address %s", TUN_IFACE, LOCAL_MAC); |
| execute_command(1, "ip addr add %s/24 dev %s", LOCAL_IPV4, TUN_IFACE); |
| execute_command(1, "ip -6 addr add %s/120 dev %s", LOCAL_IPV6, TUN_IFACE); |
| execute_command(1, "ip neigh add %s lladdr %s dev %s nud permanent", |
| REMOTE_IPV4, REMOTE_MAC, TUN_IFACE); |
| execute_command(1, "ip -6 neigh add %s lladdr %s dev %s nud permanent", |
| REMOTE_IPV6, REMOTE_MAC, TUN_IFACE); |
| execute_command(1, "ip link set dev %s up", TUN_IFACE); |
| } |
| |
| // Addresses are chosen to be in the same subnet as tun addresses. |
| #define DEV_IPV4 "172.20.20.%d" |
| #define DEV_IPV6 "fe80::%02hx" |
| #define DEV_MAC "aa:aa:aa:aa:aa:%02hx" |
| |
| // We test in a separate namespace, which does not have any network devices initially (even lo). |
| // Create/up as many as we can. |
| static void initialize_netdevices(void) |
| { |
| unsigned i; |
| const char* devtypes[] = {"ip6gretap", "bridge", "vcan", "bond", "team"}; |
| // If you extend this array, also update netdev_addr_id in vnet.txt. |
| const char* devnames[] = {"lo", "sit0", "bridge0", "vcan0", "tunl0", |
| "gre0", "gretap0", "ip_vti0", "ip6_vti0", |
| "ip6tnl0", "ip6gre0", "ip6gretap0", |
| "erspan0", "bond0", "veth0", "veth1", "team0", |
| "veth0_to_bridge", "veth1_to_bridge", |
| "veth0_to_bond", "veth1_to_bond", |
| "veth0_to_team", "veth1_to_team"}; |
| const char* devmasters[] = {"bridge", "bond", "team"}; |
| |
| #ifdef SYZ_EXECUTOR |
| if (!flag_enable_tun) |
| return; |
| #endif |
| for (i = 0; i < sizeof(devtypes) / (sizeof(devtypes[0])); i++) |
| execute_command(0, "ip link add dev %s0 type %s", devtypes[i], devtypes[i]); |
| // This adds connected veth0 and veth1 devices. |
| execute_command(0, "ip link add type veth"); |
| |
| // This creates connected bridge/bond/team_slave devices of type veth, |
| // and makes them slaves of bridge/bond/team devices, respectively. |
| // Note: slave devices don't need MAC/IP addresses, only master devices. |
| // veth0_to_* is not slave devices, which still need ip addresses. |
| for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) { |
| execute_command(0, "ip link add name %s_slave_0 type veth peer name veth0_to_%s", devmasters[i], devmasters[i]); |
| execute_command(0, "ip link add name %s_slave_1 type veth peer name veth1_to_%s", devmasters[i], devmasters[i]); |
| execute_command(0, "ip link set %s_slave_0 master %s0", devmasters[i], devmasters[i]); |
| execute_command(0, "ip link set %s_slave_1 master %s0", devmasters[i], devmasters[i]); |
| execute_command(0, "ip link set veth0_to_%s up", devmasters[i]); |
| execute_command(0, "ip link set veth1_to_%s up", devmasters[i]); |
| } |
| // bond/team_slave_* will set up automatically when set their master. |
| // But bridge_slave_* need to set up manually. |
| execute_command(0, "ip link set bridge_slave_0 up"); |
| execute_command(0, "ip link set bridge_slave_1 up"); |
| |
| for (i = 0; i < sizeof(devnames) / (sizeof(devnames[0])); i++) { |
| char addr[32]; |
| // Assign some unique address to devices. Some devices won't up without this. |
| // Devices that don't need these addresses will simply ignore them. |
| // Shift addresses by 10 because 0 subnet address can mean special things. |
| snprintf_check(addr, sizeof(addr), DEV_IPV4, i + 10); |
| execute_command(0, "ip -4 addr add %s/24 dev %s", addr, devnames[i]); |
| snprintf_check(addr, sizeof(addr), DEV_IPV6, i + 10); |
| execute_command(0, "ip -6 addr add %s/120 dev %s", addr, devnames[i]); |
| snprintf_check(addr, sizeof(addr), DEV_MAC, i + 10); |
| execute_command(0, "ip link set dev %s address %s", devnames[i], addr); |
| execute_command(0, "ip link set dev %s up", devnames[i]); |
| } |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_TUN_ENABLE) && (defined(__NR_syz_extract_tcp_res) || defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT))) |
| static int read_tun(char* data, int size) |
| { |
| if (tunfd < 0) |
| return -1; |
| |
| int rv = read(tunfd, data, size); |
| if (rv < 0) { |
| if (errno == EAGAIN) |
| return -1; |
| // Tun sometimes returns this, unclear if it's a kernel bug or not. |
| if (errno == EBADFD) |
| return -1; |
| fail("tun: read failed with %d", rv); |
| } |
| return rv; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_DEBUG) && defined(SYZ_TUN_ENABLE) && (defined(__NR_syz_emit_ethernet) || defined(__NR_syz_extract_tcp_res))) |
| static void debug_dump_data(const char* data, int length) |
| { |
| int i; |
| for (i = 0; i < length; i++) { |
| debug("%02x ", data[i] & 0xff); |
| if (i % 16 == 15) |
| debug("\n"); |
| } |
| if (i % 16 != 0) |
| debug("\n"); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(__NR_syz_emit_ethernet) && defined(SYZ_TUN_ENABLE)) |
| #define MAX_FRAGS 4 |
| struct vnet_fragmentation { |
| uint32 full; |
| uint32 count; |
| uint32 frags[MAX_FRAGS]; |
| }; |
| |
| static uintptr_t syz_emit_ethernet(uintptr_t a0, uintptr_t a1, uintptr_t a2) |
| { |
| // syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt]) |
| // vnet_fragmentation { |
| // full int32[0:1] |
| // count int32[1:4] |
| // frags array[int32[0:4096], 4] |
| // } |
| if (tunfd < 0) |
| return (uintptr_t)-1; |
| |
| uint32 length = a0; |
| char* data = (char*)a1; |
| debug_dump_data(data, length); |
| |
| struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2; |
| struct iovec vecs[MAX_FRAGS + 1]; |
| uint32 nfrags = 0; |
| if (!tun_frags_enabled || frags == NULL) { |
| vecs[nfrags].iov_base = data; |
| vecs[nfrags].iov_len = length; |
| nfrags++; |
| } else { |
| bool full = true; |
| uint32 i, count = 0; |
| NONFAILING(full = frags->full); |
| NONFAILING(count = frags->count); |
| if (count > MAX_FRAGS) |
| count = MAX_FRAGS; |
| for (i = 0; i < count && length != 0; i++) { |
| uint32 size = 0; |
| NONFAILING(size = frags->frags[i]); |
| if (size > length) |
| size = length; |
| vecs[nfrags].iov_base = data; |
| vecs[nfrags].iov_len = size; |
| nfrags++; |
| data += size; |
| length -= size; |
| } |
| if (length != 0 && (full || nfrags == 0)) { |
| vecs[nfrags].iov_base = data; |
| vecs[nfrags].iov_len = length; |
| nfrags++; |
| } |
| } |
| return writev(tunfd, vecs, nfrags); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT) && defined(SYZ_TUN_ENABLE)) |
| static void flush_tun() |
| { |
| char data[SYZ_TUN_MAX_PACKET_SIZE]; |
| while (read_tun(&data[0], sizeof(data)) != -1) |
| ; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(__NR_syz_extract_tcp_res) && defined(SYZ_TUN_ENABLE)) |
| #ifndef __ANDROID__ |
| // Can't include <linux/ipv6.h>, since it causes |
| // conflicts due to some structs redefinition. |
| struct ipv6hdr { |
| __u8 priority : 4, |
| version : 4; |
| __u8 flow_lbl[3]; |
| |
| __be16 payload_len; |
| __u8 nexthdr; |
| __u8 hop_limit; |
| |
| struct in6_addr saddr; |
| struct in6_addr daddr; |
| }; |
| #endif |
| |
| struct tcp_resources { |
| uint32 seq; |
| uint32 ack; |
| }; |
| |
| static uintptr_t syz_extract_tcp_res(uintptr_t a0, uintptr_t a1, uintptr_t a2) |
| { |
| // syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32) |
| |
| if (tunfd < 0) |
| return (uintptr_t)-1; |
| |
| char data[SYZ_TUN_MAX_PACKET_SIZE]; |
| int rv = read_tun(&data[0], sizeof(data)); |
| if (rv == -1) |
| return (uintptr_t)-1; |
| size_t length = rv; |
| debug_dump_data(data, length); |
| |
| struct tcphdr* tcphdr; |
| |
| if (length < sizeof(struct ethhdr)) |
| return (uintptr_t)-1; |
| struct ethhdr* ethhdr = (struct ethhdr*)&data[0]; |
| |
| if (ethhdr->h_proto == htons(ETH_P_IP)) { |
| if (length < sizeof(struct ethhdr) + sizeof(struct iphdr)) |
| return (uintptr_t)-1; |
| struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)]; |
| if (iphdr->protocol != IPPROTO_TCP) |
| return (uintptr_t)-1; |
| if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr)) |
| return (uintptr_t)-1; |
| tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4]; |
| } else { |
| if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) |
| return (uintptr_t)-1; |
| struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)]; |
| // TODO: parse and skip extension headers. |
| if (ipv6hdr->nexthdr != IPPROTO_TCP) |
| return (uintptr_t)-1; |
| if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) |
| return (uintptr_t)-1; |
| tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)]; |
| } |
| |
| struct tcp_resources* res = (struct tcp_resources*)a0; |
| NONFAILING(res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1))); |
| NONFAILING(res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2))); |
| |
| debug("extracted seq: %08x\n", res->seq); |
| debug("extracted ack: %08x\n", res->ack); |
| |
| return 0; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_open_dev) |
| static uintptr_t syz_open_dev(uintptr_t a0, uintptr_t a1, uintptr_t a2) |
| { |
| if (a0 == 0xc || a0 == 0xb) { |
| // syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd |
| // syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd |
| char buf[128]; |
| sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2); |
| return open(buf, O_RDWR, 0); |
| } else { |
| // syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd |
| char buf[1024]; |
| char* hash; |
| NONFAILING(strncpy(buf, (char*)a0, sizeof(buf))); |
| buf[sizeof(buf) - 1] = 0; |
| while ((hash = strchr(buf, '#'))) { |
| *hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone. |
| a1 /= 10; |
| } |
| return open(buf, a2, 0); |
| } |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_open_procfs) |
| static uintptr_t syz_open_procfs(uintptr_t a0, uintptr_t a1) |
| { |
| // syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd |
| |
| char buf[128]; |
| memset(buf, 0, sizeof(buf)); |
| if (a0 == 0) { |
| NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1)); |
| } else if (a0 == (uintptr_t)-1) { |
| NONFAILING(snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1)); |
| } else { |
| NONFAILING(snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1)); |
| } |
| int fd = open(buf, O_RDWR); |
| if (fd == -1) |
| fd = open(buf, O_RDONLY); |
| return fd; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_open_pts) |
| static uintptr_t syz_open_pts(uintptr_t a0, uintptr_t a1) |
| { |
| // syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty] |
| int ptyno = 0; |
| if (ioctl(a0, TIOCGPTN, &ptyno)) |
| return -1; |
| char buf[128]; |
| sprintf(buf, "/dev/pts/%d", ptyno); |
| return open(buf, a1, 0); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_fuse_mount) |
| static uintptr_t syz_fuse_mount(uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5) |
| { |
| // syz_fuse_mount(target filename, mode flags[fuse_mode], uid uid, gid gid, maxread intptr, flags flags[mount_flags]) fd[fuse] |
| uint64 target = a0; |
| uint64 mode = a1; |
| uint64 uid = a2; |
| uint64 gid = a3; |
| uint64 maxread = a4; |
| uint64 flags = a5; |
| |
| int fd = open("/dev/fuse", O_RDWR); |
| if (fd == -1) |
| return fd; |
| char buf[1024]; |
| sprintf(buf, "fd=%d,user_id=%ld,group_id=%ld,rootmode=0%o", fd, (long)uid, (long)gid, (unsigned)mode & ~3u); |
| if (maxread != 0) |
| sprintf(buf + strlen(buf), ",max_read=%ld", (long)maxread); |
| if (mode & 1) |
| strcat(buf, ",default_permissions"); |
| if (mode & 2) |
| strcat(buf, ",allow_other"); |
| syscall(SYS_mount, "", target, "fuse", flags, buf); |
| // Ignore errors, maybe fuzzer can do something useful with fd alone. |
| return fd; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_fuseblk_mount) |
| static uintptr_t syz_fuseblk_mount(uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7) |
| { |
| // syz_fuseblk_mount(target filename, blkdev filename, mode flags[fuse_mode], uid uid, gid gid, maxread intptr, blksize intptr, flags flags[mount_flags]) fd[fuse] |
| uint64 target = a0; |
| uint64 blkdev = a1; |
| uint64 mode = a2; |
| uint64 uid = a3; |
| uint64 gid = a4; |
| uint64 maxread = a5; |
| uint64 blksize = a6; |
| uint64 flags = a7; |
| |
| int fd = open("/dev/fuse", O_RDWR); |
| if (fd == -1) |
| return fd; |
| if (syscall(SYS_mknodat, AT_FDCWD, blkdev, S_IFBLK, makedev(7, 199))) |
| return fd; |
| char buf[256]; |
| sprintf(buf, "fd=%d,user_id=%ld,group_id=%ld,rootmode=0%o", fd, (long)uid, (long)gid, (unsigned)mode & ~3u); |
| if (maxread != 0) |
| sprintf(buf + strlen(buf), ",max_read=%ld", (long)maxread); |
| if (blksize != 0) |
| sprintf(buf + strlen(buf), ",blksize=%ld", (long)blksize); |
| if (mode & 1) |
| strcat(buf, ",default_permissions"); |
| if (mode & 2) |
| strcat(buf, ",allow_other"); |
| syscall(SYS_mount, blkdev, target, "fuseblk", flags, buf); |
| // Ignore errors, maybe fuzzer can do something useful with fd alone. |
| return fd; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_init_net_socket) |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NONE) || defined(SYZ_SANDBOX_SETUID) || defined(SYZ_SANDBOX_NAMESPACE) |
| const int kInitNetNsFd = 253; |
| // syz_init_net_socket opens a socket in init net namespace. |
| // Used for families that can only be created in init net namespace. |
| static uintptr_t syz_init_net_socket(uintptr_t domain, uintptr_t type, uintptr_t proto) |
| { |
| int netns = open("/proc/self/ns/net", O_RDONLY); |
| if (netns == -1) |
| return netns; |
| if (setns(kInitNetNsFd, 0)) |
| return -1; |
| int sock = syscall(__NR_socket, domain, type, proto); |
| int err = errno; |
| if (setns(netns, 0)) |
| fail("setns(netns) failed"); |
| close(netns); |
| errno = err; |
| return sock; |
| } |
| #else |
| static uintptr_t syz_init_net_socket(uintptr_t domain, uintptr_t type, uintptr_t proto) |
| { |
| return syscall(__NR_socket, domain, type, proto); |
| } |
| #endif |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_genetlink_get_family_id) |
| static uintptr_t syz_genetlink_get_family_id(uintptr_t name) |
| { |
| char buf[512] = {0}; |
| struct nlmsghdr* hdr = (struct nlmsghdr*)buf; |
| struct genlmsghdr* genlhdr = (struct genlmsghdr*)NLMSG_DATA(hdr); |
| struct nlattr* attr = (struct nlattr*)(genlhdr + 1); |
| hdr->nlmsg_len = sizeof(*hdr) + sizeof(*genlhdr) + sizeof(*attr) + GENL_NAMSIZ; |
| hdr->nlmsg_type = GENL_ID_CTRL; |
| hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; |
| genlhdr->cmd = CTRL_CMD_GETFAMILY; |
| attr->nla_type = CTRL_ATTR_FAMILY_NAME; |
| attr->nla_len = sizeof(*attr) + GENL_NAMSIZ; |
| NONFAILING(strncpy((char*)(attr + 1), (char*)name, GENL_NAMSIZ)); |
| struct iovec iov = {hdr, hdr->nlmsg_len}; |
| struct sockaddr_nl addr = {0}; |
| addr.nl_family = AF_NETLINK; |
| debug("syz_genetlink_get_family_id(%s)\n", (char*)(attr + 1)); |
| int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); |
| if (fd == -1) { |
| debug("syz_genetlink_get_family_id: socket failed: %d\n", errno); |
| return -1; |
| } |
| struct msghdr msg = {&addr, sizeof(addr), &iov, 1, NULL, 0, 0}; |
| if (sendmsg(fd, &msg, 0) == -1) { |
| debug("syz_genetlink_get_family_id: sendmsg failed: %d\n", errno); |
| close(fd); |
| return -1; |
| } |
| ssize_t n = recv(fd, buf, sizeof(buf), 0); |
| close(fd); |
| if (n <= 0) { |
| debug("syz_genetlink_get_family_id: recv failed: %d\n", errno); |
| return -1; |
| } |
| if (hdr->nlmsg_type != GENL_ID_CTRL) { |
| debug("syz_genetlink_get_family_id: wrong reply type: %d\n", hdr->nlmsg_type); |
| return -1; |
| } |
| for (; (char*)attr < buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { |
| if (attr->nla_type == CTRL_ATTR_FAMILY_ID) |
| return *(uint16*)(attr + 1); |
| } |
| debug("syz_genetlink_get_family_id: no CTRL_ATTR_FAMILY_ID attr\n"); |
| return -1; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_mount_image) || defined(__NR_syz_read_part_table) |
| extern unsigned long long procid; |
| |
| struct fs_image_segment { |
| void* data; |
| uintptr_t size; |
| uintptr_t offset; |
| }; |
| |
| #define IMAGE_MAX_SEGMENTS 4096 |
| #define IMAGE_MAX_SIZE (129 << 20) |
| |
| #if defined(__i386__) |
| #define SYZ_memfd_create 356 |
| #elif defined(__x86_64__) |
| #define SYZ_memfd_create 319 |
| #elif defined(__arm__) |
| #define SYZ_memfd_create 385 |
| #elif defined(__aarch64__) |
| #define SYZ_memfd_create 279 |
| #elif defined(__ppc64__) || defined(__PPC64__) || defined(__powerpc64__) |
| #define SYZ_memfd_create 360 |
| #endif |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_read_part_table) |
| // syz_read_part_table(size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]]) |
| static uintptr_t syz_read_part_table(uintptr_t size, uintptr_t nsegs, uintptr_t segments) |
| { |
| char loopname[64], linkname[64]; |
| int loopfd, err = 0, res = -1; |
| uintptr_t i, j; |
| // See the comment in syz_mount_image. |
| struct fs_image_segment* segs = (struct fs_image_segment*)segments; |
| |
| if (nsegs > IMAGE_MAX_SEGMENTS) |
| nsegs = IMAGE_MAX_SEGMENTS; |
| for (i = 0; i < nsegs; i++) { |
| if (segs[i].size > IMAGE_MAX_SIZE) |
| segs[i].size = IMAGE_MAX_SIZE; |
| segs[i].offset %= IMAGE_MAX_SIZE; |
| if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size) |
| segs[i].offset = IMAGE_MAX_SIZE - segs[i].size; |
| if (size < segs[i].offset + segs[i].offset) |
| size = segs[i].offset + segs[i].offset; |
| } |
| if (size > IMAGE_MAX_SIZE) |
| size = IMAGE_MAX_SIZE; |
| int memfd = syscall(SYZ_memfd_create, "syz_read_part_table", 0); |
| if (memfd == -1) { |
| err = errno; |
| goto error; |
| } |
| if (ftruncate(memfd, size)) { |
| err = errno; |
| goto error_close_memfd; |
| } |
| for (i = 0; i < nsegs; i++) { |
| if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) { |
| debug("syz_read_part_table: pwrite[%u] failed: %d\n", (int)i, errno); |
| } |
| } |
| snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); |
| loopfd = open(loopname, O_RDWR); |
| if (loopfd == -1) { |
| err = errno; |
| goto error_close_memfd; |
| } |
| if (ioctl(loopfd, LOOP_SET_FD, memfd)) { |
| if (errno != EBUSY) { |
| err = errno; |
| goto error_close_loop; |
| } |
| ioctl(loopfd, LOOP_CLR_FD, 0); |
| usleep(1000); |
| if (ioctl(loopfd, LOOP_SET_FD, memfd)) { |
| err = errno; |
| goto error_close_loop; |
| } |
| } |
| struct loop_info64 info; |
| if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) { |
| err = errno; |
| goto error_clear_loop; |
| } |
| #if defined(SYZ_EXECUTOR) |
| cover_reset(0); |
| #endif |
| info.lo_flags |= LO_FLAGS_PARTSCAN; |
| if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) { |
| err = errno; |
| goto error_clear_loop; |
| } |
| res = 0; |
| // If we managed to parse some partitions, symlink them into our work dir. |
| for (i = 1, j = 0; i < 8; i++) { |
| snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i); |
| struct stat statbuf; |
| if (stat(loopname, &statbuf) == 0) { |
| snprintf(linkname, sizeof(linkname), "./file%d", (int)j++); |
| if (symlink(loopname, linkname)) { |
| debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno); |
| } |
| } |
| } |
| error_clear_loop: |
| ioctl(loopfd, LOOP_CLR_FD, 0); |
| error_close_loop: |
| close(loopfd); |
| error_close_memfd: |
| close(memfd); |
| error: |
| errno = err; |
| return res; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_mount_image) |
| //syz_mount_image(fs ptr[in, string[disk_filesystems]], dir ptr[in, filename], size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]], flags flags[mount_flags], opts ptr[in, fs_options[vfat_options]]) |
| //fs_image_segment { |
| // data ptr[in, array[int8]] |
| // size len[data, intptr] |
| // offset intptr |
| //} |
| static uintptr_t syz_mount_image(uintptr_t fs, uintptr_t dir, uintptr_t size, uintptr_t nsegs, uintptr_t segments, uintptr_t flags, uintptr_t opts) |
| { |
| char loopname[64]; |
| int loopfd, err = 0, res = -1; |
| uintptr_t i; |
| // Strictly saying we ought to do a nonfailing copyout of segments into a local var. |
| // But some filesystems have large number of segments (2000+), |
| // we can't allocate that much on stack and allocating elsewhere is problematic, |
| // so we just use the memory allocated by fuzzer. |
| struct fs_image_segment* segs = (struct fs_image_segment*)segments; |
| |
| if (nsegs > IMAGE_MAX_SEGMENTS) |
| nsegs = IMAGE_MAX_SEGMENTS; |
| for (i = 0; i < nsegs; i++) { |
| if (segs[i].size > IMAGE_MAX_SIZE) |
| segs[i].size = IMAGE_MAX_SIZE; |
| segs[i].offset %= IMAGE_MAX_SIZE; |
| if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size) |
| segs[i].offset = IMAGE_MAX_SIZE - segs[i].size; |
| if (size < segs[i].offset + segs[i].offset) |
| size = segs[i].offset + segs[i].offset; |
| } |
| if (size > IMAGE_MAX_SIZE) |
| size = IMAGE_MAX_SIZE; |
| int memfd = syscall(SYZ_memfd_create, "syz_mount_image", 0); |
| if (memfd == -1) { |
| err = errno; |
| goto error; |
| } |
| if (ftruncate(memfd, size)) { |
| err = errno; |
| goto error_close_memfd; |
| } |
| for (i = 0; i < nsegs; i++) { |
| if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) { |
| debug("syz_mount_image: pwrite[%u] failed: %d\n", (int)i, errno); |
| } |
| } |
| snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); |
| loopfd = open(loopname, O_RDWR); |
| if (loopfd == -1) { |
| err = errno; |
| goto error_close_memfd; |
| } |
| if (ioctl(loopfd, LOOP_SET_FD, memfd)) { |
| if (errno != EBUSY) { |
| err = errno; |
| goto error_close_loop; |
| } |
| ioctl(loopfd, LOOP_CLR_FD, 0); |
| usleep(1000); |
| if (ioctl(loopfd, LOOP_SET_FD, memfd)) { |
| err = errno; |
| goto error_close_loop; |
| } |
| } |
| mkdir((char*)dir, 0777); |
| NONFAILING(if (strcmp((char*)fs, "iso9660") == 0) flags |= MS_RDONLY); |
| debug("syz_mount_image: size=%llu segs=%llu loop='%s' dir='%s' fs='%s' opts='%s'\n", (uint64)size, (uint64)nsegs, loopname, (char*)dir, (char*)fs, (char*)opts); |
| #if defined(SYZ_EXECUTOR) |
| cover_reset(0); |
| #endif |
| if (mount(loopname, (char*)dir, (char*)fs, flags, (char*)opts)) { |
| err = errno; |
| goto error_clear_loop; |
| } |
| res = 0; |
| error_clear_loop: |
| ioctl(loopfd, LOOP_CLR_FD, 0); |
| error_close_loop: |
| close(loopfd); |
| error_close_memfd: |
| close(memfd); |
| error: |
| errno = err; |
| return res; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_kvm_setup_cpu) |
| #if defined(__x86_64__) |
| #include "common_kvm_amd64.h" |
| #elif defined(__aarch64__) |
| #include "common_kvm_arm64.h" |
| #else |
| static uintptr_t syz_kvm_setup_cpu(uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7) |
| { |
| return 0; |
| } |
| #endif |
| #endif // #ifdef __NR_syz_kvm_setup_cpu |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_FAULT_INJECTION) || defined(SYZ_SANDBOX_NAMESPACE) || \ |
| defined(SYZ_ENABLE_CGROUPS) |
| static bool write_file(const char* file, const char* what, ...) |
| { |
| char buf[1024]; |
| va_list args; |
| va_start(args, what); |
| vsnprintf(buf, sizeof(buf), what, args); |
| va_end(args); |
| buf[sizeof(buf) - 1] = 0; |
| int len = strlen(buf); |
| |
| int fd = open(file, O_WRONLY | O_CLOEXEC); |
| if (fd == -1) |
| return false; |
| if (write(fd, buf, len) != len) { |
| int err = errno; |
| close(fd); |
| errno = err; |
| return false; |
| } |
| close(fd); |
| return true; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| static void setup_cgroups() |
| { |
| if (mkdir("/syzcgroup", 0777)) { |
| debug("mkdir(/syzcgroup) failed: %d\n", errno); |
| } |
| if (mkdir("/syzcgroup/unified", 0777)) { |
| debug("mkdir(/syzcgroup/unified) failed: %d\n", errno); |
| } |
| if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) { |
| debug("mount(cgroup2) failed: %d\n", errno); |
| } |
| if (chmod("/syzcgroup/unified", 0777)) { |
| debug("chmod(/syzcgroup/unified) failed: %d\n", errno); |
| } |
| if (!write_file("/syzcgroup/unified/cgroup.subtree_control", "+cpu +memory +io +pids +rdma")) { |
| debug("write(cgroup.subtree_control) failed: %d\n", errno); |
| } |
| if (mkdir("/syzcgroup/cpu", 0777)) { |
| debug("mkdir(/syzcgroup/cpu) failed: %d\n", errno); |
| } |
| if (mount("none", "/syzcgroup/cpu", "cgroup", 0, "cpuset,cpuacct,perf_event,hugetlb")) { |
| debug("mount(cgroup cpu) failed: %d\n", errno); |
| } |
| if (!write_file("/syzcgroup/cpu/cgroup.clone_children", "1")) { |
| debug("write(/syzcgroup/cpu/cgroup.clone_children) failed: %d\n", errno); |
| } |
| if (chmod("/syzcgroup/cpu", 0777)) { |
| debug("chmod(/syzcgroup/cpu) failed: %d\n", errno); |
| } |
| if (mkdir("/syzcgroup/net", 0777)) { |
| debug("mkdir(/syzcgroup/net) failed: %d\n", errno); |
| } |
| if (mount("none", "/syzcgroup/net", "cgroup", 0, "net_cls,net_prio,devices,freezer")) { |
| debug("mount(cgroup net) failed: %d\n", errno); |
| } |
| if (chmod("/syzcgroup/net", 0777)) { |
| debug("chmod(/syzcgroup/net) failed: %d\n", errno); |
| } |
| } |
| |
| // TODO(dvyukov): this should be under a separate define for separate minimization, |
| // but for now we bundle this with cgroups. |
| static void setup_binfmt_misc() |
| { |
| if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:syz0::./file0:")) { |
| debug("write(/proc/sys/fs/binfmt_misc/register, syz0) failed: %d\n", errno); |
| } |
| if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:yz1::./file0:POC")) { |
| debug("write(/proc/sys/fs/binfmt_misc/register, syz1) failed: %d\n", errno); |
| } |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NONE) || defined(SYZ_SANDBOX_SETUID) || defined(SYZ_SANDBOX_NAMESPACE) |
| static void loop(); |
| |
| static void sandbox_common() |
| { |
| prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); |
| setpgrp(); |
| setsid(); |
| |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_init_net_socket) |
| int netns = open("/proc/self/ns/net", O_RDONLY); |
| if (netns == -1) |
| fail("open(/proc/self/ns/net) failed"); |
| if (dup2(netns, kInitNetNsFd) < 0) |
| fail("dup2(netns, kInitNetNsFd) failed"); |
| close(netns); |
| #endif |
| |
| struct rlimit rlim; |
| rlim.rlim_cur = rlim.rlim_max = 160 << 20; |
| setrlimit(RLIMIT_AS, &rlim); |
| rlim.rlim_cur = rlim.rlim_max = 8 << 20; |
| setrlimit(RLIMIT_MEMLOCK, &rlim); |
| rlim.rlim_cur = rlim.rlim_max = 136 << 20; |
| setrlimit(RLIMIT_FSIZE, &rlim); |
| rlim.rlim_cur = rlim.rlim_max = 1 << 20; |
| setrlimit(RLIMIT_STACK, &rlim); |
| rlim.rlim_cur = rlim.rlim_max = 0; |
| setrlimit(RLIMIT_CORE, &rlim); |
| |
| // CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems, |
| // so we do them separately of clone in do_sandbox_namespace. |
| if (unshare(CLONE_NEWNS)) { |
| debug("unshare(CLONE_NEWNS): %d\n", errno); |
| } |
| if (unshare(CLONE_NEWIPC)) { |
| debug("unshare(CLONE_NEWIPC): %d\n", errno); |
| } |
| if (unshare(0x02000000)) { |
| debug("unshare(CLONE_NEWCGROUP): %d\n", errno); |
| } |
| if (unshare(CLONE_NEWUTS)) { |
| debug("unshare(CLONE_NEWUTS): %d\n", errno); |
| } |
| if (unshare(CLONE_SYSVSEM)) { |
| debug("unshare(CLONE_SYSVSEM): %d\n", errno); |
| } |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NONE) |
| static int do_sandbox_none(void) |
| { |
| // CLONE_NEWPID takes effect for the first child of the current process, |
| // so we do it before fork to make the loop "init" process of the namespace. |
| // We ought to do fail here, but sandbox=none is used in pkg/ipc tests |
| // and they are usually run under non-root. |
| // Also since debug is stripped by pkg/csource, we need to do {} |
| // even though we generally don't do {} around single statements. |
| if (unshare(CLONE_NEWPID)) { |
| debug("unshare(CLONE_NEWPID): %d\n", errno); |
| } |
| int pid = fork(); |
| if (pid < 0) |
| fail("sandbox fork failed"); |
| if (pid) |
| return pid; |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| setup_cgroups(); |
| setup_binfmt_misc(); |
| #endif |
| sandbox_common(); |
| if (unshare(CLONE_NEWNET)) { |
| debug("unshare(CLONE_NEWNET): %d\n", errno); |
| } |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_TUN_ENABLE) |
| initialize_tun(); |
| // TODO(dvyukov): this should be separated from tun and minimized by csource separately. |
| initialize_netdevices(); |
| #endif |
| |
| loop(); |
| doexit(1); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_SETUID) |
| static int do_sandbox_setuid(void) |
| { |
| if (unshare(CLONE_NEWPID)) |
| fail("unshare(CLONE_NEWPID)"); |
| int pid = fork(); |
| if (pid < 0) |
| fail("sandbox fork failed"); |
| if (pid) |
| return pid; |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| setup_cgroups(); |
| setup_binfmt_misc(); |
| #endif |
| sandbox_common(); |
| if (unshare(CLONE_NEWNET)) |
| fail("unshare(CLONE_NEWNET)"); |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_TUN_ENABLE) |
| initialize_tun(); |
| // TODO(dvyukov): this should be separated from tun and minimized by csource separately. |
| initialize_netdevices(); |
| #endif |
| |
| const int nobody = 65534; |
| if (setgroups(0, NULL)) |
| fail("failed to setgroups"); |
| if (syscall(SYS_setresgid, nobody, nobody, nobody)) |
| fail("failed to setresgid"); |
| if (syscall(SYS_setresuid, nobody, nobody, nobody)) |
| fail("failed to setresuid"); |
| |
| // This is required to open /proc/self/* files. |
| // Otherwise they are owned by root and we can't open them after setuid. |
| // See task_dump_owner function in kernel. |
| prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); |
| |
| loop(); |
| doexit(1); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_SANDBOX_NAMESPACE) |
| static int real_uid; |
| static int real_gid; |
| __attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20]; |
| |
| static int namespace_sandbox_proc(void* arg) |
| { |
| sandbox_common(); |
| |
| // /proc/self/setgroups is not present on some systems, ignore error. |
| write_file("/proc/self/setgroups", "deny"); |
| if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) |
| fail("write of /proc/self/uid_map failed"); |
| if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) |
| fail("write of /proc/self/gid_map failed"); |
| |
| // CLONE_NEWNET must always happen before tun setup, |
| // because we want the tun device in the test namespace. |
| if (unshare(CLONE_NEWNET)) |
| fail("unshare(CLONE_NEWNET)"); |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_TUN_ENABLE) |
| // We setup tun here as it needs to be in the test net namespace, |
| // which in turn needs to be in the test user namespace. |
| // However, IFF_NAPI_FRAGS will fail as we are not root already. |
| // There does not seem to be a call sequence that would satisfy all of that. |
| initialize_tun(); |
| // TODO(dvyukov): this should be separated from tun and minimized by csource separately. |
| initialize_netdevices(); |
| #endif |
| |
| if (mkdir("./syz-tmp", 0777)) |
| fail("mkdir(syz-tmp) failed"); |
| if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) |
| fail("mount(tmpfs) failed"); |
| if (mkdir("./syz-tmp/newroot", 0777)) |
| fail("mkdir failed"); |
| if (mkdir("./syz-tmp/newroot/dev", 0700)) |
| fail("mkdir failed"); |
| unsigned mount_flags = MS_BIND | MS_REC | MS_PRIVATE; |
| if (mount("/dev", "./syz-tmp/newroot/dev", NULL, mount_flags, NULL)) |
| fail("mount(dev) failed"); |
| if (mkdir("./syz-tmp/newroot/proc", 0700)) |
| fail("mkdir failed"); |
| if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL)) |
| fail("mount(proc) failed"); |
| if (mkdir("./syz-tmp/newroot/selinux", 0700)) |
| fail("mkdir failed"); |
| // selinux mount used to be at /selinux, but then moved to /sys/fs/selinux. |
| const char* selinux_path = "./syz-tmp/newroot/selinux"; |
| if (mount("/selinux", selinux_path, NULL, mount_flags, NULL)) { |
| if (errno != ENOENT) |
| fail("mount(/selinux) failed"); |
| if (mount("/sys/fs/selinux", selinux_path, NULL, mount_flags, NULL) && errno != ENOENT) |
| fail("mount(/sys/fs/selinux) failed"); |
| } |
| if (mkdir("./syz-tmp/newroot/sys", 0700)) |
| fail("mkdir failed"); |
| if (mount(NULL, "./syz-tmp/newroot/sys", "sysfs", 0, NULL)) |
| fail("mount(sysfs) failed"); |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| if (mkdir("./syz-tmp/newroot/syzcgroup", 0700)) |
| fail("mkdir failed"); |
| if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700)) |
| fail("mkdir failed"); |
| if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700)) |
| fail("mkdir failed"); |
| if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700)) |
| fail("mkdir failed"); |
| if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, mount_flags, NULL)) { |
| debug("mount(cgroup2, MS_BIND) failed: %d\n", errno); |
| } |
| if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, mount_flags, NULL)) { |
| debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno); |
| } |
| if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, mount_flags, NULL)) { |
| debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno); |
| } |
| #endif |
| if (mkdir("./syz-tmp/pivot", 0777)) |
| fail("mkdir failed"); |
| if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { |
| debug("pivot_root failed\n"); |
| if (chdir("./syz-tmp")) |
| fail("chdir failed"); |
| } else { |
| debug("pivot_root OK\n"); |
| if (chdir("/")) |
| fail("chdir failed"); |
| if (umount2("./pivot", MNT_DETACH)) |
| fail("umount failed"); |
| } |
| if (chroot("./newroot")) |
| fail("chroot failed"); |
| if (chdir("/")) |
| fail("chdir failed"); |
| |
| // Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes. |
| // Previously it lead to hangs because the loop process stopped due to SIGSTOP. |
| // Note that a process can always ptrace its direct children, which is enough |
| // for testing purposes. |
| struct __user_cap_header_struct cap_hdr = {}; |
| struct __user_cap_data_struct cap_data[2] = {}; |
| cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; |
| cap_hdr.pid = getpid(); |
| if (syscall(SYS_capget, &cap_hdr, &cap_data)) |
| fail("capget failed"); |
| cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE); |
| cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE); |
| cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE); |
| if (syscall(SYS_capset, &cap_hdr, &cap_data)) |
| fail("capset failed"); |
| |
| loop(); |
| doexit(1); |
| } |
| |
| static int do_sandbox_namespace(void) |
| { |
| int pid; |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| setup_cgroups(); |
| setup_binfmt_misc(); |
| #endif |
| real_uid = getuid(); |
| real_gid = getgid(); |
| mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows |
| pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64], |
| CLONE_NEWUSER | CLONE_NEWPID, 0); |
| if (pid < 0) |
| fail("sandbox clone failed"); |
| return pid; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_RESET_NET_NAMESPACE) |
| // checkpoint/reset_net_namespace partially resets net namespace to initial state |
| // after each test. Currently it resets only ipv4 netfilter state. |
| // Ideally, we just create a new net namespace for each test, |
| // however it's too slow (1-1.5 seconds per namespace, not parallelizable). |
| |
| // Linux headers do not compile for C++, so we have to define the structs manualy. |
| #define XT_TABLE_SIZE 1536 |
| #define XT_MAX_ENTRIES 10 |
| |
| struct xt_counters { |
| uint64 pcnt, bcnt; |
| }; |
| |
| struct ipt_getinfo { |
| char name[32]; |
| unsigned int valid_hooks; |
| unsigned int hook_entry[5]; |
| unsigned int underflow[5]; |
| unsigned int num_entries; |
| unsigned int size; |
| }; |
| |
| struct ipt_get_entries { |
| char name[32]; |
| unsigned int size; |
| void* entrytable[XT_TABLE_SIZE / sizeof(void*)]; |
| }; |
| |
| struct ipt_replace { |
| char name[32]; |
| unsigned int valid_hooks; |
| unsigned int num_entries; |
| unsigned int size; |
| unsigned int hook_entry[5]; |
| unsigned int underflow[5]; |
| unsigned int num_counters; |
| struct xt_counters* counters; |
| char entrytable[XT_TABLE_SIZE]; |
| }; |
| |
| struct ipt_table_desc { |
| const char* name; |
| struct ipt_getinfo info; |
| struct ipt_replace replace; |
| }; |
| |
| static struct ipt_table_desc ipv4_tables[] = { |
| {.name = "filter"}, |
| {.name = "nat"}, |
| {.name = "mangle"}, |
| {.name = "raw"}, |
| {.name = "security"}, |
| }; |
| |
| static struct ipt_table_desc ipv6_tables[] = { |
| {.name = "filter"}, |
| {.name = "nat"}, |
| {.name = "mangle"}, |
| {.name = "raw"}, |
| {.name = "security"}, |
| }; |
| |
| #define IPT_BASE_CTL 64 |
| #define IPT_SO_SET_REPLACE (IPT_BASE_CTL) |
| #define IPT_SO_GET_INFO (IPT_BASE_CTL) |
| #define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) |
| |
| struct arpt_getinfo { |
| char name[32]; |
| unsigned int valid_hooks; |
| unsigned int hook_entry[3]; |
| unsigned int underflow[3]; |
| unsigned int num_entries; |
| unsigned int size; |
| }; |
| |
| struct arpt_get_entries { |
| char name[32]; |
| unsigned int size; |
| void* entrytable[XT_TABLE_SIZE / sizeof(void*)]; |
| }; |
| |
| struct arpt_replace { |
| char name[32]; |
| unsigned int valid_hooks; |
| unsigned int num_entries; |
| unsigned int size; |
| unsigned int hook_entry[3]; |
| unsigned int underflow[3]; |
| unsigned int num_counters; |
| struct xt_counters* counters; |
| char entrytable[XT_TABLE_SIZE]; |
| }; |
| |
| struct arpt_table_desc { |
| const char* name; |
| struct arpt_getinfo info; |
| struct arpt_replace replace; |
| }; |
| |
| static struct arpt_table_desc arpt_tables[] = { |
| {.name = "filter"}, |
| }; |
| |
| #define ARPT_BASE_CTL 96 |
| #define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL) |
| #define ARPT_SO_GET_INFO (ARPT_BASE_CTL) |
| #define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) |
| |
| static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level) |
| { |
| struct ipt_get_entries entries; |
| socklen_t optlen; |
| int fd, i; |
| |
| fd = socket(family, SOCK_STREAM, IPPROTO_TCP); |
| if (fd == -1) { |
| switch (errno) { |
| case EAFNOSUPPORT: |
| case ENOPROTOOPT: |
| return; |
| } |
| fail("socket(%d, SOCK_STREAM, IPPROTO_TCP)", family); |
| } |
| for (i = 0; i < num_tables; i++) { |
| struct ipt_table_desc* table = &tables[i]; |
| strcpy(table->info.name, table->name); |
| strcpy(table->replace.name, table->name); |
| optlen = sizeof(table->info); |
| if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) { |
| switch (errno) { |
| case EPERM: |
| case ENOENT: |
| case ENOPROTOOPT: |
| continue; |
| } |
| fail("getsockopt(IPT_SO_GET_INFO)"); |
| } |
| debug("checkpoint iptable %s/%d: entries=%d hooks=%x size=%d\n", table->name, family, table->info.num_entries, table->info.valid_hooks, table->info.size); |
| if (table->info.size > sizeof(table->replace.entrytable)) |
| fail("table size is too large: %u", table->info.size); |
| if (table->info.num_entries > XT_MAX_ENTRIES) |
| fail("too many counters: %u", table->info.num_entries); |
| memset(&entries, 0, sizeof(entries)); |
| strcpy(entries.name, table->name); |
| entries.size = table->info.size; |
| optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size; |
| if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen)) |
| fail("getsockopt(IPT_SO_GET_ENTRIES)"); |
| table->replace.valid_hooks = table->info.valid_hooks; |
| table->replace.num_entries = table->info.num_entries; |
| table->replace.size = table->info.size; |
| memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); |
| memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); |
| memcpy(table->replace.entrytable, entries.entrytable, table->info.size); |
| } |
| close(fd); |
| } |
| |
| static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level) |
| { |
| struct xt_counters counters[XT_MAX_ENTRIES]; |
| struct ipt_get_entries entries; |
| struct ipt_getinfo info; |
| socklen_t optlen; |
| int fd, i; |
| |
| fd = socket(family, SOCK_STREAM, IPPROTO_TCP); |
| if (fd == -1) { |
| switch (errno) { |
| case EAFNOSUPPORT: |
| case ENOPROTOOPT: |
| return; |
| } |
| fail("socket(%d, SOCK_STREAM, IPPROTO_TCP)", family); |
| } |
| for (i = 0; i < num_tables; i++) { |
| struct ipt_table_desc* table = &tables[i]; |
| if (table->info.valid_hooks == 0) |
| continue; |
| memset(&info, 0, sizeof(info)); |
| strcpy(info.name, table->name); |
| optlen = sizeof(info); |
| if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen)) |
| fail("getsockopt(IPT_SO_GET_INFO)"); |
| if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { |
| memset(&entries, 0, sizeof(entries)); |
| strcpy(entries.name, table->name); |
| entries.size = table->info.size; |
| optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; |
| if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen)) |
| fail("getsockopt(IPT_SO_GET_ENTRIES)"); |
| if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) |
| continue; |
| } |
| debug("resetting iptable %s\n", table->name); |
| table->replace.num_counters = info.num_entries; |
| table->replace.counters = counters; |
| optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; |
| if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen)) |
| fail("setsockopt(IPT_SO_SET_REPLACE)"); |
| } |
| close(fd); |
| } |
| |
| static void checkpoint_arptables(void) |
| { |
| struct arpt_get_entries entries; |
| socklen_t optlen; |
| unsigned i; |
| int fd; |
| |
| fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); |
| if (fd == -1) |
| fail("socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); |
| for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { |
| struct arpt_table_desc* table = &arpt_tables[i]; |
| strcpy(table->info.name, table->name); |
| strcpy(table->replace.name, table->name); |
| optlen = sizeof(table->info); |
| if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) { |
| switch (errno) { |
| case EPERM: |
| case ENOENT: |
| case ENOPROTOOPT: |
| continue; |
| } |
| fail("getsockopt(ARPT_SO_GET_INFO)"); |
| } |
| debug("checkpoint arptable %s: entries=%d hooks=%x size=%d\n", table->name, table->info.num_entries, table->info.valid_hooks, table->info.size); |
| if (table->info.size > sizeof(table->replace.entrytable)) |
| fail("table size is too large: %u", table->info.size); |
| if (table->info.num_entries > XT_MAX_ENTRIES) |
| fail("too many counters: %u", table->info.num_entries); |
| memset(&entries, 0, sizeof(entries)); |
| strcpy(entries.name, table->name); |
| entries.size = table->info.size; |
| optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size; |
| if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) |
| fail("getsockopt(ARPT_SO_GET_ENTRIES)"); |
| table->replace.valid_hooks = table->info.valid_hooks; |
| table->replace.num_entries = table->info.num_entries; |
| table->replace.size = table->info.size; |
| memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); |
| memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); |
| memcpy(table->replace.entrytable, entries.entrytable, table->info.size); |
| } |
| close(fd); |
| } |
| |
| static void reset_arptables() |
| { |
| struct xt_counters counters[XT_MAX_ENTRIES]; |
| struct arpt_get_entries entries; |
| struct arpt_getinfo info; |
| socklen_t optlen; |
| unsigned i; |
| int fd; |
| |
| fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); |
| if (fd == -1) |
| fail("socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); |
| for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { |
| struct arpt_table_desc* table = &arpt_tables[i]; |
| if (table->info.valid_hooks == 0) |
| continue; |
| memset(&info, 0, sizeof(info)); |
| strcpy(info.name, table->name); |
| optlen = sizeof(info); |
| if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen)) |
| fail("getsockopt(ARPT_SO_GET_INFO)"); |
| if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { |
| memset(&entries, 0, sizeof(entries)); |
| strcpy(entries.name, table->name); |
| entries.size = table->info.size; |
| optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; |
| if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) |
| fail("getsockopt(ARPT_SO_GET_ENTRIES)"); |
| if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0) |
| continue; |
| } |
| debug("resetting arptable %s\n", table->name); |
| table->replace.num_counters = info.num_entries; |
| table->replace.counters = counters; |
| optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; |
| if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen)) |
| fail("setsockopt(ARPT_SO_SET_REPLACE)"); |
| } |
| close(fd); |
| } |
| |
| #include <linux/if.h> |
| #include <linux/netfilter_bridge/ebtables.h> |
| |
| struct ebt_table_desc { |
| const char* name; |
| struct ebt_replace replace; |
| char entrytable[XT_TABLE_SIZE]; |
| }; |
| |
| static struct ebt_table_desc ebt_tables[] = { |
| {.name = "filter"}, |
| {.name = "nat"}, |
| {.name = "broute"}, |
| }; |
| |
| static void checkpoint_ebtables(void) |
| { |
| socklen_t optlen; |
| unsigned i; |
| int fd; |
| |
| fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); |
| if (fd == -1) |
| fail("socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); |
| for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { |
| struct ebt_table_desc* table = &ebt_tables[i]; |
| strcpy(table->replace.name, table->name); |
| optlen = sizeof(table->replace); |
| if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) { |
| switch (errno) { |
| case EPERM: |
| case ENOENT: |
| case ENOPROTOOPT: |
| continue; |
| } |
| fail("getsockopt(EBT_SO_GET_INIT_INFO)"); |
| } |
| debug("checkpoint ebtable %s: entries=%d hooks=%x size=%d\n", table->name, table->replace.nentries, table->replace.valid_hooks, table->replace.entries_size); |
| if (table->replace.entries_size > sizeof(table->entrytable)) |
| fail("table size is too large: %u", table->replace.entries_size); |
| table->replace.num_counters = 0; |
| table->replace.entries = table->entrytable; |
| optlen = sizeof(table->replace) + table->replace.entries_size; |
| if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen)) |
| fail("getsockopt(EBT_SO_GET_INIT_ENTRIES)"); |
| } |
| close(fd); |
| } |
| |
| static void reset_ebtables() |
| { |
| struct ebt_replace replace; |
| char entrytable[XT_TABLE_SIZE]; |
| socklen_t optlen; |
| unsigned i, j, h; |
| int fd; |
| |
| fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); |
| if (fd == -1) |
| fail("socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); |
| for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { |
| struct ebt_table_desc* table = &ebt_tables[i]; |
| if (table->replace.valid_hooks == 0) |
| continue; |
| memset(&replace, 0, sizeof(replace)); |
| strcpy(replace.name, table->name); |
| optlen = sizeof(replace); |
| if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen)) |
| fail("getsockopt(EBT_SO_GET_INFO)"); |
| replace.num_counters = 0; |
| table->replace.entries = 0; |
| for (h = 0; h < NF_BR_NUMHOOKS; h++) |
| table->replace.hook_entry[h] = 0; |
| if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) { |
| memset(&entrytable, 0, sizeof(entrytable)); |
| replace.entries = entrytable; |
| optlen = sizeof(replace) + replace.entries_size; |
| if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen)) |
| fail("getsockopt(EBT_SO_GET_ENTRIES)"); |
| if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0) |
| continue; |
| } |
| debug("resetting ebtable %s\n", table->name); |
| // Kernel does not seem to return actual entry points (wat?). |
| for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) { |
| if (table->replace.valid_hooks & (1 << h)) { |
| table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j; |
| j++; |
| } |
| } |
| table->replace.entries = table->entrytable; |
| optlen = sizeof(table->replace) + table->replace.entries_size; |
| if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen)) |
| fail("setsockopt(EBT_SO_SET_ENTRIES)"); |
| } |
| close(fd); |
| } |
| |
| static void checkpoint_net_namespace(void) |
| { |
| checkpoint_ebtables(); |
| checkpoint_arptables(); |
| checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); |
| checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); |
| } |
| |
| static void reset_net_namespace(void) |
| { |
| reset_ebtables(); |
| reset_arptables(); |
| reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP); |
| reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6); |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || (defined(SYZ_REPEAT) && defined(SYZ_WAIT_REPEAT) && defined(SYZ_USE_TMP_DIR)) |
| // One does not simply remove a directory. |
| // There can be mounts, so we need to try to umount. |
| // Moreover, a mount can be mounted several times, so we need to try to umount in a loop. |
| // Moreover, after umount a dir can become non-empty again, so we need another loop. |
| // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty. |
| static void remove_dir(const char* dir) |
| { |
| DIR* dp; |
| struct dirent* ep; |
| int iter = 0; |
| retry: |
| while (umount2(dir, MNT_DETACH) == 0) { |
| debug("umount(%s)\n", dir); |
| } |
| dp = opendir(dir); |
| if (dp == NULL) { |
| if (errno == EMFILE) { |
| // This happens when the test process casts prlimit(NOFILE) on us. |
| // Ideally we somehow prevent test processes from messing with parent processes. |
| // But full sandboxing is expensive, so let's ignore this error for now. |
| exitf("opendir(%s) failed due to NOFILE, exiting", dir); |
| } |
| exitf("opendir(%s) failed", dir); |
| } |
| while ((ep = readdir(dp))) { |
| if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) |
| continue; |
| char filename[FILENAME_MAX]; |
| snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); |
| struct stat st; |
| if (lstat(filename, &st)) |
| exitf("lstat(%s) failed", filename); |
| if (S_ISDIR(st.st_mode)) { |
| remove_dir(filename); |
| continue; |
| } |
| int i; |
| for (i = 0;; i++) { |
| debug("unlink(%s)\n", filename); |
| if (unlink(filename) == 0) |
| break; |
| if (errno == EROFS) { |
| debug("ignoring EROFS\n"); |
| break; |
| } |
| if (errno != EBUSY || i > 100) |
| exitf("unlink(%s) failed", filename); |
| debug("umount(%s)\n", filename); |
| if (umount2(filename, MNT_DETACH)) |
| exitf("umount(%s) failed", filename); |
| } |
| } |
| closedir(dp); |
| int i; |
| for (i = 0;; i++) { |
| debug("rmdir(%s)\n", dir); |
| if (rmdir(dir) == 0) |
| break; |
| if (i < 100) { |
| if (errno == EROFS) { |
| debug("ignoring EROFS\n"); |
| break; |
| } |
| if (errno == EBUSY) { |
| debug("umount(%s)\n", dir); |
| if (umount2(dir, MNT_DETACH)) |
| exitf("umount(%s) failed", dir); |
| continue; |
| } |
| if (errno == ENOTEMPTY) { |
| if (iter < 100) { |
| iter++; |
| goto retry; |
| } |
| } |
| } |
| exitf("rmdir(%s) failed", dir); |
| } |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_FAULT_INJECTION) |
| static int inject_fault(int nth) |
| { |
| int fd; |
| char buf[16]; |
| |
| fd = open("/proc/thread-self/fail-nth", O_RDWR); |
| // We treat errors here as temporal/non-critical because we see |
| // occasional ENOENT/EACCES errors returned. It seems that fuzzer |
| // somehow gets its hands to it. |
| if (fd == -1) |
| exitf("failed to open /proc/thread-self/fail-nth"); |
| sprintf(buf, "%d", nth + 1); |
| if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) |
| exitf("failed to write /proc/thread-self/fail-nth"); |
| return fd; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) |
| static int fault_injected(int fail_fd) |
| { |
| char buf[16]; |
| int n = read(fail_fd, buf, sizeof(buf) - 1); |
| if (n <= 0) |
| exitf("failed to read /proc/thread-self/fail-nth"); |
| int res = n == 2 && buf[0] == '0' && buf[1] == '\n'; |
| buf[0] = '0'; |
| if (write(fail_fd, buf, 1) != 1) |
| exitf("failed to write /proc/thread-self/fail-nth"); |
| close(fail_fd); |
| return res; |
| } |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_REPEAT) |
| static void execute_one(); |
| extern unsigned long long procid; |
| |
| #if defined(SYZ_EXECUTOR) |
| void reply_handshake(); |
| void receive_execute(bool need_prog); |
| void reply_execute(int status); |
| extern uint32* output_data; |
| extern uint32* output_pos; |
| #endif |
| |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_WAIT_REPEAT) |
| static void loop() |
| { |
| #if defined(SYZ_EXECUTOR) |
| // Tell parent that we are ready to serve. |
| reply_handshake(); |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_RESET_NET_NAMESPACE) |
| checkpoint_net_namespace(); |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| char cgroupdir[64]; |
| snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); |
| char cgroupdir_cpu[64]; |
| snprintf(cgroupdir_cpu, sizeof(cgroupdir_cpu), "/syzcgroup/cpu/syz%llu", procid); |
| char cgroupdir_net[64]; |
| snprintf(cgroupdir_net, sizeof(cgroupdir_net), "/syzcgroup/net/syz%llu", procid); |
| if (mkdir(cgroupdir, 0777)) { |
| debug("mkdir(%s) failed: %d\n", cgroupdir, errno); |
| } |
| if (mkdir(cgroupdir_cpu, 0777)) { |
| debug("mkdir(%s) failed: %d\n", cgroupdir_cpu, errno); |
| } |
| if (mkdir(cgroupdir_net, 0777)) { |
| debug("mkdir(%s) failed: %d\n", cgroupdir_net, errno); |
| } |
| int pid = getpid(); |
| char procs_file[128]; |
| snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs", cgroupdir); |
| if (!write_file(procs_file, "%d", pid)) { |
| debug("write(%s) failed: %d\n", procs_file, errno); |
| } |
| snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs", cgroupdir_cpu); |
| if (!write_file(procs_file, "%d", pid)) { |
| debug("write(%s) failed: %d\n", procs_file, errno); |
| } |
| snprintf(procs_file, sizeof(procs_file), "%s/cgroup.procs", cgroupdir_net); |
| if (!write_file(procs_file, "%d", pid)) { |
| debug("write(%s) failed: %d\n", procs_file, errno); |
| } |
| #endif |
| int iter; |
| for (iter = 0;; iter++) { |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_USE_TMP_DIR) |
| // Create a new private work dir for this test (removed at the end of the loop). |
| char cwdbuf[32]; |
| sprintf(cwdbuf, "./%d", iter); |
| if (mkdir(cwdbuf, 0777)) |
| fail("failed to mkdir"); |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(__NR_syz_mount_image) || defined(__NR_syz_read_part_table) |
| char buf[64]; |
| snprintf(buf, sizeof(buf), "/dev/loop%llu", procid); |
| int loopfd = open(buf, O_RDWR); |
| if (loopfd != -1) { |
| ioctl(loopfd, LOOP_CLR_FD, 0); |
| close(loopfd); |
| } |
| #endif |
| #if defined(SYZ_EXECUTOR) |
| // TODO: consider moving the read into the child. |
| // Potentially it can speed up things a bit -- when the read finishes |
| // we already have a forked worker process. |
| receive_execute(false); |
| #endif |
| int pid = fork(); |
| if (pid < 0) |
| fail("clone failed"); |
| if (pid == 0) { |
| prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); |
| setpgrp(); |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_USE_TMP_DIR) |
| if (chdir(cwdbuf)) |
| fail("failed to chdir"); |
| #endif |
| #if defined(SYZ_EXECUTOR) |
| close(kInPipeFd); |
| close(kOutPipeFd); |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_ENABLE_CGROUPS) |
| if (symlink(cgroupdir, "./cgroup")) { |
| debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno); |
| } |
| if (symlink(cgroupdir_cpu, "./cgroup.cpu")) { |
| debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir_cpu, errno); |
| } |
| if (symlink(cgroupdir_net, "./cgroup.net")) { |
| debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir_net, errno); |
| } |
| #endif |
| #if defined(SYZ_EXECUTOR) |
| if (flag_enable_tun) { |
| // Read all remaining packets from tun to better |
| // isolate consequently executing programs. |
| flush_tun(); |
| } |
| output_pos = output_data; |
| #elif defined(SYZ_TUN_ENABLE) |
| flush_tun(); |
| #endif |
| execute_one(); |
| debug("worker exiting\n"); |
| doexit(0); |
| } |
| debug("spawned worker pid %d\n", pid); |
| |
| // We used to use sigtimedwait(SIGCHLD) to wait for the subprocess. |
| // But SIGCHLD is also delivered when a process stops/continues, |
| // so it would require a loop with status analysis and timeout recalculation. |
| // SIGCHLD should also unblock the usleep below, so the spin loop |
| // should be as efficient as sigtimedwait. |
| int status = 0; |
| uint64 start = current_time_ms(); |
| #if defined(SYZ_EXECUTOR) |
| uint64 last_executed = start; |
| uint32 executed_calls = __atomic_load_n(output_data, __ATOMIC_RELAXED); |
| #endif |
| for (;;) { |
| int res = waitpid(-1, &status, __WALL | WNOHANG); |
| if (res == pid) { |
| debug("waitpid(%d)=%d\n", pid, res); |
| break; |
| } |
| usleep(1000); |
| #if defined(SYZ_EXECUTOR) |
| // Even though the test process executes exit at the end |
| // and execution time of each syscall is bounded by 20ms, |
| // this backup watchdog is necessary and its performance is important. |
| // The problem is that exit in the test processes can fail (sic). |
| // One observed scenario is that the test processes prohibits |
| // exit_group syscall using seccomp. Another observed scenario |
| // is that the test processes setups a userfaultfd for itself, |
| // then the main thread hangs when it wants to page in a page. |
| // Below we check if the test process still executes syscalls |
| // and kill it after 500ms of inactivity. |
| uint64 now = current_time_ms(); |
| uint32 now_executed = __atomic_load_n(output_data, __ATOMIC_RELAXED); |
| if (executed_calls != now_executed) { |
| executed_calls = now_executed; |
| last_executed = now; |
| } |
| if ((now - start < 3 * 1000) && (now - start < 1000 || now - last_executed < 500)) |
| continue; |
| #else |
| if (current_time_ms() - start < 3 * 1000) |
| continue; |
| #endif |
| debug("waitpid(%d)=%d\n", pid, res); |
| debug("killing\n"); |
| kill(-pid, SIGKILL); |
| kill(pid, SIGKILL); |
| while (waitpid(-1, &status, __WALL) != pid) { |
| } |
| break; |
| } |
| #if defined(SYZ_EXECUTOR) |
| status = WEXITSTATUS(status); |
| if (status == kFailStatus) |
| fail("child failed"); |
| if (status == kErrorStatus) |
| error("child errored"); |
| reply_execute(0); |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_USE_TMP_DIR) |
| remove_dir(cwdbuf); |
| #endif |
| #if defined(SYZ_EXECUTOR) || defined(SYZ_RESET_NET_NAMESPACE) |
| reset_net_namespace(); |
| #endif |
| } |
| } |
| #else |
| void loop() |
| { |
| while (1) { |
| execute_one(); |
| } |
| } |
| #endif |
| #endif |
| |
| #if defined(SYZ_THREADED) |
| struct thread_t { |
| int created, running, call; |
| pthread_t th; |
| }; |
| |
| static struct thread_t threads[16]; |
| static void execute_call(int call); |
| static int running; |
| #if defined(SYZ_COLLIDE) |
| static int collide; |
| #endif |
| |
| static void* thr(void* arg) |
| { |
| struct thread_t* th = (struct thread_t*)arg; |
| for (;;) { |
| while (!__atomic_load_n(&th->running, __ATOMIC_ACQUIRE)) |
| syscall(SYS_futex, &th->running, FUTEX_WAIT, 0, 0); |
| execute_call(th->call); |
| __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED); |
| __atomic_store_n(&th->running, 0, __ATOMIC_RELEASE); |
| syscall(SYS_futex, &th->running, FUTEX_WAKE); |
| } |
| return 0; |
| } |
| |
| static void execute(int num_calls) |
| { |
| int call, thread; |
| running = 0; |
| for (call = 0; call < num_calls; call++) { |
| for (thread = 0; thread < sizeof(threads) / sizeof(threads[0]); thread++) { |
| struct thread_t* th = &threads[thread]; |
| if (!th->created) { |
| th->created = 1; |
| pthread_attr_t attr; |
| pthread_attr_init(&attr); |
| pthread_attr_setstacksize(&attr, 128 << 10); |
| pthread_create(&th->th, &attr, thr, th); |
| } |
| if (!__atomic_load_n(&th->running, __ATOMIC_ACQUIRE)) { |
| th->call = call; |
| __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED); |
| __atomic_store_n(&th->running, 1, __ATOMIC_RELEASE); |
| syscall(SYS_futex, &th->running, FUTEX_WAKE); |
| #if defined(SYZ_COLLIDE) |
| if (collide && call % 2) |
| break; |
| #endif |
| struct timespec ts; |
| ts.tv_sec = 0; |
| ts.tv_nsec = 20 * 1000 * 1000; |
| syscall(SYS_futex, &th->running, FUTEX_WAIT, 1, &ts); |
| if (running) |
| usleep((call == num_calls - 1) ? 10000 : 1000); |
| break; |
| } |
| } |
| } |
| } |
| #endif |