| // Copyright (c) 2015 Big Switch Networks, Inc |
| // SPDX-License-Identifier: Apache-2.0 |
| |
| /* |
| * Copyright 2015 Big Switch Networks, Inc |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| #define _GNU_SOURCE |
| |
| #include "ubpf.h" |
| #include "ebpf.h" |
| #include <stdio.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <stdbool.h> |
| #include <stdarg.h> |
| #include <sys/mman.h> |
| #include <endian.h> |
| #include "ubpf_int.h" |
| #include <unistd.h> |
| |
| #define SHIFT_MASK_32_BIT(X) ((X) & 0x1f) |
| #define SHIFT_MASK_64_BIT(X) ((X) & 0x3f) |
| #define DEFAULT_JITTER_BUFFER_SIZE 65536 |
| |
| static bool |
| validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_insts, char** errmsg); |
| static bool |
| bounds_check( |
| const struct ubpf_vm* vm, |
| void* addr, |
| int size, |
| const char* type, |
| uint16_t cur_pc, |
| void* mem, |
| size_t mem_len, |
| void* stack, |
| size_t stack_len); |
| |
| bool |
| ubpf_toggle_bounds_check(struct ubpf_vm* vm, bool enable) |
| { |
| bool old = vm->bounds_check_enabled; |
| vm->bounds_check_enabled = enable; |
| return old; |
| } |
| |
| bool |
| ubpf_toggle_undefined_behavior_check(struct ubpf_vm* vm, bool enable) |
| { |
| bool old = vm->undefined_behavior_check_enabled; |
| vm->undefined_behavior_check_enabled = enable; |
| return old; |
| } |
| |
| void |
| ubpf_set_error_print(struct ubpf_vm* vm, int (*error_printf)(FILE* stream, const char* format, ...)) |
| { |
| if (error_printf) |
| vm->error_printf = error_printf; |
| else |
| vm->error_printf = fprintf; |
| } |
| |
| static uint64_t |
| ubpf_default_external_dispatcher( |
| uint64_t arg1, |
| uint64_t arg2, |
| uint64_t arg3, |
| uint64_t arg4, |
| uint64_t arg5, |
| unsigned int index, |
| external_function_t* external_fns) |
| { |
| return external_fns[index](arg1, arg2, arg3, arg4, arg5); |
| } |
| |
| struct ubpf_vm* |
| ubpf_create(void) |
| { |
| struct ubpf_vm* vm = calloc(1, sizeof(*vm)); |
| if (vm == NULL) { |
| return NULL; |
| } |
| |
| vm->ext_funcs = calloc(MAX_EXT_FUNCS, sizeof(*vm->ext_funcs)); |
| if (vm->ext_funcs == NULL) { |
| ubpf_destroy(vm); |
| return NULL; |
| } |
| |
| vm->ext_func_names = calloc(MAX_EXT_FUNCS, sizeof(*vm->ext_func_names)); |
| if (vm->ext_func_names == NULL) { |
| ubpf_destroy(vm); |
| return NULL; |
| } |
| |
| vm->local_func_stack_usage = calloc(UBPF_MAX_INSTS, sizeof(struct ubpf_stack_usage)); |
| if (vm->local_func_stack_usage == NULL) { |
| ubpf_destroy(vm); |
| return NULL; |
| } |
| |
| vm->bounds_check_enabled = true; |
| vm->undefined_behavior_check_enabled = false; |
| vm->error_printf = fprintf; |
| |
| #if defined(__x86_64__) || defined(_M_X64) |
| vm->jit_translate = ubpf_translate_x86_64; |
| vm->jit_update_dispatcher = ubpf_jit_update_dispatcher_x86_64; |
| vm->jit_update_helper = ubpf_jit_update_helper_x86_64; |
| #elif defined(__aarch64__) || defined(_M_ARM64) |
| vm->jit_translate = ubpf_translate_arm64; |
| vm->jit_update_dispatcher = ubpf_jit_update_dispatcher_arm64; |
| vm->jit_update_helper = ubpf_jit_update_helper_arm64; |
| #else |
    vm->jit_translate = ubpf_translate_null;
| #endif |
| vm->unwind_stack_extension_index = -1; |
| |
| vm->jitted_result.compile_result = UBPF_JIT_COMPILE_FAILURE; |
| vm->jitter_buffer_size = DEFAULT_JITTER_BUFFER_SIZE; |
| return vm; |
| } |
| |
| void |
| ubpf_destroy(struct ubpf_vm* vm) |
| { |
| ubpf_unload_code(vm); |
| free(vm->int_funcs); |
| free(vm->ext_funcs); |
| free(vm->ext_func_names); |
| free(vm->local_func_stack_usage); |
| free(vm); |
| } |
| |
| external_function_t |
| as_external_function_t(void* f) |
| { |
| return (external_function_t)f; |
| }; |
| |
| int |
| ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_function_t fn) |
| { |
| if (idx >= MAX_EXT_FUNCS) { |
| return -1; |
| } |
| |
| vm->ext_funcs[idx] = (ext_func)fn; |
| vm->ext_func_names[idx] = name; |
| |
| int success = 0; |
| |
| if (vm->jitted_result.compile_result == UBPF_JIT_COMPILE_SUCCESS) { |
| if (mprotect(vm->jitted, vm->jitted_size, PROT_READ | PROT_WRITE) < 0) { |
| return -1; |
| } |
| |
| // Now, update! |
| if (!vm->jit_update_helper( |
| vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) { |
| // Can't immediately stop here because we have unprotected memory! |
| success = -1; |
| } |
| |
| if (mprotect(vm->jitted, vm->jitted_size, PROT_READ | PROT_EXEC) < 0) { |
| return -1; |
| } |
| } |
| return success; |
| } |
| |
| int |
| ubpf_register_external_dispatcher( |
| struct ubpf_vm* vm, external_function_dispatcher_t dispatcher, external_function_validate_t validater) |
| { |
| vm->dispatcher = dispatcher; |
| vm->dispatcher_validate = validater; |
| |
| int success = 0; |
| |
| if (vm->jitted_result.compile_result == UBPF_JIT_COMPILE_SUCCESS) { |
| if (mprotect(vm->jitted, vm->jitted_size, PROT_READ | PROT_WRITE) < 0) { |
| return -1; |
| } |
| |
| // Now, update! |
| if (!vm->jit_update_dispatcher( |
| vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) { |
| // Can't immediately stop here because we have unprotected memory! |
| success = -1; |
| } |
| |
| if (mprotect(vm->jitted, vm->jitted_size, PROT_READ | PROT_EXEC) < 0) { |
| return -1; |
| } |
| } |
| return success; |
| } |
| |
| int |
| ubpf_set_unwind_function_index(struct ubpf_vm* vm, unsigned int idx) |
| { |
| if (vm->unwind_stack_extension_index != -1) { |
| return -1; |
| } |
| |
| vm->unwind_stack_extension_index = idx; |
| return 0; |
| } |
| |
| unsigned int |
| ubpf_lookup_registered_function(struct ubpf_vm* vm, const char* name) |
| { |
| int i; |
| for (i = 0; i < MAX_EXT_FUNCS; i++) { |
| const char* other = vm->ext_func_names[i]; |
| if (other && !strcmp(other, name)) { |
| return i; |
| } |
| } |
| return -1; |
| } |
| |
| int |
| ubpf_load(struct ubpf_vm* vm, const void* code, uint32_t code_len, char** errmsg) |
| { |
| const struct ebpf_inst* source_inst = code; |
| *errmsg = NULL; |
| |
| if (UBPF_EBPF_STACK_SIZE % sizeof(uint64_t) != 0) { |
| *errmsg = ubpf_error("UBPF_EBPF_STACK_SIZE must be a multiple of 8"); |
| return -1; |
| } |
| |
| if (vm->insts) { |
| *errmsg = ubpf_error( |
| "code has already been loaded into this VM. Use ubpf_unload_code() if you need to reuse this VM"); |
| return -1; |
| } |
| |
| if (code_len % 8 != 0) { |
| *errmsg = ubpf_error("code_len must be a multiple of 8"); |
| return -1; |
| } |
| |
| if (!validate(vm, code, code_len / 8, errmsg)) { |
| return -1; |
| } |
| |
| vm->insts = malloc(code_len); |
| if (vm->insts == NULL) { |
| *errmsg = ubpf_error("out of memory"); |
| return -1; |
| } |
| |
| vm->num_insts = code_len / sizeof(vm->insts[0]); |
| |
| vm->int_funcs = (bool*)calloc(vm->num_insts, sizeof(bool)); |
| if (!vm->int_funcs) { |
| *errmsg = ubpf_error("out of memory"); |
| return -1; |
| } |
| |
| for (uint32_t i = 0; i < vm->num_insts; i++) { |
| /* Mark targets of local call instructions. They |
| * represent the beginning of local functions and |
| * the jitter may need to do something special with |
| * them. |
| */ |
| if (source_inst[i].opcode == EBPF_OP_CALL && source_inst[i].src == 1) { |
| uint32_t target = i + source_inst[i].imm + 1; |
| vm->int_funcs[target] = true; |
| } |
| // Store instructions in the vm. |
| ubpf_store_instruction(vm, i, source_inst[i]); |
| } |
| |
| return 0; |
| } |
| |
| void |
| ubpf_unload_code(struct ubpf_vm* vm) |
| { |
| if (vm->jitted) { |
| munmap(vm->jitted, vm->jitted_size); |
| vm->jitted = NULL; |
| vm->jitted_size = 0; |
| } |
| if (vm->insts) { |
| free(vm->insts); |
| vm->insts = NULL; |
| vm->num_insts = 0; |
| } |
| } |
| |
| static uint32_t |
| u32(uint64_t x) |
| { |
| return x; |
| } |
| |
| static int32_t |
| i32(uint64_t x) |
| { |
| return x; |
| } |
| |
| /** |
| * @brief Sign extend immediate value to a signed 64-bit value. |
| * |
| * @param[in] immediate The signed 32-bit immediate value to sign extend. |
| * @return The sign extended 64-bit value. |
| */ |
| static int64_t |
| i64(int32_t immediate) |
| { |
| return (int64_t)immediate; |
| } |
| |
| #define IS_ALIGNED(x, a) (((uintptr_t)(x) & ((a) - 1)) == 0) |
| |
| inline static uint64_t |
| ubpf_mem_load(uint64_t address, size_t size) |
| { |
| if (!IS_ALIGNED(address, size)) { |
| // Fill the result with 0 to avoid leaking uninitialized memory. |
| uint64_t value = 0; |
| memcpy(&value, (void*)address, size); |
| return value; |
| } |
| |
| switch (size) { |
| case 1: |
| return *(uint8_t*)address; |
| case 2: |
| return *(uint16_t*)address; |
| case 4: |
| return *(uint32_t*)address; |
| case 8: |
| return *(uint64_t*)address; |
| default: |
| abort(); |
| } |
| } |
| |
| inline static void |
| ubpf_mem_store(uint64_t address, uint64_t value, size_t size) |
| { |
| if (!IS_ALIGNED(address, size)) { |
| memcpy((void*)address, &value, size); |
| return; |
| } |
| |
| switch (size) { |
| case 1: |
| *(uint8_t*)address = value; |
| break; |
| case 2: |
| *(uint16_t*)address = value; |
| break; |
| case 4: |
| *(uint32_t*)address = value; |
| break; |
| case 8: |
| *(uint64_t*)address = value; |
| break; |
| default: |
| abort(); |
| } |
| } |
| |
| /** |
| * @brief Mark the bits in the shadow stack corresponding to the address if it is within the stack bounds. |
| * |
| * @param[in] stack The base address of the stack. |
| * @param[in] shadow_stack The base address of the shadow stack. |
| * @param[in] address The address being written to. |
| * @param[in] size The number of bytes being written. |
| */ |
| static inline void |
| ubpf_mark_shadow_stack( |
| const struct ubpf_vm* vm, uint8_t* stack, uint64_t stack_length, uint8_t* shadow_stack, void* address, size_t size) |
| { |
| if (!vm->undefined_behavior_check_enabled) { |
| return; |
| } |
| |
| uintptr_t access_start = (uintptr_t)address; |
| uintptr_t access_end = access_start + size; |
| uintptr_t stack_start = (uintptr_t)stack; |
| uintptr_t stack_end = stack_start + stack_length; |
| |
| if (access_start > access_end) { |
| // Overflow |
| return; |
| } |
| |
| if (access_start >= stack_start && access_end <= stack_end) { |
| // Shadow stack is a bit array, where each bit corresponds to 1 byte in the stack. |
| // If the bit is set, the memory is initialized. |
| size_t offset = access_start - stack_start; |
| for (size_t test_bit = offset; test_bit < offset + size; test_bit++) { |
| // Convert test_bit into offset + mask to test against the shadow stack. |
| size_t bit_offset = test_bit / 8; |
| size_t bit_mask = 1 << (test_bit % 8); |
| shadow_stack[bit_offset] |= bit_mask; |
| } |
| } |
| } |
| |
| /** |
| * @brief Check if the address is within the stack bounds and the shadow stack is marked for the address. |
| * |
| * @param[in] stack The base address of the stack. |
| * @param[in] shadow_stack The base address of the shadow stack. |
| * @param[in] address The address being read from. |
| * @param[in] size The number of bytes being read. |
| * @return true - The read is from initialized memory or is not within the stack bounds. |
| * @return false - The read is from uninitialized memory within the stack bounds. |
| */ |
| static inline bool |
| ubpf_check_shadow_stack( |
| const struct ubpf_vm* vm, uint8_t* stack, uint64_t stack_length, uint8_t* shadow_stack, void* address, size_t size) |
| { |
| if (!vm->undefined_behavior_check_enabled) { |
| return true; |
| } |
| |
| uintptr_t access_start = (uintptr_t)address; |
| uintptr_t access_end = access_start + size; |
| uintptr_t stack_start = (uintptr_t)stack; |
| uintptr_t stack_end = stack_start + stack_length; |
| |
| if (access_start > access_end) { |
| // Overflow |
| return true; |
| } |
| |
| if (access_start >= stack_start && access_end <= stack_end) { |
| // Shadow stack is a bit array, where each bit corresponds to 1 byte in the stack. |
| // If the bit is set, the memory is initialized. |
| size_t offset = access_start - stack_start; |
| for (size_t test_bit = offset; test_bit < offset + size; test_bit++) { |
| // Convert test_bit into offset + mask to test against the shadow stack. |
| size_t bit_offset = test_bit / 8; |
| size_t bit_mask = 1 << (test_bit % 8); |
| if ((shadow_stack[bit_offset] & bit_mask) == 0) { |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| #define REGISTER_TO_SHADOW_MASK(reg) (1 << (reg)) |
| |
| /** |
| * @brief Check if the registers being accessed by this instruction are initialized and mark the destination register as |
| * initialized if it is. |
| * |
| * @param[in] vm The VM instance. |
| * @param[in,out] shadow_registers Storage for the shadow register state. |
| * @param[in] inst The instruction being executed. |
| * @return true - The registers are initialized. |
| * @return false - The registers are not initialized - an error message has been printed. |
| */ |
| static inline bool |
| ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registers, struct ebpf_inst inst) |
| { |
| if (!vm->undefined_behavior_check_enabled) { |
| return true; |
| } |
| |
| bool src_register_required = false; |
| bool dst_register_required = false; |
| bool dst_register_initialized = false; |
| |
| switch (inst.opcode & EBPF_CLS_MASK) { |
| // Load instructions initialize the destination register. |
| case EBPF_CLS_LD: |
| dst_register_initialized = true; |
| break; |
| // Load indirect instructions initialize the destination register and require the source register to be initialized. |
| case EBPF_CLS_LDX: |
| src_register_required = true; |
| dst_register_initialized = true; |
| break; |
| // Store instructions require the destination register to be initialized. |
| case EBPF_CLS_ST: |
| dst_register_required = true; |
| break; |
| // Store indirect instructions require both the source and destination registers to be initialized. |
| case EBPF_CLS_STX: |
| dst_register_required = true; |
| src_register_required = true; |
| break; |
| case EBPF_CLS_ALU: |
| case EBPF_CLS_ALU64: |
| // Source register is required if the EBPF_SRC_REG bit is set. |
| src_register_required = inst.opcode & EBPF_SRC_REG; |
| dst_register_initialized = true; |
| switch (inst.opcode & EBPF_ALU_OP_MASK) { |
| case 0x00: // EBPF_OP_ADD |
| case 0x10: // EBPF_OP_SUB |
| case 0x20: // EBPF_OP_MUL |
| case 0x30: // EBPF_OP_DIV |
| case 0x40: // EBPF_OP_OR |
| case 0x50: // EBPF_OP_AND |
| case 0x60: // EBPF_OP_LSH |
| case 0x70: // EBPF_OP_RSH |
| case 0x80: // EBPF_OP_NEG |
| case 0x90: // EBPF_OP_MOD |
| case 0xa0: // EBPF_OP_XOR |
| case 0xc0: // EBPF_OP_ARSH |
| case 0xd0: // EBPF_OP_LE |
| dst_register_required = true; |
| break; |
| case 0xb0: // EBPF_OP_MOV |
| // Destination register is initialized. |
| break; |
| } |
| break; |
| case EBPF_CLS_JMP: |
| case EBPF_CLS_JMP32: |
| // Source register is required if the EBPF_SRC_REG bit is set. |
| src_register_required = inst.opcode & EBPF_SRC_REG; |
| switch (inst.opcode & EBPF_JMP_OP_MASK) { |
| case EBPF_MODE_JA: |
| case EBPF_MODE_CALL: |
| case EBPF_MODE_EXIT: |
| src_register_required = false; |
| break; |
| case EBPF_MODE_JEQ: |
| case EBPF_MODE_JGT: |
| case EBPF_MODE_JGE: |
| case EBPF_MODE_JSET: |
| case EBPF_MODE_JNE: |
| case EBPF_MODE_JSGT: |
| case EBPF_MODE_JSGE: |
| case EBPF_MODE_JLT: |
| case EBPF_MODE_JLE: |
| case EBPF_MODE_JSLT: |
| case EBPF_MODE_JSLE: |
| dst_register_required = true; |
| break; |
| } |
| break; |
| } |
| |
| if (src_register_required && !(*shadow_registers & REGISTER_TO_SHADOW_MASK(inst.src))) { |
| vm->error_printf(stderr, "Error: Source register r%d is not initialized.\n", inst.src); |
| return false; |
| } |
| |
| if (dst_register_required && !(*shadow_registers & REGISTER_TO_SHADOW_MASK(inst.dst))) { |
| vm->error_printf(stderr, "Error: Destination register r%d is not initialized.\n", inst.dst); |
| return false; |
| } |
| |
| if (dst_register_initialized) { |
| *shadow_registers |= REGISTER_TO_SHADOW_MASK(inst.dst); |
| } |
| |
| if (inst.opcode == EBPF_OP_CALL) { |
| if (inst.src == 0) { |
| // Mark the return address register as initialized. |
| *shadow_registers |= REGISTER_TO_SHADOW_MASK(0); |
| |
| // Mark r1-r5 as uninitialized. |
| *shadow_registers &= |
| ~(REGISTER_TO_SHADOW_MASK(1) | REGISTER_TO_SHADOW_MASK(2) | REGISTER_TO_SHADOW_MASK(3) | |
| REGISTER_TO_SHADOW_MASK(4) | REGISTER_TO_SHADOW_MASK(5)); |
| } else if (inst.src == 1) { |
| // Do nothing, register state will be handled by the callee on return. |
| } |
| } |
| |
| if (inst.opcode == EBPF_OP_EXIT) { |
| if (!(*shadow_registers & REGISTER_TO_SHADOW_MASK(0))) { |
| vm->error_printf(stderr, "Error: Return value register r0 is not initialized.\n"); |
| return false; |
| } |
| // Mark r1-r5 as uninitialized. |
| *shadow_registers &= |
| ~(REGISTER_TO_SHADOW_MASK(1) | REGISTER_TO_SHADOW_MASK(2) | REGISTER_TO_SHADOW_MASK(3) | |
| REGISTER_TO_SHADOW_MASK(4) | REGISTER_TO_SHADOW_MASK(5)); |
| } |
| |
| return true; |
| } |
| |
| int |
| ubpf_exec_ex( |
| const struct ubpf_vm* vm, |
| void* mem, |
| size_t mem_len, |
| uint64_t* bpf_return_value, |
| uint8_t* stack_start, |
| size_t stack_length) |
| { |
| uint16_t pc = 0; |
| const struct ebpf_inst* insts = vm->insts; |
| uint64_t* reg; |
| uint64_t _reg[16]; |
| uint64_t stack_frame_index = 0; |
| int return_value = -1; |
| void* external_dispatcher_cookie = mem; |
| void* shadow_stack = NULL; |
| |
| struct ebpf_inst previous_inst = {.opcode = 0}; |
| |
| if (!insts) { |
| /* Code must be loaded before we can execute */ |
| return -1; |
| } |
| |
| struct ubpf_stack_frame stack_frames[UBPF_MAX_CALL_DEPTH] = { |
| 0, |
| }; |
| |
| if (vm->undefined_behavior_check_enabled) { |
        // The shadow stack tracks one bit per stack byte; round up so a
        // length that is not a multiple of 8 still gets full coverage.
        shadow_stack = calloc((stack_length + 7) / 8, 1);
| if (!shadow_stack) { |
| return_value = -1; |
| goto cleanup; |
| } |
| } |
| |
| #ifdef DEBUG |
| if (vm->regs) |
| reg = vm->regs; |
| else |
| reg = _reg; |
| #else |
| reg = _reg; |
| #endif |
| uint16_t shadow_registers = 0; // Bit mask of registers that have been written to. |
| |
| reg[1] = (uintptr_t)mem; |
| reg[2] = (uint64_t)mem_len; |
| reg[10] = (uintptr_t)stack_start + stack_length; |
| |
| // Mark r1, r2, r10 as initialized. |
| shadow_registers |= REGISTER_TO_SHADOW_MASK(1) | REGISTER_TO_SHADOW_MASK(2) | REGISTER_TO_SHADOW_MASK(10); |
| |
| int instruction_limit = vm->instruction_limit; |
| |
| while (1) { |
| const uint16_t cur_pc = pc; |
| if (pc >= vm->num_insts) { |
| return_value = -1; |
| goto cleanup; |
| } |
| if (vm->instruction_limit && instruction_limit-- <= 0) { |
| return_value = -1; |
| goto cleanup; |
| } |
| |
| if ((pc == 0 || vm->int_funcs[pc]) && stack_frame_index < UBPF_MAX_CALL_DEPTH) { |
            // We are at the entry of a local function: either pc == 0 or an
            // instruction that was marked as a local call target. Reaching it
            // by falling through, rather than via a local call, is undefined
            // behavior.
            if (previous_inst.opcode != 0 && !(previous_inst.opcode == EBPF_OP_CALL && previous_inst.src == 1)) {
                if (vm->undefined_behavior_check_enabled) {
                    vm->error_printf(
                        stderr,
                        "Error: Fell through into the local function at pc %d instead of entering it via a call.\n",
                        pc);
| return_value = -1; |
| goto cleanup; |
| } |
| } |
| stack_frames[stack_frame_index].stack_usage = ubpf_stack_usage_for_local_func(vm, pc); |
| } |
| |
| struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++); |
| |
| if (!ubpf_validate_shadow_register(vm, &shadow_registers, inst)) { |
| return_value = -1; |
| goto cleanup; |
| } |
| |
| switch (inst.opcode) { |
| case EBPF_OP_ADD_IMM: |
| reg[inst.dst] += inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_ADD_REG: |
| reg[inst.dst] += reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_SUB_IMM: |
| reg[inst.dst] -= inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_SUB_REG: |
| reg[inst.dst] -= reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_MUL_IMM: |
| reg[inst.dst] *= inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_MUL_REG: |
| reg[inst.dst] *= reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_DIV_IMM: |
| reg[inst.dst] = u32(inst.imm) ? u32(reg[inst.dst]) / u32(inst.imm) : 0; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_DIV_REG: |
| reg[inst.dst] = u32(reg[inst.src]) ? u32(reg[inst.dst]) / u32(reg[inst.src]) : 0; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_OR_IMM: |
| reg[inst.dst] |= inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_OR_REG: |
| reg[inst.dst] |= reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_AND_IMM: |
| reg[inst.dst] &= inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_AND_REG: |
| reg[inst.dst] &= reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
        case EBPF_OP_LSH_IMM:
            reg[inst.dst] = (u32(reg[inst.dst]) << SHIFT_MASK_32_BIT(inst.imm)) & UINT32_MAX;
            break;
        case EBPF_OP_LSH_REG:
            reg[inst.dst] = (u32(reg[inst.dst]) << SHIFT_MASK_32_BIT(reg[inst.src])) & UINT32_MAX;
            break;
| case EBPF_OP_RSH_IMM: |
| reg[inst.dst] = u32(reg[inst.dst]) >> SHIFT_MASK_32_BIT(inst.imm); |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_RSH_REG: |
| reg[inst.dst] = u32(reg[inst.dst]) >> SHIFT_MASK_32_BIT(reg[inst.src]); |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_NEG: |
| reg[inst.dst] = -(int64_t)reg[inst.dst]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_MOD_IMM: |
| reg[inst.dst] = u32(inst.imm) ? u32(reg[inst.dst]) % u32(inst.imm) : u32(reg[inst.dst]); |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_MOD_REG: |
| reg[inst.dst] = u32(reg[inst.src]) ? u32(reg[inst.dst]) % u32(reg[inst.src]) : u32(reg[inst.dst]); |
| break; |
| case EBPF_OP_XOR_IMM: |
| reg[inst.dst] ^= inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_XOR_REG: |
| reg[inst.dst] ^= reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_MOV_IMM: |
| reg[inst.dst] = inst.imm; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
| case EBPF_OP_MOV_REG: |
| reg[inst.dst] = reg[inst.src]; |
| reg[inst.dst] &= UINT32_MAX; |
| break; |
        case EBPF_OP_ARSH_IMM:
            reg[inst.dst] = (int32_t)reg[inst.dst] >> SHIFT_MASK_32_BIT(inst.imm);
            reg[inst.dst] &= UINT32_MAX;
            break;
        case EBPF_OP_ARSH_REG:
            reg[inst.dst] = (int32_t)reg[inst.dst] >> SHIFT_MASK_32_BIT(reg[inst.src]);
            reg[inst.dst] &= UINT32_MAX;
            break;
| |
| case EBPF_OP_LE: |
| if (inst.imm == 16) { |
| reg[inst.dst] = htole16(reg[inst.dst]); |
| } else if (inst.imm == 32) { |
| reg[inst.dst] = htole32(reg[inst.dst]); |
| } else if (inst.imm == 64) { |
| reg[inst.dst] = htole64(reg[inst.dst]); |
| } |
| break; |
| case EBPF_OP_BE: |
| if (inst.imm == 16) { |
| reg[inst.dst] = htobe16(reg[inst.dst]); |
| } else if (inst.imm == 32) { |
| reg[inst.dst] = htobe32(reg[inst.dst]); |
| } else if (inst.imm == 64) { |
| reg[inst.dst] = htobe64(reg[inst.dst]); |
| } |
| break; |
| |
| case EBPF_OP_ADD64_IMM: |
| reg[inst.dst] += inst.imm; |
| break; |
| case EBPF_OP_ADD64_REG: |
| reg[inst.dst] += reg[inst.src]; |
| break; |
| case EBPF_OP_SUB64_IMM: |
| reg[inst.dst] -= inst.imm; |
| break; |
| case EBPF_OP_SUB64_REG: |
| reg[inst.dst] -= reg[inst.src]; |
| break; |
| case EBPF_OP_MUL64_IMM: |
| reg[inst.dst] *= inst.imm; |
| break; |
| case EBPF_OP_MUL64_REG: |
| reg[inst.dst] *= reg[inst.src]; |
| break; |
| case EBPF_OP_DIV64_IMM: |
| reg[inst.dst] = inst.imm ? reg[inst.dst] / inst.imm : 0; |
| break; |
| case EBPF_OP_DIV64_REG: |
| reg[inst.dst] = reg[inst.src] ? reg[inst.dst] / reg[inst.src] : 0; |
| break; |
| case EBPF_OP_OR64_IMM: |
| reg[inst.dst] |= inst.imm; |
| break; |
| case EBPF_OP_OR64_REG: |
| reg[inst.dst] |= reg[inst.src]; |
| break; |
| case EBPF_OP_AND64_IMM: |
| reg[inst.dst] &= inst.imm; |
| break; |
| case EBPF_OP_AND64_REG: |
| reg[inst.dst] &= reg[inst.src]; |
| break; |
| case EBPF_OP_LSH64_IMM: |
| reg[inst.dst] <<= SHIFT_MASK_64_BIT(inst.imm); |
| break; |
| case EBPF_OP_LSH64_REG: |
| reg[inst.dst] <<= SHIFT_MASK_64_BIT(reg[inst.src]); |
| break; |
| case EBPF_OP_RSH64_IMM: |
| reg[inst.dst] >>= SHIFT_MASK_64_BIT(inst.imm); |
| break; |
| case EBPF_OP_RSH64_REG: |
| reg[inst.dst] >>= SHIFT_MASK_64_BIT(reg[inst.src]); |
| break; |
| case EBPF_OP_NEG64: |
| reg[inst.dst] = -reg[inst.dst]; |
| break; |
| case EBPF_OP_MOD64_IMM: |
| reg[inst.dst] = inst.imm ? reg[inst.dst] % inst.imm : reg[inst.dst]; |
| break; |
| case EBPF_OP_MOD64_REG: |
| reg[inst.dst] = reg[inst.src] ? reg[inst.dst] % reg[inst.src] : reg[inst.dst]; |
| break; |
| case EBPF_OP_XOR64_IMM: |
| reg[inst.dst] ^= inst.imm; |
| break; |
| case EBPF_OP_XOR64_REG: |
| reg[inst.dst] ^= reg[inst.src]; |
| break; |
| case EBPF_OP_MOV64_IMM: |
| reg[inst.dst] = inst.imm; |
| break; |
| case EBPF_OP_MOV64_REG: |
| reg[inst.dst] = reg[inst.src]; |
| break; |
        case EBPF_OP_ARSH64_IMM:
            reg[inst.dst] = (int64_t)reg[inst.dst] >> SHIFT_MASK_64_BIT(inst.imm);
            break;
        case EBPF_OP_ARSH64_REG:
            reg[inst.dst] = (int64_t)reg[inst.dst] >> SHIFT_MASK_64_BIT(reg[inst.src]);
            break;
| |
| /* |
| * HACK runtime bounds check |
| * |
| * Needed since we don't have a verifier yet. |
| */ |
| #define BOUNDS_CHECK_LOAD(size) \ |
| do { \ |
| if (!ubpf_check_shadow_stack( \ |
| vm, stack_start, stack_length, shadow_stack, (char*)reg[inst.src] + inst.offset, size)) { \ |
| return_value = -1; \ |
| goto cleanup; \ |
| } \ |
| if (!bounds_check( \ |
| vm, \ |
| (char*)reg[inst.src] + inst.offset, \ |
| size, \ |
| "load", \ |
| cur_pc, \ |
| mem, \ |
| mem_len, \ |
| stack_start, \ |
| stack_length)) { \ |
| return_value = -1; \ |
| goto cleanup; \ |
| } \ |
| } while (0) |
| #define BOUNDS_CHECK_STORE(size) \ |
| do { \ |
| if (!bounds_check( \ |
| vm, \ |
| (char*)reg[inst.dst] + inst.offset, \ |
| size, \ |
| "store", \ |
| cur_pc, \ |
| mem, \ |
| mem_len, \ |
| stack_start, \ |
| stack_length)) { \ |
| return_value = -1; \ |
| goto cleanup; \ |
| } \ |
| ubpf_mark_shadow_stack(vm, stack_start, stack_length, shadow_stack, (char*)reg[inst.dst] + inst.offset, size); \ |
| } while (0) |
| |
| case EBPF_OP_LDXW: |
| BOUNDS_CHECK_LOAD(4); |
| reg[inst.dst] = ubpf_mem_load(reg[inst.src] + inst.offset, 4); |
| break; |
| case EBPF_OP_LDXH: |
| BOUNDS_CHECK_LOAD(2); |
| reg[inst.dst] = ubpf_mem_load(reg[inst.src] + inst.offset, 2); |
| break; |
| case EBPF_OP_LDXB: |
| BOUNDS_CHECK_LOAD(1); |
| reg[inst.dst] = ubpf_mem_load(reg[inst.src] + inst.offset, 1); |
| break; |
| case EBPF_OP_LDXDW: |
| BOUNDS_CHECK_LOAD(8); |
| reg[inst.dst] = ubpf_mem_load(reg[inst.src] + inst.offset, 8); |
| break; |
| |
| case EBPF_OP_STW: |
| BOUNDS_CHECK_STORE(4); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, inst.imm, 4); |
| break; |
| case EBPF_OP_STH: |
| BOUNDS_CHECK_STORE(2); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, inst.imm, 2); |
| break; |
| case EBPF_OP_STB: |
| BOUNDS_CHECK_STORE(1); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, inst.imm, 1); |
| break; |
| case EBPF_OP_STDW: |
| BOUNDS_CHECK_STORE(8); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, inst.imm, 8); |
| break; |
| |
| case EBPF_OP_STXW: |
| BOUNDS_CHECK_STORE(4); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, reg[inst.src], 4); |
| break; |
| case EBPF_OP_STXH: |
| BOUNDS_CHECK_STORE(2); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, reg[inst.src], 2); |
| break; |
| case EBPF_OP_STXB: |
| BOUNDS_CHECK_STORE(1); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, reg[inst.src], 1); |
| break; |
| case EBPF_OP_STXDW: |
| BOUNDS_CHECK_STORE(8); |
| ubpf_mem_store(reg[inst.dst] + inst.offset, reg[inst.src], 8); |
| break; |
| |
| case EBPF_OP_LDDW: |
| reg[inst.dst] = u32(inst.imm) | ((uint64_t)ubpf_fetch_instruction(vm, pc++).imm << 32); |
| break; |
| |
| case EBPF_OP_JA: |
| pc += inst.offset; |
| break; |
| case EBPF_OP_JEQ_IMM: |
| if (reg[inst.dst] == (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JEQ_REG: |
| if (reg[inst.dst] == reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JEQ32_IMM: |
| if (u32(reg[inst.dst]) == u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JEQ32_REG: |
| if (u32(reg[inst.dst]) == u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGT_IMM: |
| if (reg[inst.dst] > (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGT_REG: |
| if (reg[inst.dst] > reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGT32_IMM: |
| if (u32(reg[inst.dst]) > u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGT32_REG: |
| if (u32(reg[inst.dst]) > u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGE_IMM: |
| if (reg[inst.dst] >= (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGE_REG: |
| if (reg[inst.dst] >= reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGE32_IMM: |
| if (u32(reg[inst.dst]) >= u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JGE32_REG: |
| if (u32(reg[inst.dst]) >= u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLT_IMM: |
| if (reg[inst.dst] < (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLT_REG: |
| if (reg[inst.dst] < reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLT32_IMM: |
| if (u32(reg[inst.dst]) < u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLT32_REG: |
| if (u32(reg[inst.dst]) < u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLE_IMM: |
| if (reg[inst.dst] <= (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLE_REG: |
| if (reg[inst.dst] <= reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLE32_IMM: |
| if (u32(reg[inst.dst]) <= u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JLE32_REG: |
| if (u32(reg[inst.dst]) <= u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSET_IMM: |
| if (reg[inst.dst] & (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSET_REG: |
| if (reg[inst.dst] & reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSET32_IMM: |
| if (u32(reg[inst.dst]) & u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSET32_REG: |
| if (u32(reg[inst.dst]) & u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JNE_IMM: |
| if (reg[inst.dst] != (uint64_t)i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JNE_REG: |
| if (reg[inst.dst] != reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JNE32_IMM: |
| if (u32(reg[inst.dst]) != u32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JNE32_REG: |
| if (u32(reg[inst.dst]) != u32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGT_IMM: |
| if ((int64_t)reg[inst.dst] > i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGT_REG: |
| if ((int64_t)reg[inst.dst] > (int64_t)reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGT32_IMM: |
| if (i32(reg[inst.dst]) > i32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGT32_REG: |
| if (i32(reg[inst.dst]) > i32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGE_IMM: |
| if ((int64_t)reg[inst.dst] >= i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGE_REG: |
| if ((int64_t)reg[inst.dst] >= (int64_t)reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGE32_IMM: |
| if (i32(reg[inst.dst]) >= i32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSGE32_REG: |
| if (i32(reg[inst.dst]) >= i32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLT_IMM: |
| if ((int64_t)reg[inst.dst] < i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLT_REG: |
| if ((int64_t)reg[inst.dst] < (int64_t)reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLT32_IMM: |
| if (i32(reg[inst.dst]) < i32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLT32_REG: |
| if (i32(reg[inst.dst]) < i32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLE_IMM: |
| if ((int64_t)reg[inst.dst] <= i64(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLE_REG: |
| if ((int64_t)reg[inst.dst] <= (int64_t)reg[inst.src]) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLE32_IMM: |
| if (i32(reg[inst.dst]) <= i32(inst.imm)) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_JSLE32_REG: |
| if (i32(reg[inst.dst]) <= i32(reg[inst.src])) { |
| pc += inst.offset; |
| } |
| break; |
| case EBPF_OP_EXIT: |
| if (stack_frame_index > 0) { |
| stack_frame_index--; |
| pc = stack_frames[stack_frame_index].return_address; |
| reg[BPF_REG_6] = stack_frames[stack_frame_index].saved_registers[0]; |
| reg[BPF_REG_7] = stack_frames[stack_frame_index].saved_registers[1]; |
| reg[BPF_REG_8] = stack_frames[stack_frame_index].saved_registers[2]; |
| reg[BPF_REG_9] = stack_frames[stack_frame_index].saved_registers[3]; |
| reg[BPF_REG_10] += stack_frames[stack_frame_index].stack_usage; |
| break; |
| } |
| *bpf_return_value = reg[0]; |
| return_value = 0; |
| goto cleanup; |
| case EBPF_OP_CALL: |
            // Differentiate between local and external calls -- assume that the
            // program was assembled with the same endianness as the host machine.
| if (inst.src == 0) { |
| // Handle call by address to external function. |
| if (vm->dispatcher != NULL) { |
| reg[0] = |
| vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie); |
| } else { |
| reg[0] = ubpf_default_external_dispatcher( |
| reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs); |
| } |
| if (inst.imm == vm->unwind_stack_extension_index && reg[0] == 0) { |
| *bpf_return_value = reg[0]; |
| return_value = 0; |
| goto cleanup; |
| } |
| } else if (inst.src == 1) { |
| if (stack_frame_index >= UBPF_MAX_CALL_DEPTH) { |
                    vm->error_printf(
                        stderr,
                        "uBPF error: number of nested function calls (%lu) exceeds max (%lu) at PC %u\n",
                        (unsigned long)(stack_frame_index + 1),
                        (unsigned long)UBPF_MAX_CALL_DEPTH,
                        cur_pc);
| return_value = -1; |
| goto cleanup; |
| } |
| stack_frames[stack_frame_index].saved_registers[0] = reg[BPF_REG_6]; |
| stack_frames[stack_frame_index].saved_registers[1] = reg[BPF_REG_7]; |
| stack_frames[stack_frame_index].saved_registers[2] = reg[BPF_REG_8]; |
| stack_frames[stack_frame_index].saved_registers[3] = reg[BPF_REG_9]; |
| stack_frames[stack_frame_index].return_address = pc; |
| |
| reg[BPF_REG_10] -= stack_frames[stack_frame_index].stack_usage; |
| |
| stack_frame_index++; |
| pc += inst.imm; |
| break; |
| } else if (inst.src == 2) { |
| // Calling external function by BTF ID is not yet supported. |
| return_value = -1; |
| goto cleanup; |
| } |
| // Because we have already validated, we can assume that the type code is |
| // valid. |
| break; |
| default: |
| vm->error_printf(stderr, "Error: unknown opcode %d at PC %d\n", inst.opcode, cur_pc); |
| return_value = -1; |
| goto cleanup; |
| } |
| if (((inst.opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU) && (inst.opcode & EBPF_ALU_OP_MASK) != 0xd0) { |
| reg[inst.dst] &= UINT32_MAX; |
| } |
| // Save the previous instruction for detecting falling through to the start of another function. |
| previous_inst = inst; |
| } |
| |
| cleanup: |
| if (shadow_stack) { |
| free(shadow_stack); |
| } |
| return return_value; |
| } |
| |
| int |
| ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value) |
| { |
| // Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically. |
| #if defined(NTDDI_VERSION) && defined(WINNT) |
| uint64_t* stack = NULL; |
| |
| stack = calloc(UBPF_EBPF_STACK_SIZE, 1); |
| if (!stack) { |
| return -1; |
| } |
| #else |
| uint64_t stack[UBPF_EBPF_STACK_SIZE / sizeof(uint64_t)]; |
| #endif |
| int result = ubpf_exec_ex(vm, mem, mem_len, bpf_return_value, (uint8_t*)stack, UBPF_EBPF_STACK_SIZE); |
| #if defined(NTDDI_VERSION) && defined(WINNT) |
| free(stack); |
| #endif |
| return result; |
| } |
| |
| static bool |
| validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_insts, char** errmsg) |
| { |
| if (num_insts >= UBPF_MAX_INSTS) { |
| *errmsg = ubpf_error("too many instructions (max %u)", UBPF_MAX_INSTS); |
| return false; |
| } |
| |
| if (!ubpf_calculate_stack_usage_for_local_func(vm, 0, errmsg)) { |
| return false; |
| } |
| |
| int i; |
| for (i = 0; i < num_insts; i++) { |
| struct ebpf_inst inst = insts[i]; |
| bool store = false; |
| |
| switch (inst.opcode) { |
| case EBPF_OP_ADD_IMM: |
| case EBPF_OP_ADD_REG: |
| case EBPF_OP_SUB_IMM: |
| case EBPF_OP_SUB_REG: |
| case EBPF_OP_MUL_IMM: |
| case EBPF_OP_MUL_REG: |
| case EBPF_OP_DIV_REG: |
| case EBPF_OP_OR_IMM: |
| case EBPF_OP_OR_REG: |
| case EBPF_OP_AND_IMM: |
| case EBPF_OP_AND_REG: |
| case EBPF_OP_LSH_IMM: |
| case EBPF_OP_LSH_REG: |
| case EBPF_OP_RSH_IMM: |
| case EBPF_OP_RSH_REG: |
| case EBPF_OP_NEG: |
| case EBPF_OP_MOD_REG: |
| case EBPF_OP_XOR_IMM: |
| case EBPF_OP_XOR_REG: |
| case EBPF_OP_MOV_IMM: |
| case EBPF_OP_MOV_REG: |
| case EBPF_OP_ARSH_IMM: |
| case EBPF_OP_ARSH_REG: |
| break; |
| |
| case EBPF_OP_LE: |
| case EBPF_OP_BE: |
| if (inst.imm != 16 && inst.imm != 32 && inst.imm != 64) { |
| *errmsg = ubpf_error("invalid endian immediate at PC %d", i); |
| return false; |
| } |
| break; |
| |
| case EBPF_OP_ADD64_IMM: |
| case EBPF_OP_ADD64_REG: |
| case EBPF_OP_SUB64_IMM: |
| case EBPF_OP_SUB64_REG: |
| case EBPF_OP_MUL64_IMM: |
| case EBPF_OP_MUL64_REG: |
| case EBPF_OP_DIV64_REG: |
| case EBPF_OP_OR64_IMM: |
| case EBPF_OP_OR64_REG: |
| case EBPF_OP_AND64_IMM: |
| case EBPF_OP_AND64_REG: |
| case EBPF_OP_LSH64_IMM: |
| case EBPF_OP_LSH64_REG: |
| case EBPF_OP_RSH64_IMM: |
| case EBPF_OP_RSH64_REG: |
| case EBPF_OP_NEG64: |
| case EBPF_OP_MOD64_REG: |
| case EBPF_OP_XOR64_IMM: |
| case EBPF_OP_XOR64_REG: |
| break; |
| case EBPF_OP_MOV64_IMM: |
| case EBPF_OP_MOV64_REG: |
| store = true; |
| break; |
| case EBPF_OP_ARSH64_IMM: |
| case EBPF_OP_ARSH64_REG: |
| break; |
| |
| case EBPF_OP_LDXW: |
| case EBPF_OP_LDXH: |
| case EBPF_OP_LDXB: |
| case EBPF_OP_LDXDW: |
| break; |
| |
| case EBPF_OP_STW: |
| case EBPF_OP_STH: |
| case EBPF_OP_STB: |
| case EBPF_OP_STDW: |
| case EBPF_OP_STXW: |
| case EBPF_OP_STXH: |
| case EBPF_OP_STXB: |
| case EBPF_OP_STXDW: |
| store = true; |
| break; |
| |
| case EBPF_OP_LDDW: |
| if (inst.src != 0) { |
| *errmsg = ubpf_error("invalid source register for LDDW at PC %d", i); |
| return false; |
| } |
| if (i + 1 >= num_insts || insts[i + 1].opcode != 0) { |
| *errmsg = ubpf_error("incomplete lddw at PC %d", i); |
| return false; |
| } |
            i++; /* Skip the second (pseudo) half of the lddw pair. */
| break; |
| |
| case EBPF_OP_JA: |
| case EBPF_OP_JEQ_REG: |
| case EBPF_OP_JEQ_IMM: |
| case EBPF_OP_JGT_REG: |
| case EBPF_OP_JGT_IMM: |
| case EBPF_OP_JGE_REG: |
| case EBPF_OP_JGE_IMM: |
| case EBPF_OP_JLT_REG: |
| case EBPF_OP_JLT_IMM: |
| case EBPF_OP_JLE_REG: |
| case EBPF_OP_JLE_IMM: |
| case EBPF_OP_JSET_REG: |
| case EBPF_OP_JSET_IMM: |
| case EBPF_OP_JNE_REG: |
| case EBPF_OP_JNE_IMM: |
| case EBPF_OP_JSGT_IMM: |
| case EBPF_OP_JSGT_REG: |
| case EBPF_OP_JSGE_IMM: |
| case EBPF_OP_JSGE_REG: |
| case EBPF_OP_JSLT_IMM: |
| case EBPF_OP_JSLT_REG: |
| case EBPF_OP_JSLE_IMM: |
| case EBPF_OP_JSLE_REG: |
| case EBPF_OP_JEQ32_IMM: |
| case EBPF_OP_JEQ32_REG: |
| case EBPF_OP_JGT32_IMM: |
| case EBPF_OP_JGT32_REG: |
| case EBPF_OP_JGE32_IMM: |
| case EBPF_OP_JGE32_REG: |
| case EBPF_OP_JSET32_REG: |
| case EBPF_OP_JSET32_IMM: |
| case EBPF_OP_JNE32_IMM: |
| case EBPF_OP_JNE32_REG: |
| case EBPF_OP_JSGT32_IMM: |
| case EBPF_OP_JSGT32_REG: |
| case EBPF_OP_JSGE32_IMM: |
| case EBPF_OP_JSGE32_REG: |
| case EBPF_OP_JLT32_IMM: |
| case EBPF_OP_JLT32_REG: |
| case EBPF_OP_JLE32_IMM: |
| case EBPF_OP_JLE32_REG: |
| case EBPF_OP_JSLT32_IMM: |
| case EBPF_OP_JSLT32_REG: |
| case EBPF_OP_JSLE32_IMM: |
| case EBPF_OP_JSLE32_REG: |
| if (inst.offset == -1) { |
| *errmsg = ubpf_error("infinite loop at PC %d", i); |
| return false; |
| } |
| int new_pc = i + 1 + inst.offset; |
| if (new_pc < 0 || new_pc >= num_insts) { |
| *errmsg = ubpf_error("jump out of bounds at PC %d", i); |
| return false; |
| } else if (insts[new_pc].opcode == 0) { |
| *errmsg = ubpf_error("jump to middle of lddw at PC %d", i); |
| return false; |
| } |
| break; |
| |
| case EBPF_OP_CALL: |
| if (inst.src == 0) { |
| if (inst.imm < 0 || inst.imm >= MAX_EXT_FUNCS) { |
| *errmsg = ubpf_error("invalid call immediate at PC %d", i); |
| return false; |
| } |
| if ((vm->dispatcher != NULL && !vm->dispatcher_validate(inst.imm, vm)) || |
| (vm->dispatcher == NULL && !vm->ext_funcs[inst.imm])) { |
| *errmsg = ubpf_error("call to nonexistent function %u at PC %d", inst.imm, i); |
| return false; |
| } |
| } else if (inst.src == 1) { |
| int call_target = i + (inst.imm + 1); |
| if (call_target < 0 || call_target >= num_insts) { |
| *errmsg = |
| ubpf_error("call to local function (at PC %d) is out of bounds (target: %d)", i, call_target); |
| return false; |
| } |
| if (!ubpf_calculate_stack_usage_for_local_func(vm, call_target, errmsg)) { |
| return false; |
| } |
| } else if (inst.src == 2) { |
| *errmsg = ubpf_error("call to external function by BTF ID (at PC %d) is not supported", i); |
| return false; |
| } else { |
| *errmsg = ubpf_error("call (at PC %d) contains invalid type value", i); |
| return false; |
| } |
| break; |
| |
| case EBPF_OP_EXIT: |
| break; |
| |
| case EBPF_OP_DIV_IMM: |
| case EBPF_OP_MOD_IMM: |
| case EBPF_OP_DIV64_IMM: |
| case EBPF_OP_MOD64_IMM: |
| break; |
| |
| default: |
| *errmsg = ubpf_error("unknown opcode 0x%02x at PC %d", inst.opcode, i); |
| return false; |
| } |
| |
| if (inst.src > 10) { |
| *errmsg = ubpf_error("invalid source register at PC %d", i); |
| return false; |
| } |
| |
| if (inst.dst > 9 && !(store && inst.dst == 10)) { |
| *errmsg = ubpf_error("invalid destination register at PC %d", i); |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| static bool |
| bounds_check( |
| const struct ubpf_vm* vm, |
| void* addr, |
| int size, |
| const char* type, |
| uint16_t cur_pc, |
| void* mem, |
| size_t mem_len, |
| void* stack, |
| size_t stack_len) |
| { |
| if (!vm->bounds_check_enabled) |
| return true; |
| |
| uintptr_t access_start = (uintptr_t)addr; |
| uintptr_t access_end = access_start + size; |
| uintptr_t stack_start = (uintptr_t)stack; |
| uintptr_t stack_end = stack_start + stack_len; |
| uintptr_t mem_start = (uintptr_t)mem; |
| uintptr_t mem_end = mem_start + mem_len; |
| |
| // Memory in the range [access_start, access_end) is being accessed. |
| // Memory in the range [stack_start, stack_end) is the stack. |
| // Memory in the range [mem_start, mem_end) is the memory. |
| |
| if (access_start > access_end) { |
| vm->error_printf( |
| stderr, "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n", type, cur_pc, addr, size); |
| return false; |
| } |
| |
| // Check if the access is within the memory bounds. |
| // Note: The comparison is <= because the end address is one past the last byte for both |
| // the access and the memory regions. |
| if (access_start >= mem_start && access_end <= mem_end) { |
| return true; |
| } |
| |
| // Check if the access is within the stack bounds. |
| // Note: The comparison is <= because the end address is one past the last byte for both |
| // the access and the stack regions. |
| if (access_start >= stack_start && access_end <= stack_end) { |
| return true; |
| } |
| |
| // The address may be invalid or it may be a region of memory that the caller |
| // is aware of but that is not part of the stack or memory. |
| // Call any registered bounds check function to determine if the access is valid. |
| if (vm->bounds_check_function != NULL && |
| vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) { |
| return true; |
| } |
| |
| // Memory is neither stack, nor memory, nor valid according to the bounds check function. |
| |
| // Access is out of bounds. |
    vm->error_printf(
        stderr,
        "uBPF error: out of bounds memory %s at PC %u, addr %p, size %d\nmem %p/%zu stack %p/%zu\n",
        type,
        cur_pc,
        addr,
        size,
        mem,
        mem_len,
        stack,
        stack_len);
| return false; |
| } |
| |
| char* |
| ubpf_error(const char* fmt, ...) |
| { |
| char* msg; |
| va_list ap; |
| va_start(ap, fmt); |
| if (vasprintf(&msg, fmt, ap) < 0) { |
| msg = NULL; |
| } |
| va_end(ap); |
| return msg; |
| } |
| |
| #ifdef DEBUG |
| void |
| ubpf_set_registers(struct ubpf_vm* vm, uint64_t* regs) |
| { |
| vm->regs = regs; |
| } |
| |
| uint64_t* |
| ubpf_get_registers(const struct ubpf_vm* vm) |
| { |
| return vm->regs; |
| } |
| |
| #else |
| void |
| ubpf_set_registers(struct ubpf_vm* vm, uint64_t* regs) |
| { |
| (void)vm; |
| (void)regs; |
| fprintf(stderr, "uBPF warning: registers are not exposed in release mode. Please recompile in debug mode\n"); |
| } |
| |
| uint64_t* |
| ubpf_get_registers(const struct ubpf_vm* vm) |
| { |
| (void)vm; |
| fprintf(stderr, "uBPF warning: registers are not exposed in release mode. Please recompile in debug mode\n"); |
| return NULL; |
| } |
| #endif |
| |
| typedef struct _ebpf_encoded_inst |
| { |
| union |
| { |
| uint64_t value; |
| struct ebpf_inst inst; |
| }; |
| } ebpf_encoded_inst; |
| |
| struct ebpf_inst |
| ubpf_fetch_instruction(const struct ubpf_vm* vm, uint16_t pc) |
| { |
    // XOR the instruction with the base address of the instruction array and
    // the per-VM pointer secret. This makes ROP attacks that reuse stored
    // instructions more difficult.
| ebpf_encoded_inst encode_inst; |
| encode_inst.inst = vm->insts[pc]; |
| encode_inst.value ^= (uint64_t)vm->insts; |
| encode_inst.value ^= vm->pointer_secret; |
| return encode_inst.inst; |
| } |
| |
| void |
| ubpf_store_instruction(const struct ubpf_vm* vm, uint16_t pc, struct ebpf_inst inst) |
| { |
    // XOR the instruction with the base address of the instruction array and
    // the per-VM pointer secret. This makes ROP attacks that reuse stored
    // instructions more difficult.
| ebpf_encoded_inst encode_inst; |
| encode_inst.inst = inst; |
| encode_inst.value ^= (uint64_t)vm->insts; |
| encode_inst.value ^= vm->pointer_secret; |
| vm->insts[pc] = encode_inst.inst; |
| } |
| |
| int |
| ubpf_set_pointer_secret(struct ubpf_vm* vm, uint64_t secret) |
| { |
| if (vm->insts) { |
| return -1; |
| } |
| vm->pointer_secret = secret; |
| return 0; |
| } |
| |
| int |
| ubpf_register_data_relocation(struct ubpf_vm* vm, void* user_context, ubpf_data_relocation relocation) |
| { |
| if (vm->data_relocation_function != NULL) { |
| return -1; |
| } |
| vm->data_relocation_function = relocation; |
| vm->data_relocation_user_data = user_context; |
| return 0; |
| } |
| |
| int |
| ubpf_register_data_bounds_check(struct ubpf_vm* vm, void* user_context, ubpf_bounds_check bounds_check) |
| { |
| if (vm->bounds_check_function != NULL) { |
| return -1; |
| } |
| vm->bounds_check_function = bounds_check; |
| vm->bounds_check_user_data = user_context; |
| return 0; |
| } |
| |
| int |
| ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previous_limit) |
| { |
| if (previous_limit != NULL) { |
| *previous_limit = vm->instruction_limit; |
| } |
| vm->instruction_limit = limit; |
| return 0; |
| } |
| |
| bool |
| ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg) |
| { |
| // If there is a stack usage calculator and we have not invoked it before for the target, |
| // then now is the time to call it! |
| if (vm->stack_usage_calculator && !vm->local_func_stack_usage[pc].stack_usage_calculated) { |
| uint16_t stack_usage = (vm->stack_usage_calculator)(vm, pc, vm->stack_usage_calculator_cookie); |
| vm->local_func_stack_usage[pc].stack_usage = stack_usage; |
| } |
| vm->local_func_stack_usage[pc].stack_usage_calculated = true; |
    // Now that we are guaranteed to have a value for the amount of stack used by the
    // function starting at pc, make sure that it is 16-byte aligned. Note: the amount
    // of stack used might be 0 (when there is no registered stack usage calculator
    // callback); that is okay because ubpf_stack_usage_for_local_func gives callers a
    // meaningful default.
| if (vm->local_func_stack_usage[pc].stack_usage % 16) { |
| *errmsg = ubpf_error( |
| "local function (at PC %d) has improperly sized stack use (%d)", |
| pc, |
| vm->local_func_stack_usage[pc].stack_usage); |
| return false; |
| } |
| return true; |
| } |
| |
| uint16_t |
| ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc) |
| { |
| uint16_t stack_usage = UBPF_EBPF_STACK_SIZE; |
| if (vm->local_func_stack_usage[pc].stack_usage_calculated) { |
| stack_usage = vm->local_func_stack_usage[pc].stack_usage; |
| } |
| return stack_usage; |
| } |
| |
| int |
| ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie) |
| { |
| vm->stack_usage_calculator_cookie = cookie; |
| vm->stack_usage_calculator = calculator; |
| return 0; |
| } |