// vi: ft=c
/*
* Copyright 2015 Big Switch Networks, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <errno.h>
#include <assert.h>
#include "ubpf_int.h"
#define DASM_CHECKS
#include <dasm_proto.h>
#include <dasm_x86.h>
|.arch x64
/* DynASM hardcodes 'Dst' as the name of the dasm_State handle its macros
* reference, which reads badly next to our 'dst' register variable, so
* alias our 'dasm' variable to it */
#define dasm Dst
/* Size in bytes of the eBPF program's private stack, addressed via register 10 */
#define STACK_SIZE 128
/*
* Register mapping:
*
* eBPF -> x86
* 0 -> rax
* 1 -> rdi
* 2 -> rsi
* 3 -> rdx
* 4 -> r9
* 5 -> r8
* 6 -> rbx
* 7 -> r13
* 8 -> r14
* 9 -> r15
* 10 -> rbp
*
* eBPF calling convention:
* - Registers r6-r10 are callee save.
* - Function call arguments go in r1-r5.
* - Return value goes in r0.
*
* x86 calling convention:
* - Registers rbp, rbx, r12, r13, r14, and r15 are callee save.
* - Function call arguments go in rdi, rsi, rdx, rcx, r8, and r9.
* - Return value goes in rax.
*
* rcx (cl) is the required count operand for shifts by a register, so we
* reserve it. rsp could not be used as the eBPF stack pointer and r12
* could not be used at all due to DynASM encoding issues.
*/
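/*
* As a worked example of the default mapping (a sketch, not emitted
* verbatim): the 32-bit eBPF instruction EBPF_OP_ADD_REG with dst=r1 and
* src=r2 is assembled below as "add edi, esi", since r1 -> rdi,
* r2 -> rsi, and 32-bit ALU ops use the Rd() dword register forms.
*/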
#define RAX 0
#define RCX 1
#define RDX 2
#define RBX 3
#define RSP 4
#define RBP 5
#define RSI 6
#define RDI 7
#define R8 8
#define R9 9
#define R10 10
#define R11 11
/* r12 is unused (encoding problems) */
#define R13 13
#define R14 14
#define R15 15
#define REGISTER_MAP_SIZE 11
static int register_map[REGISTER_MAP_SIZE] = {
RAX,
RDI,
RSI,
RDX,
R9,
R8,
RBX,
R13,
R14,
R15,
RBP,
};
/* Return the x86 register for the given eBPF register */
static int
map_register(int r)
{
assert(r < REGISTER_MAP_SIZE);
return register_map[r % REGISTER_MAP_SIZE];
}
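/* For instance, with the default table map_register(0) == RAX and
* map_register(10) == RBP */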
/* Test hook: rotate (x < REGISTER_MAP_SIZE) or pseudo-randomly shuffle
* (x >= REGISTER_MAP_SIZE) the eBPF-to-x86 register mapping */
void
ubpf_set_register_offset(int x)
{
int i;
if (x < REGISTER_MAP_SIZE) {
int tmp[REGISTER_MAP_SIZE];
memcpy(tmp, register_map, sizeof(register_map));
for (i = 0; i < REGISTER_MAP_SIZE; i++) {
register_map[i] = tmp[(i+x)%REGISTER_MAP_SIZE];
}
} else {
/* Fisher-Yates shuffle, deterministically seeded with x */
unsigned int seed = x;
for (i = 0; i < REGISTER_MAP_SIZE-1; i++) {
int j = i + (rand_r(&seed) % (REGISTER_MAP_SIZE-i));
int tmp = register_map[j];
register_map[j] = register_map[i];
register_map[i] = tmp;
}
}
}
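/*
* For instance (rotate case): after ubpf_set_register_offset(1), eBPF
* register 0 maps to rdi and register 1 to rsi, each slot taking over its
* successor's entry in the table above.
*/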
static void muldivmod(dasm_State **dasm, uint16_t pc, uint8_t opcode, int src, int dst, int32_t imm);
|.actionlist actions
ubpf_jit_fn
ubpf_compile(struct ubpf_vm *vm, char **errmsg)
{
dasm_State *d;
void *jitted = NULL;
size_t jitted_size;
int ret;
*errmsg = NULL;
if (vm->jitted) {
return vm->jitted;
}
if (!vm->insts) {
*errmsg = ubpf_error("code has not been loaded into this VM");
return NULL;
}
/* DynASM setup: declare the code section and global labels, then grow the
* dynamic PC table to one label per eBPF instruction so jumps can target
* any PC */
|.section code
dasm_init(&d, DASM_MAXSECTION);
|.globals lbl_
void *labels[lbl__MAX];
dasm_setupglobal(&d, labels, lbl__MAX);
dasm_setup(&d, actions);
dasm_growpc(&d, vm->num_insts);
dasm_State **dasm = &d;
if ((ret = dasm_checkstep(dasm, 0)) != DASM_S_OK) {
*errmsg = ubpf_error("DynASM error %#x before prologue", ret);
goto error;
}
/* Prologue */
|.code
|->entry:
/* rbp, rbx, r13, r14, and r15 are callee save */
/* TODO don't save registers we don't write to */
| push rbp
| push rbx
| push r13
| push r14
| push r15
/* Move the context pointer, passed in rdi per the x86 convention, into
* eBPF register 1 */
if (map_register(1) != RDI) {
| mov Rq(map_register(1)), rdi
}
/* Allocate stack space and store the resulting rsp in eBPF register 10,
* the frame pointer */
| sub rsp, STACK_SIZE
| mov Rq(map_register(10)), Rq(RSP)
if ((ret = dasm_checkstep(dasm, 0)) != DASM_S_OK) {
*errmsg = ubpf_error("DynASM error %#x after prologue", ret);
goto error;
}
/* Translate the instructions one-for-one, pinning DynASM's dynamic label
* i at each slot so branches can resolve forwards or backwards */
int i;
for (i = 0; i < vm->num_insts; i++) {
struct ebpf_inst inst = vm->insts[i];
|=>i:
int src = map_register(inst.src);
int dst = map_register(inst.dst);
int jmp_target = i + inst.offset + 1;
switch (inst.opcode) {
case EBPF_OP_ADD_IMM:
| add Rd(dst), inst.imm
break;
case EBPF_OP_ADD_REG:
| add Rd(dst), Rd(src)
break;
case EBPF_OP_SUB_IMM:
| sub Rd(dst), inst.imm
break;
case EBPF_OP_SUB_REG:
| sub Rd(dst), Rd(src)
break;
case EBPF_OP_MUL_IMM:
case EBPF_OP_MUL_REG:
case EBPF_OP_DIV_IMM:
case EBPF_OP_DIV_REG:
case EBPF_OP_MOD_IMM:
case EBPF_OP_MOD_REG:
muldivmod(dasm, i, inst.opcode, src, dst, inst.imm);
break;
case EBPF_OP_OR_IMM:
| or Rd(dst), inst.imm
break;
case EBPF_OP_OR_REG:
| or Rd(dst), Rd(src)
break;
case EBPF_OP_AND_IMM:
| and Rd(dst), inst.imm
break;
case EBPF_OP_AND_REG:
| and Rd(dst), Rd(src)
break;
case EBPF_OP_LSH_IMM:
| shl Rd(dst), inst.imm
break;
case EBPF_OP_LSH_REG:
| mov Rq(RCX), Rq(src)
| shl Rd(dst), cl
break;
case EBPF_OP_RSH_IMM:
| shr Rd(dst), inst.imm
break;
case EBPF_OP_RSH_REG:
| mov Rq(RCX), Rq(src)
| shr Rd(dst), cl
break;
case EBPF_OP_NEG:
| neg Rd(dst)
break;
case EBPF_OP_XOR_IMM:
| xor Rd(dst), inst.imm
break;
case EBPF_OP_XOR_REG:
| xor Rd(dst), Rd(src)
break;
case EBPF_OP_MOV_IMM:
| mov Rd(dst), inst.imm
break;
case EBPF_OP_MOV_REG:
| mov Rd(dst), Rd(src)
break;
case EBPF_OP_ARSH_IMM:
| sar Rd(dst), inst.imm
break;
case EBPF_OP_ARSH_REG:
| mov Rq(RCX), Rq(src)
| sar Rd(dst), cl
break;
case EBPF_OP_LE:
/* No-op: x86 is little-endian */
break;
case EBPF_OP_BE:
if (inst.imm == 16) {
| rol Rw(dst), 8
| and Rd(dst), 0xffff
} else if (inst.imm == 32) {
/* bswap of r8d+ is misassembled */
if (dst < 8) {
| bswap Rd(dst)
} else {
| mov Rq(RCX), Rq(dst)
| bswap ecx
| mov Rd(dst), ecx
}
} else if (inst.imm == 64) {
/* bswap of r8+ is misassembled */
if (dst < 8) {
| bswap Rq(dst)
} else {
| mov Rq(RCX), Rq(dst)
| bswap rcx
| mov Rq(dst), rcx
}
}
break;
case EBPF_OP_ADD64_IMM:
| add Rq(dst), inst.imm
break;
case EBPF_OP_ADD64_REG:
| add Rq(dst), Rq(src)
break;
case EBPF_OP_SUB64_IMM:
| sub Rq(dst), inst.imm
break;
case EBPF_OP_SUB64_REG:
| sub Rq(dst), Rq(src)
break;
case EBPF_OP_MUL64_IMM:
case EBPF_OP_MUL64_REG:
case EBPF_OP_DIV64_IMM:
case EBPF_OP_DIV64_REG:
case EBPF_OP_MOD64_IMM:
case EBPF_OP_MOD64_REG:
muldivmod(dasm, i, inst.opcode, src, dst, inst.imm);
break;
case EBPF_OP_OR64_IMM:
| or Rq(dst), inst.imm
break;
case EBPF_OP_OR64_REG:
| or Rq(dst), Rq(src)
break;
case EBPF_OP_AND64_IMM:
| and Rq(dst), inst.imm
break;
case EBPF_OP_AND64_REG:
| and Rq(dst), Rq(src)
break;
case EBPF_OP_LSH64_IMM:
| shl Rq(dst), inst.imm
break;
case EBPF_OP_LSH64_REG:
| mov Rq(RCX), Rq(src)
| shl Rq(dst), cl
break;
case EBPF_OP_RSH64_IMM:
| shr Rq(dst), inst.imm
break;
case EBPF_OP_RSH64_REG:
| mov Rq(RCX), Rq(src)
| shr Rq(dst), cl
break;
case EBPF_OP_NEG64:
| neg Rq(dst)
break;
case EBPF_OP_XOR64_IMM:
| xor Rq(dst), inst.imm
break;
case EBPF_OP_XOR64_REG:
| xor Rq(dst), Rq(src)
break;
case EBPF_OP_MOV64_IMM:
/* TODO use shorter mov for smaller immediates */
| mov Rq(dst), inst.imm
break;
case EBPF_OP_MOV64_REG:
| mov Rq(dst), Rq(src)
break;
case EBPF_OP_ARSH64_IMM:
| sar Rq(dst), inst.imm
break;
case EBPF_OP_ARSH64_REG:
| mov Rq(RCX), Rq(src)
| sar Rq(dst), cl
break;
case EBPF_OP_JA:
| jmp =>jmp_target
break;
case EBPF_OP_JEQ_IMM:
| cmp Rq(dst), inst.imm
| je =>jmp_target
break;
case EBPF_OP_JEQ_REG:
| cmp Rq(dst), Rq(src)
| je =>jmp_target
break;
case EBPF_OP_JGT_IMM:
| cmp Rq(dst), inst.imm
| ja =>jmp_target
break;
case EBPF_OP_JGT_REG:
| cmp Rq(dst), Rq(src)
| ja =>jmp_target
break;
case EBPF_OP_JGE_IMM:
| cmp Rq(dst), inst.imm
| jae =>jmp_target
break;
case EBPF_OP_JGE_REG:
| cmp Rq(dst), Rq(src)
| jae =>jmp_target
break;
case EBPF_OP_JSET_IMM:
| test Rq(dst), inst.imm
| jnz =>jmp_target
break;
case EBPF_OP_JSET_REG:
| test Rq(dst), Rq(src)
| jnz =>jmp_target
break;
case EBPF_OP_JNE_IMM:
| cmp Rq(dst), inst.imm
| jne =>jmp_target
break;
case EBPF_OP_JNE_REG:
| cmp Rq(dst), Rq(src)
| jne =>jmp_target
break;
case EBPF_OP_JSGT_IMM:
| cmp Rq(dst), inst.imm
| jg =>jmp_target
break;
case EBPF_OP_JSGT_REG:
| cmp Rq(dst), Rq(src)
| jg =>jmp_target
break;
case EBPF_OP_JSGE_IMM:
| cmp Rq(dst), inst.imm
| jge =>jmp_target
break;
case EBPF_OP_JSGE_REG:
| cmp Rq(dst), Rq(src)
| jge =>jmp_target
break;
case EBPF_OP_CALL:
/* rcx is reserved for shifts, so eBPF r4 was mapped to r9 instead; move
* it into rcx, the fourth x86 argument register, before calling out */
| mov rcx, r9
/* TODO direct call */
| mov rax, vm->ext_funcs[inst.imm]
| call rax
break;
case EBPF_OP_EXIT:
/* The final EXIT simply falls through into the epilogue */
if (i != vm->num_insts - 1) {
| jmp ->exit
}
break;
case EBPF_OP_LDXW:
| mov Rd(dst), dword [Rq(src)+inst.offset]
break;
case EBPF_OP_LDXH:
/* movzx is not assembled correctly */
| xor ecx, ecx
| mov cx, word [Rq(src)+inst.offset]
| mov Rq(dst), Rq(RCX)
break;
case EBPF_OP_LDXB:
/* movzx is not assembled correctly */
| xor ecx, ecx
| mov cl, byte [Rq(src)+inst.offset]
| mov Rq(dst), Rq(RCX)
break;
case EBPF_OP_LDXDW:
| mov Rq(dst), qword [Rq(src)+inst.offset]
break;
case EBPF_OP_STW:
| mov dword [Rq(dst)+inst.offset], inst.imm
break;
case EBPF_OP_STH:
| mov word [Rq(dst)+inst.offset], inst.imm
break;
case EBPF_OP_STB:
| mov byte [Rq(dst)+inst.offset], inst.imm
break;
case EBPF_OP_STDW:
| mov qword [Rq(dst)+inst.offset], inst.imm
break;
case EBPF_OP_STXW:
| mov dword [Rq(dst)+inst.offset], Rd(src)
break;
case EBPF_OP_STXH:
| mov word [Rq(dst)+inst.offset], Rw(src)
break;
case EBPF_OP_STXB:
/* Rb(src) generates the wrong byte registers */
/* mov rcx, Rq(src) also did not work */
| mov Rq(RCX), Rq(src)
| mov byte [Rq(dst)+inst.offset], cl
break;
case EBPF_OP_STXDW:
| mov qword [Rq(dst)+inst.offset], Rq(src)
break;
case EBPF_OP_LDDW: {
/* LDDW occupies two instruction slots; combine the two 32-bit halves
* into a single 64-bit immediate */
struct ebpf_inst inst2 = vm->insts[++i];
uint64_t imm = (uint32_t)inst.imm | ((uint64_t)inst2.imm << 32);
| mov64 Rq(dst), imm
break;
}
}
if ((ret = dasm_checkstep(dasm, 0)) != DASM_S_OK) {
*errmsg = ubpf_error("DynASM error %#x at PC %d: opcode %02x src %u dst %u", ret, i, inst.opcode, src, dst);
goto error;
}
}
/* Epilogue */
| ->exit:
/* Move register 0 into rax */
if (map_register(0) != RAX) {
| mov Rq(RAX), Rq(map_register(0))
}
| ->exit2:
| add rsp, STACK_SIZE
| pop r15
| pop r14
| pop r13
| pop rbx
| pop rbp
| ret
/* Division by zero handler */
const char *div_by_zero_fmt = "uBPF error: division by zero at PC %u\n";
| ->div_by_zero:
/* rdx, the third x86 argument register, already holds the faulting PC;
* load the remaining fprintf(stderr, fmt, pc) arguments and call out */
| mov64 rdi, (uintptr_t)stderr
| mov64 rsi, (uintptr_t)div_by_zero_fmt
| mov64 rax, (uintptr_t)fprintf
| call rax
| mov rax, -1
| jmp ->exit2
if ((ret = dasm_checkstep(dasm, 0)) != DASM_S_OK) {
*errmsg = ubpf_error("DynASM error %#x after epilogue", ret);
goto error;
}
if ((ret = dasm_link(dasm, &jitted_size)) != DASM_S_OK) {
*errmsg = ubpf_error("internal uBPF error: dasm_link failed: %d", ret);
goto error;
}
/* Encode into writable memory, then flip it to read+execute below (W^X) */
jitted = mmap(0, jitted_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (jitted == MAP_FAILED) {
*errmsg = ubpf_error("internal uBPF error: mmap failed: %s\n", strerror(errno));
goto error;
}
if ((ret = dasm_encode(dasm, jitted)) != DASM_S_OK) {
*errmsg = ubpf_error("internal uBPF error: dasm_encode failed: %d\n", ret);
goto error;
}
if (mprotect(jitted, jitted_size, PROT_READ | PROT_EXEC) < 0) {
*errmsg = ubpf_error("internal uBPF error: mprotect failed: %s\n", strerror(errno));
goto error;
}
assert(labels[lbl_entry] == jitted);
dasm_free(dasm);
vm->jitted = labels[lbl_entry];
vm->jitted_size = jitted_size;
return vm->jitted;
error:
dasm_free(dasm);
if (jitted) {
munmap(jitted, jitted_size);
}
return NULL;
}
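/*
* Usage sketch, assuming the public uBPF API (ubpf_create() and
* ubpf_load() live elsewhere; code/code_len and mem/mem_len are
* caller-supplied):
*
*   struct ubpf_vm *vm = ubpf_create();
*   char *errmsg;
*   if (ubpf_load(vm, code, code_len, &errmsg) == 0) {
*       ubpf_jit_fn fn = ubpf_compile(vm, &errmsg);
*       if (fn) {
*           uint64_t ret = fn(mem, mem_len);
*       }
*   }
*/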
static void
muldivmod(dasm_State **dasm, uint16_t pc, uint8_t opcode, int src, int dst, int32_t imm)
{
bool mul = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MUL_IMM & EBPF_ALU_OP_MASK);
bool div = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_DIV_IMM & EBPF_ALU_OP_MASK);
bool mod = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MOD_IMM & EBPF_ALU_OP_MASK);
bool is64 = (opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU64;
if (div || mod) {
/* The divisor is src for the register forms (opcode bit 0x08 set) and
* imm for the immediate forms */
if (opcode & 0x08) {
if (is64) {
| test Rq(src), Rq(src)
} else {
| test Rd(src), Rd(src)
}
| jnz >1
| mov rdx, pc
| jmp ->div_by_zero
|1:
} else if (imm == 0) {
/* Constant zero divisor: always an error */
| mov rdx, pc
| jmp ->div_by_zero
}
}
/* mul/div clobber rax and rdx; preserve whichever of them does not
* receive the result */
if (dst != RAX) {
| push rax
}
if (dst != RDX) {
| push rdx
}
/* Opcode bit 0x08 selects the register operand; testing imm alone would
* pick the wrong operand when the immediate happens to be zero */
if (opcode & 0x08) {
| mov Rq(RCX), Rq(src)
} else {
| mov Rq(RCX), imm
}
| mov Rq(RAX), Rq(dst)
if (div || mod) {
| xor edx, edx
if (is64) {
| div rcx
} else {
| div ecx
}
} else {
if (is64) {
| mul rcx
} else {
| mul ecx
}
}
if (dst != RDX) {
if (mod) {
| mov Rq(dst), Rq(RDX)
}
| pop rdx
}
if (dst != RAX) {
if (div || mul) {
| mov Rq(dst), Rq(RAX)
}
| pop rax
}
}
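/*
* For illustration, a sketch of what this emits for EBPF_OP_DIV64_REG
* with dst = rbx (eBPF r6) and src = rsi (eBPF r2):
*
*   test rsi, rsi        ; trap on a zero divisor
*   jnz 1f
*   mov rdx, pc
*   jmp ->div_by_zero
* 1:
*   push rax
*   push rdx
*   mov rcx, rsi
*   mov rax, rbx
*   xor edx, edx
*   div rcx              ; quotient -> rax, remainder -> rdx
*   pop rdx
*   mov rbx, rax         ; quotient into the destination
*   pop rax
*/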