| /* simulator.c -- Interface for the AArch64 simulator. |
| |
| Copyright (C) 2015-2018 Free Software Foundation, Inc. |
| |
| Contributed by Red Hat. |
| |
| This file is part of GDB. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <sys/types.h> |
| #include <math.h> |
| #include <time.h> |
| #include <limits.h> |
| |
| #include "simulator.h" |
| #include "cpustate.h" |
| #include "memory.h" |
| |
/* Flags passed to the general register accessors: NO_SP is used for
   operands that may not be the stack pointer, SP_OK for those that may.  */
#define NO_SP 0
#define SP_OK 1

/* CPSR flag tests.  These expand to code that refers to a variable
   named `cpu' in the enclosing scope.  */
#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro: extract bits HIGH..LOW of the current instruction
   of `cpu'.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

/* Trace the current instruction as unallocated and halt the simulation
   with SIM_SIGILL.  */
#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

/* Trace the current instruction as unimplemented and halt the
   simulation with SIM_SIGABRT.  When no tracing is enabled an error is
   printed instead so that the failure is still visible.  The opcode is
   printed as "0x" followed by eight hex digits: "%#08x" would count the
   prefix in the field width and drop it entirely for a zero value.  */
#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu),				\
			"SIM Error: Unimplemented instruction: 0x%08x\n", \
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

/* Halt via HALT_NYI unless instruction bits HI..LO equal EXPECTED.  */
#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)
| |
/* Helper functions used by expandLogicalImmediate.  */

/* Return a value whose N least significant bits are set and whose
   other bits are zero.  N is expected to be in the range 0..64; values
   outside that range are clamped.  The arithmetic is done in uint64_t
   so that the result does not depend on the width of the host's
   `unsigned long'.  */
static inline uint64_t
ones (int N)
{
  if (N <= 0)
    return 0;

  /* Shifting a 64-bit value by 64 is undefined, so handle it here.  */
  if (N >= 64)
    return ~(uint64_t) 0;

  return ((uint64_t) 1 << N) - 1;
}
| |
/* Extract the single bit val<N> using pickbits64 with identical high
   and low bit positions.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  const int bit = N;

  return pickbits64 (val, bit, bit);
}
| |
/* Expand the (N, immr, imms) fields of a logical immediate, passed here
   as N, R and S, into the 64-bit value they encode.

   The value is built from an element of S+1 consecutive one bits that
   is rotated right by R within the element and then replicated to fill
   64 bits.  Returns 0 for encodings that are rejected (unknown element
   size, or an element consisting entirely of ones).  */
static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t element_mask;
  uint64_t pattern;
  unsigned element_bits;
  unsigned width;

  if (N != 0)
    {
      /* A single 64-bit element.  */
      element_bits = 64;
      element_mask = 0xffffffffffffffffull;
    }
  else
    {
      /* The leading bits of S select the element size; the remaining
	 low bits give the run length minus one.  */
      if (S <= 0x1f)		/* 0xxxxx */
	element_bits = 32;
      else if (S <= 0x2f)	/* 10xxxx */
	{
	  element_bits = 16;
	  S &= 0xf;
	}
      else if (S <= 0x37)	/* 110xxx */
	{
	  element_bits = 8;
	  S &= 0x7;
	}
      else if (S <= 0x3b)	/* 1110xx */
	{
	  element_bits = 4;
	  S &= 0x3;
	}
      else if (S <= 0x3d)	/* 11110x */
	{
	  element_bits = 2;
	  S &= 0x1;
	}
      else
	return 0;

      element_mask = (1ull << element_bits) - 1;
      /* Rotation bits above the element size are ignored.  */
      R &= element_bits - 1;
    }

  /* A run covering the whole element would give all ones, which is
     rejected.  This also guarantees S + 1 < 64 below.  */
  if (S == element_bits - 1)
    return 0;

  /* S+1 consecutive one bits.  */
  pattern = (1ull << (S + 1)) - 1;

  /* Rotate right by R within the element (i.e. left by size - R).  */
  if (R != 0)
    pattern = ((pattern << (element_bits - R)) & element_mask)
	      | (pattern >> R);

  /* Replicate the element by repeated doubling until 64 bits are
     filled.  */
  for (width = element_bits; width < 64; width *= 2)
    pattern = (pattern << width) | pattern;

  return pattern;
}
| |
/* Instr[22,10] encodes N, immr and imms.  A lookup table with one
   entry per possible combination is kept, i.e. 13 bits worth of
   entries.  */
#define LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

/* Fill LITable so that entry I holds the expansion of the logical
   immediate whose N:immr:imms fields equal I.  */
void
aarch64_init_LIT_table (void)
{
  unsigned entry = 0;

  while (entry < LI_TABLE_SIZE)
    {
      /* Split the table index into its three encoded fields.  */
      uint32_t n_bit = uimm (entry, 12, 12);
      uint32_t rotate = uimm (entry, 11, 6);
      uint32_t run = uimm (entry, 5, 0);

      LITable[entry] = expand_logical_immediate (run, rotate, n_bit);
      entry++;
    }
}
| |
| static void |
| dexNotify (sim_cpu *cpu) |
| { |
| /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry |
| 2 ==> exit Java, 3 ==> start next bytecode. */ |
| uint32_t type = INSTR (14, 0); |
| |
| TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type); |
| |
| switch (type) |
| { |
| case 0: |
| /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0), |
| aarch64_get_reg_u64 (cpu, R22, 0)); */ |
| break; |
| case 1: |
| /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0), |
| aarch64_get_reg_u64 (cpu, R22, 0)); */ |
| break; |
| case 2: |
| /* aarch64_notifyMethodExit (); */ |
| break; |
| case 3: |
| /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0), |
| aarch64_get_reg_u64 (cpu, R22, 0)); */ |
| break; |
| } |
| } |
| |
| /* secondary decode within top level groups */ |
| |
| static void |
| dexPseudo (sim_cpu *cpu) |
| { |
| /* assert instr[28,27] = 00 |
| |
| We provide 2 pseudo instructions: |
| |
| HALT stops execution of the simulator causing an immediate |
| return to the x86 code which entered it. |
| |
| CALLOUT initiates recursive entry into x86 code. A register |
| argument holds the address of the x86 routine. Immediate |
| values in the instruction identify the number of general |
| purpose and floating point register arguments to be passed |
| and the type of any value to be returned. */ |
| |
| uint32_t PSEUDO_HALT = 0xE0000000U; |
| uint32_t PSEUDO_CALLOUT = 0x00018000U; |
| uint32_t PSEUDO_CALLOUTR = 0x00018001U; |
| uint32_t PSEUDO_NOTIFY = 0x00014000U; |
| uint32_t dispatch; |
| |
| if (aarch64_get_instr (cpu) == PSEUDO_HALT) |
| { |
| TRACE_EVENTS (cpu, " Pseudo Halt Instruction"); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGTRAP); |
| } |
| |
| dispatch = INSTR (31, 15); |
| |
| /* We do not handle callouts at the moment. */ |
| if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR) |
| { |
| TRACE_EVENTS (cpu, " Callout"); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGABRT); |
| } |
| |
| else if (dispatch == PSEUDO_NOTIFY) |
| dexNotify (cpu); |
| |
| else |
| HALT_UNALLOC; |
| } |
| |
| /* Load-store single register (unscaled offset) |
| These instructions employ a base register plus an unscaled signed |
| 9 bit offset. |
| |
| N.B. the base register (source) can be Xn or SP. all other |
| registers may not be SP. */ |
| |
| /* 32 bit load 32 bit unscaled signed 9 bit. */ |
| static void |
| ldur32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 64 bit load 64 bit unscaled signed 9 bit. */ |
| static void |
| ldur64 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 32 bit load zero-extended byte unscaled signed 9 bit. */ |
| static void |
| ldurb32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 32 bit load sign-extended byte unscaled signed 9 bit. */ |
| static void |
| ldursb32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 64 bit load sign-extended byte unscaled signed 9 bit. */ |
| static void |
| ldursb64 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 32 bit load zero-extended short unscaled signed 9 bit */ |
| static void |
| ldurh32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 32 bit load sign-extended short unscaled signed 9 bit */ |
| static void |
| ldursh32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 64 bit load sign-extended short unscaled signed 9 bit */ |
| static void |
| ldursh64 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* 64 bit load sign-extended word unscaled signed 9 bit */ |
| static void |
| ldursw (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + offset)); |
| } |
| |
| /* N.B. with stores the value in source is written to the address |
| identified by source2 modified by offset. */ |
| |
| /* 32 bit store 32 bit unscaled signed 9 bit. */ |
| static void |
| stur32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u32 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, |
| aarch64_get_reg_u32 (cpu, rd, NO_SP)); |
| } |
| |
| /* 64 bit store 64 bit unscaled signed 9 bit */ |
| static void |
| stur64 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u64 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, |
| aarch64_get_reg_u64 (cpu, rd, NO_SP)); |
| } |
| |
| /* 32 bit store byte unscaled signed 9 bit */ |
| static void |
| sturb (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u8 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, |
| aarch64_get_reg_u8 (cpu, rd, NO_SP)); |
| } |
| |
| /* 32 bit store short unscaled signed 9 bit */ |
| static void |
| sturh (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u16 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, |
| aarch64_get_reg_u16 (cpu, rd, NO_SP)); |
| } |
| |
| /* Load single register pc-relative label |
| Offset is a signed 19 bit immediate count in words |
| rt may not be SP. */ |
| |
| /* 32 bit pc-relative load */ |
| static void |
| ldr32_pcrel (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_mem_u32 |
| (cpu, aarch64_get_PC (cpu) + offset * 4)); |
| } |
| |
| /* 64 bit pc-relative load */ |
| static void |
| ldr_pcrel (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_mem_u64 |
| (cpu, aarch64_get_PC (cpu) + offset * 4)); |
| } |
| |
| /* sign extended 32 bit pc-relative load */ |
| static void |
| ldrsw_pcrel (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_mem_s32 |
| (cpu, aarch64_get_PC (cpu) + offset * 4)); |
| } |
| |
| /* float pc-relative load */ |
| static void |
| fldrs_pcrel (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u32 (cpu, rd, 0, |
| aarch64_get_mem_u32 |
| (cpu, aarch64_get_PC (cpu) + offset * 4)); |
| } |
| |
| /* double pc-relative load */ |
| static void |
| fldrd_pcrel (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, st, 0, |
| aarch64_get_mem_u64 |
| (cpu, aarch64_get_PC (cpu) + offset * 4)); |
| } |
| |
| /* long double pc-relative load. */ |
| static void |
| fldrq_pcrel (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int st = INSTR (4, 0); |
| uint64_t addr = aarch64_get_PC (cpu) + offset * 4; |
| FRegister a; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_get_mem_long_double (cpu, addr, & a); |
| aarch64_set_FP_long_double (cpu, st, a); |
| } |
| |
/* This can be used to scale an offset by applying the requisite
   shift.  The second argument is the access size in bits; it is pasted
   onto `ScaleShift' to name the shift count.  This file uses 16, 32,
   64 and 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset by
   applying the requisite shift as indicated by the _Scaling argument.
   The second argument is the access size in bits, as for SCALE.  When
   _Scaling is non-zero the offset is shifted by the matching
   ScaleShift value, otherwise it is left unshifted.

   The offset is converted to uint64_t before shifting because callers
   pass sign-extended (possibly negative) int64_t values, and left
   shifting a negative signed value is undefined behaviour in C.  The
   result is therefore a uint64_t holding the two's complement scaled
   displacement.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
    ((uint64_t) (_offset) << ((_Scaling) ? ScaleShift ## _elementType : 0))
| |
| /* This can be used to zero or sign extend a 32 bit register derived |
| value to a 64 bit value. the first argument must be the value as |
| a uint32_t and the second must be either UXTW or SXTW. The result |
| is returned as an int64_t. */ |
| |
| static inline int64_t |
| extend (uint32_t value, Extension extension) |
| { |
| union |
| { |
| uint32_t u; |
| int32_t n; |
| } x; |
| |
| /* A branchless variant of this ought to be possible. */ |
| if (extension == UXTW || extension == NoExtension) |
| return value; |
| |
| x.u = value; |
| return x.n; |
| } |
| |
| /* Scalar Floating Point |
| |
| FP load/store single register (4 addressing modes) |
| |
| N.B. the base register (source) can be the stack pointer. |
| The secondary source register (source2) can only be an Xn register. */ |
| |
| /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address)); |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* Load 8 bit with unsigned 12 bit offset. */ |
| static void |
| fldrb_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr)); |
| } |
| |
| /* Load 16 bit scaled unsigned 12 bit. */ |
| static void |
| fldrh_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr)); |
| } |
| |
| /* Load 32 bit scaled unsigned 12 bit. */ |
| static void |
| fldrs_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr)); |
| } |
| |
| /* Load 64 bit scaled unsigned 12 bit. */ |
| static void |
| fldrd_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr)); |
| } |
| |
| /* Load 128 bit scaled unsigned 12 bit. */ |
| static void |
| fldrq_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rd = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr)); |
| aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8)); |
| } |
| |
| /* Load 32 bit scaled or unscaled zero- or sign-extended |
| 32-bit register offset. */ |
| static void |
| fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 32, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 |
| (cpu, address + displacement)); |
| } |
| |
| /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */ |
| static void |
| fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 64, scaling); |
| |
| fldrd_wb (cpu, displacement, NoWriteBack); |
| } |
| |
| /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| FRegister a; |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_get_mem_long_double (cpu, address, & a); |
| aarch64_set_FP_long_double (cpu, st, a); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */ |
| static void |
| fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 128, scaling); |
| |
| fldrq_wb (cpu, displacement, NoWriteBack); |
| } |
| |
| /* Memory Access |
| |
| load-store single register |
| There are four addressing modes available here which all employ a |
| 64 bit source (base) register. |
| |
| N.B. the base register (source) can be the stack pointer. |
| The secondary source register (source2)can only be an Xn register. |
| |
| Scaled, 12-bit, unsigned immediate offset, without pre- and |
| post-index options. |
| Unscaled, 9-bit, signed immediate offset with pre- or post-index |
| writeback. |
| scaled or unscaled 64-bit register offset. |
| scaled or unscaled 32-bit extended register offset. |
| |
| All offsets are assumed to be raw from the decode i.e. the |
| simulator is expected to adjust scaled offsets based on the |
| accessed data size with register or extended register offset |
| versions the same applies except that in the latter case the |
| operation may also require a sign extend. |
| |
| A separate method is provided for each possible addressing mode. */ |
| |
| /* 32 bit load 32 bit scaled unsigned 12 bit */ |
| static void |
| ldr32_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 32))); |
| } |
| |
| /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit load 32 bit scaled or unscaled |
| zero- or sign-extended 32-bit register offset */ |
| static void |
| ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 32, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| aarch64_get_mem_u32 (cpu, address + displacement)); |
| } |
| |
| /* 64 bit load 64 bit scaled unsigned 12 bit */ |
| static void |
| ldr_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 64))); |
| } |
| |
| /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 64 bit load 64 bit scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 64, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| aarch64_get_mem_u64 (cpu, address + displacement)); |
| } |
| |
| /* 32 bit load zero-extended byte scaled unsigned 12 bit. */ |
| static void |
| ldrb32_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be |
| there is no scaling required for a byte load. */ |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| aarch64_get_mem_u8 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); |
| } |
| |
| /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit load zero-extended byte scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* There is no scaling required for a byte load. */ |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| aarch64_get_mem_u8 (cpu, address + displacement)); |
| } |
| |
| /* 64 bit load sign-extended byte unscaled signed 9 bit |
| with pre- or post-writeback. */ |
| static void |
| ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| int64_t val; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| val = aarch64_get_mem_s8 (cpu, address); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, val); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 64 bit load sign-extended byte scaled unsigned 12 bit. */ |
| static void |
| ldrsb_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| ldrsb_wb (cpu, offset, NoWriteBack); |
| } |
| |
| /* 64 bit load sign-extended byte scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* There is no scaling required for a byte load. */ |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, |
| aarch64_get_mem_s8 (cpu, address + displacement)); |
| } |
| |
| /* 32 bit load zero-extended short scaled unsigned 12 bit. */ |
| static void |
| ldrh32_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint32_t val; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 16)); |
| aarch64_set_reg_u32 (cpu, rt, NO_SP, val); |
| } |
| |
| /* 32 bit load zero-extended short unscaled signed 9 bit |
| with pre- or post-writeback. */ |
| static void |
| ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit load zero-extended short scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 16, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u32 (cpu, rt, NO_SP, |
| aarch64_get_mem_u16 (cpu, address + displacement)); |
| } |
| |
| /* 32 bit load sign-extended short scaled unsigned 12 bit. */ |
| static void |
| ldrsh32_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| int32_t val; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 16)); |
| aarch64_set_reg_s32 (cpu, rt, NO_SP, val); |
| } |
| |
| /* 32 bit load sign-extended short unscaled signed 9 bit |
| with pre- or post-writeback. */ |
| static void |
| ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s32 (cpu, rt, NO_SP, |
| (int32_t) aarch64_get_mem_s16 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit load sign-extended short scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 16, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s32 (cpu, rt, NO_SP, |
| (int32_t) aarch64_get_mem_s16 |
| (cpu, address + displacement)); |
| } |
| |
| /* 64 bit load sign-extended short scaled unsigned 12 bit. */ |
| static void |
| ldrsh_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| int64_t val; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 16)); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, val); |
| } |
| |
| /* 64 bit load sign-extended short unscaled signed 9 bit |
| with pre- or post-writeback. */ |
| static void |
| ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| int64_t val; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| val = aarch64_get_mem_s16 (cpu, address); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, val); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 64 bit load sign-extended short scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 16, scaling); |
| int64_t val; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| val = aarch64_get_mem_s16 (cpu, address + displacement); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, val); |
| } |
| |
| /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */ |
| static void |
| ldrsw_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| int64_t val; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 32)); |
| /* The target register may not be SP but the source may be. */ |
| return aarch64_set_reg_s64 (cpu, rt, NO_SP, val); |
| } |
| |
| /* 64 bit load sign-extended 32 bit unscaled signed 9 bit |
| with pre- or post-writeback. */ |
| static void |
| ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 64 bit load sign-extended 32 bit scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 32, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s64 (cpu, rt, NO_SP, |
| aarch64_get_mem_s32 (cpu, address + displacement)); |
| } |
| |
| /* N.B. with stores the value in source is written to the |
| address identified by source2 modified by source3/offset. */ |
| |
| /* 32 bit store scaled unsigned 12 bit. */ |
| static void |
| str32_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 32)), |
| aarch64_get_reg_u32 (cpu, rt, NO_SP)); |
| } |
| |
| /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit store scaled or unscaled zero- or |
| sign-extended 32-bit register offset. */ |
| static void |
| str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 32, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u32 (cpu, address + displacement, |
| aarch64_get_reg_u64 (cpu, rt, NO_SP)); |
| } |
| |
| /* 64 bit store scaled unsigned 12 bit. */ |
| static void |
| str_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u64 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 64), |
| aarch64_get_reg_u64 (cpu, rt, NO_SP)); |
| } |
| |
| /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 64 bit store scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| uint64_t displacement = OPT_SCALE (extended, 64, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u64 (cpu, address + displacement, |
| aarch64_get_reg_u64 (cpu, rt, NO_SP)); |
| } |
| |
| /* 32 bit store byte scaled unsigned 12 bit. */ |
| static void |
| strb_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. |
| There is no scaling required for a byte load. */ |
| aarch64_set_mem_u8 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, |
| aarch64_get_reg_u8 (cpu, rt, NO_SP)); |
| } |
| |
| /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit store byte scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* There is no scaling required for a byte load. */ |
| aarch64_set_mem_u8 (cpu, address + displacement, |
| aarch64_get_reg_u8 (cpu, rt, NO_SP)); |
| } |
| |
| /* 32 bit store short scaled unsigned 12 bit. */ |
| static void |
| strh_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The target register may not be SP but the source may be. */ |
| aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + SCALE (offset, 16), |
| aarch64_get_reg_u16 (cpu, rt, NO_SP)); |
| } |
| |
| /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address; |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit store short scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| /* rn may reference SP, rm and rt must reference ZR */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension); |
| uint64_t displacement = OPT_SCALE (extended, 16, scaling); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u16 (cpu, address + displacement, |
| aarch64_get_reg_u16 (cpu, rt, NO_SP)); |
| } |
| |
/* Prefetch unsigned 12 bit.

   The body is currently empty: CPU and OFFSET are not used and no
   simulator state is changed.  The comment inside sketches how the
   operation and address would be decoded.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}
| |
/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.

   The body is currently empty: CPU, SCALING and EXTENSION are not
   used and no simulator state is changed.  The comment inside
   sketches how the operation and address would be decoded.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
     extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address  */
}
| |
/* 64 bit pc-relative prefetch.

   The body is currently empty: CPU and OFFSET are not used and no
   simulator state is changed.  The comment inside sketches how the
   operation and address would be decoded.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this  */
}
| |
| /* Load-store exclusive. */ |
| |
| static void |
| ldxr (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int size = INSTR (31, 30); |
| /* int ordered = INSTR (15, 15); */ |
| /* int exclusive = ! INSTR (23, 23); */ |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (size) |
| { |
| case 0: |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address)); |
| break; |
| case 1: |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address)); |
| break; |
| case 2: |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address)); |
| break; |
| case 3: |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address)); |
| break; |
| } |
| } |
| |
| static void |
| stxr (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rt = INSTR (4, 0); |
| unsigned rs = INSTR (20, 16); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int size = INSTR (31, 30); |
| uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP); |
| |
| switch (size) |
| { |
| case 0: aarch64_set_mem_u8 (cpu, address, data); break; |
| case 1: aarch64_set_mem_u16 (cpu, address, data); break; |
| case 2: aarch64_set_mem_u32 (cpu, address, data); break; |
| case 3: aarch64_set_mem_u64 (cpu, address, data); break; |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */ |
| } |
| |
| static void |
| dexLoadLiteral (sim_cpu *cpu) |
| { |
| /* instr[29,27] == 011 |
| instr[25,24] == 00 |
| instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS |
| 010 ==> LDRX, 011 ==> FLDRD |
| 100 ==> LDRSW, 101 ==> FLDRQ |
| 110 ==> PRFM, 111 ==> UNALLOC |
| instr[26] ==> V : 0 ==> GReg, 1 ==> FReg |
| instr[23, 5] == simm19 */ |
| |
| /* unsigned rt = INSTR (4, 0); */ |
| uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26); |
| int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5); |
| |
| switch (dispatch) |
| { |
| case 0: ldr32_pcrel (cpu, imm); break; |
| case 1: fldrs_pcrel (cpu, imm); break; |
| case 2: ldr_pcrel (cpu, imm); break; |
| case 3: fldrd_pcrel (cpu, imm); break; |
| case 4: ldrsw_pcrel (cpu, imm); break; |
| case 5: fldrq_pcrel (cpu, imm); break; |
| case 6: prfm_pcrel (cpu, imm); break; |
| case 7: |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* Immediate arithmetic |
| The aimm argument is a 12 bit unsigned value or a 12 bit unsigned |
| value left shifted by 12 bits (done at decode). |
| |
| N.B. the register args (dest, source) can normally be Xn or SP. |
| the exception occurs for flag setting instructions which may |
| only use Xn for the output (dest). */ |
| |
| /* 32 bit add immediate. */ |
| static void |
| add32 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm); |
| } |
| |
| /* 64 bit add immediate. */ |
| static void |
| add64 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm); |
| } |
| |
| static void |
| set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2) |
| { |
| int32_t result = value1 + value2; |
| int64_t sresult = (int64_t) value1 + (int64_t) value2; |
| uint64_t uresult = (uint64_t)(uint32_t) value1 |
| + (uint64_t)(uint32_t) value2; |
| uint32_t flags = 0; |
| |
| if (result == 0) |
| flags |= Z; |
| |
| if (result & (1 << 31)) |
| flags |= N; |
| |
| if (uresult != (uint32_t)result) |
| flags |= C; |
| |
| if (sresult != result) |
| flags |= V; |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
/* Sign tests for the flag helpers.  Both macros expand to an
   expression that refers to a variable named `signbit', which must be
   in scope where they are used and holds the mask to test against.
   NEG is true when every bit of that mask is set in A; POS is true
   when none of them is.  */
#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)
| |
| static void |
| set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2) |
| { |
| uint64_t result = value1 + value2; |
| uint32_t flags = 0; |
| uint64_t signbit = 1ULL << 63; |
| |
| if (result == 0) |
| flags |= Z; |
| |
| if (NEG (result)) |
| flags |= N; |
| |
| if ( (NEG (value1) && NEG (value2)) |
| || (NEG (value1) && POS (result)) |
| || (NEG (value2) && POS (result))) |
| flags |= C; |
| |
| if ( (NEG (value1) && NEG (value2) && POS (result)) |
| || (POS (value1) && POS (value2) && NEG (result))) |
| flags |= V; |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| static void |
| set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2) |
| { |
| uint32_t result = value1 - value2; |
| uint32_t flags = 0; |
| uint32_t signbit = 1U << 31; |
| |
| if (result == 0) |
| flags |= Z; |
| |
| if (NEG (result)) |
| flags |= N; |
| |
| if ( (NEG (value1) && POS (value2)) |
| || (NEG (value1) && POS (result)) |
| || (POS (value2) && POS (result))) |
| flags |= C; |
| |
| if ( (NEG (value1) && POS (value2) && POS (result)) |
| || (POS (value1) && NEG (value2) && NEG (result))) |
| flags |= V; |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| static void |
| set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2) |
| { |
| uint64_t result = value1 - value2; |
| uint32_t flags = 0; |
| uint64_t signbit = 1ULL << 63; |
| |
| if (result == 0) |
| flags |= Z; |
| |
| if (NEG (result)) |
| flags |= N; |
| |
| if ( (NEG (value1) && POS (value2)) |
| || (NEG (value1) && POS (result)) |
| || (POS (value2) && POS (result))) |
| flags |= C; |
| |
| if ( (NEG (value1) && POS (value2) && POS (result)) |
| || (POS (value1) && NEG (value2) && NEG (result))) |
| flags |= V; |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| static void |
| set_flags_for_binop32 (sim_cpu *cpu, uint32_t result) |
| { |
| uint32_t flags = 0; |
| |
| if (result == 0) |
| flags |= Z; |
| else |
| flags &= ~ Z; |
| |
| if (result & (1 << 31)) |
| flags |= N; |
| else |
| flags &= ~ N; |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| static void |
| set_flags_for_binop64 (sim_cpu *cpu, uint64_t result) |
| { |
| uint32_t flags = 0; |
| |
| if (result == 0) |
| flags |= Z; |
| else |
| flags &= ~ Z; |
| |
| if (result & (1ULL << 63)) |
| flags |= N; |
| else |
| flags &= ~ N; |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| /* 32 bit add immediate set flags. */ |
| static void |
| adds32 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| /* TODO : do we need to worry about signs here? */ |
| int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm); |
| set_flags_for_add32 (cpu, value1, aimm); |
| } |
| |
| /* 64 bit add immediate set flags. */ |
| static void |
| adds64 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| uint64_t value2 = aimm; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); |
| set_flags_for_add64 (cpu, value1, value2); |
| } |
| |
| /* 32 bit sub immediate. */ |
| static void |
| sub32 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm); |
| } |
| |
| /* 64 bit sub immediate. */ |
| static void |
| sub64 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm); |
| } |
| |
| /* 32 bit sub immediate set flags. */ |
| static void |
| subs32 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| uint32_t value2 = aimm; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); |
| set_flags_for_sub32 (cpu, value1, value2); |
| } |
| |
| /* 64 bit sub immediate set flags. */ |
| static void |
| subs64 (sim_cpu *cpu, uint32_t aimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| uint32_t value2 = aimm; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); |
| set_flags_for_sub64 (cpu, value1, value2); |
| } |
| |
| /* Data Processing Register. */ |
| |
| /* First two helpers to perform the shift operations. */ |
| |
| static inline uint32_t |
| shifted32 (uint32_t value, Shift shift, uint32_t count) |
| { |
| switch (shift) |
| { |
| default: |
| case LSL: |
| return (value << count); |
| case LSR: |
| return (value >> count); |
| case ASR: |
| { |
| int32_t svalue = value; |
| return (svalue >> count); |
| } |
| case ROR: |
| { |
| uint32_t top = value >> count; |
| uint32_t bottom = value << (32 - count); |
| return (bottom | top); |
| } |
| } |
| } |
| |
| static inline uint64_t |
| shifted64 (uint64_t value, Shift shift, uint32_t count) |
| { |
| switch (shift) |
| { |
| default: |
| case LSL: |
| return (value << count); |
| case LSR: |
| return (value >> count); |
| case ASR: |
| { |
| int64_t svalue = value; |
| return (svalue >> count); |
| } |
| case ROR: |
| { |
| uint64_t top = value >> count; |
| uint64_t bottom = value << (64 - count); |
| return (bottom | top); |
| } |
| } |
| } |
| |
| /* Arithmetic shifted register. |
| These allow an optional LSL, ASR or LSR to the second source |
| register with a count up to the register bit count. |
| |
| N.B register args may not be SP. */ |
| |
| /* 32 bit ADD shifted register. */ |
| static void |
| add32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| shift, count)); |
| } |
| |
| /* 64 bit ADD shifted register. */ |
| static void |
| add64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), |
| shift, count)); |
| } |
| |
| /* 32 bit ADD shifted register setting flags. */ |
| static void |
| adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); |
| set_flags_for_add32 (cpu, value1, value2); |
| } |
| |
| /* 64 bit ADD shifted register setting flags. */ |
| static void |
| adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); |
| set_flags_for_add64 (cpu, value1, value2); |
| } |
| |
| /* 32 bit SUB shifted register. */ |
| static void |
| sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| shift, count)); |
| } |
| |
| /* 64 bit SUB shifted register. */ |
| static void |
| sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), |
| shift, count)); |
| } |
| |
| /* 32 bit SUB shifted register setting flags. */ |
| static void |
| subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); |
| set_flags_for_sub32 (cpu, value1, value2); |
| } |
| |
| /* 64 bit SUB shifted register setting flags. */ |
| static void |
| subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); |
| set_flags_for_sub64 (cpu, value1, value2); |
| } |
| |
| /* First a couple more helpers to fetch the |
| relevant source register element either |
| sign or zero extended as required by the |
| extension value. */ |
| |
| static uint32_t |
| extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension) |
| { |
| switch (extension) |
| { |
| case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP); |
| case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP); |
| case UXTW: /* Fall through. */ |
| case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP); |
| case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP); |
| case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP); |
| case SXTW: /* Fall through. */ |
| case SXTX: /* Fall through. */ |
| default: return aarch64_get_reg_s32 (cpu, lo, NO_SP); |
| } |
| } |
| |
| static uint64_t |
| extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension) |
| { |
| switch (extension) |
| { |
| case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP); |
| case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP); |
| case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP); |
| case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP); |
| case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP); |
| case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP); |
| case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP); |
| case SXTX: |
| default: return aarch64_get_reg_s64 (cpu, lo, NO_SP); |
| } |
| } |
| |
/* Arithmetic extending register
   These allow an optional zero or sign extension of some portion of
   the second source register followed by an optional left shift of
   0 to 4 bits.

   N.B. the output (dest) and the first input arg (source) may
   normally be Xn or SP.  However, for flag setting operations dest
   can only be Xn.  Second input registers are always Xn.  */
| |
| /* 32 bit ADD extending register. */ |
| static void |
| add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, SP_OK) |
| + (extreg32 (cpu, rm, extension) << shift)); |
| } |
| |
| /* 64 bit ADD extending register. |
| N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */ |
| static void |
| add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| + (extreg64 (cpu, rm, extension) << shift)); |
| } |
| |
| /* 32 bit ADD extending register setting flags. */ |
| static void |
| adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); |
| uint32_t value2 = extreg32 (cpu, rm, extension) << shift; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); |
| set_flags_for_add32 (cpu, value1, value2); |
| } |
| |
| /* 64 bit ADD extending register setting flags */ |
| /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ |
| static void |
| adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| uint64_t value2 = extreg64 (cpu, rm, extension) << shift; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2); |
| set_flags_for_add64 (cpu, value1, value2); |
| } |
| |
| /* 32 bit SUB extending register. */ |
| static void |
| sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, SP_OK) |
| - (extreg32 (cpu, rm, extension) << shift)); |
| } |
| |
| /* 64 bit SUB extending register. */ |
| /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */ |
| static void |
| sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) |
| - (extreg64 (cpu, rm, extension) << shift)); |
| } |
| |
| /* 32 bit SUB extending register setting flags. */ |
| static void |
| subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK); |
| uint32_t value2 = extreg32 (cpu, rm, extension) << shift; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); |
| set_flags_for_sub32 (cpu, value1, value2); |
| } |
| |
| /* 64 bit SUB extending register setting flags */ |
| /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */ |
| static void |
| subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| uint64_t value2 = extreg64 (cpu, rm, extension) << shift; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2); |
| set_flags_for_sub64 (cpu, value1, value2); |
| } |
| |
| static void |
| dexAddSubtractImmediate (sim_cpu *cpu) |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30] = op : 0 ==> ADD, 1 ==> SUB |
| instr[29] = set : 0 ==> no flags, 1 ==> set flags |
| instr[28,24] = 10001 |
| instr[23,22] = shift : 00 == LSL#0, 01 = LSL#12 1x = UNALLOC |
| instr[21,10] = uimm12 |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| /* N.B. the shift is applied at decode before calling the add/sub routine. */ |
| uint32_t shift = INSTR (23, 22); |
| uint32_t imm = INSTR (21, 10); |
| uint32_t dispatch = INSTR (31, 29); |
| |
| NYI_assert (28, 24, 0x11); |
| |
| if (shift > 1) |
| HALT_UNALLOC; |
| |
| if (shift) |
| imm <<= 12; |
| |
| switch (dispatch) |
| { |
| case 0: add32 (cpu, imm); break; |
| case 1: adds32 (cpu, imm); break; |
| case 2: sub32 (cpu, imm); break; |
| case 3: subs32 (cpu, imm); break; |
| case 4: add64 (cpu, imm); break; |
| case 5: adds64 (cpu, imm); break; |
| case 6: sub64 (cpu, imm); break; |
| case 7: subs64 (cpu, imm); break; |
| } |
| } |
| |
| static void |
| dexAddSubtractShiftedRegister (sim_cpu *cpu) |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS |
| instr[28,24] = 01011 |
| instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC |
| instr[21] = 0 |
| instr[20,16] = Rm |
| instr[15,10] = count : must be 0xxxxx for 32 bit |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| uint32_t size = INSTR (31, 31); |
| uint32_t count = INSTR (15, 10); |
| Shift shiftType = INSTR (23, 22); |
| |
| NYI_assert (28, 24, 0x0B); |
| NYI_assert (21, 21, 0); |
| |
| /* Shift encoded as ROR is unallocated. */ |
| if (shiftType == ROR) |
| HALT_UNALLOC; |
| |
| /* 32 bit operations must have count[5] = 0 |
| or else we have an UNALLOC. */ |
| if (size == 0 && uimm (count, 5, 5)) |
| HALT_UNALLOC; |
| |
| /* Dispatch on size:op i.e instr [31,29]. */ |
| switch (INSTR (31, 29)) |
| { |
| case 0: add32_shift (cpu, shiftType, count); break; |
| case 1: adds32_shift (cpu, shiftType, count); break; |
| case 2: sub32_shift (cpu, shiftType, count); break; |
| case 3: subs32_shift (cpu, shiftType, count); break; |
| case 4: add64_shift (cpu, shiftType, count); break; |
| case 5: adds64_shift (cpu, shiftType, count); break; |
| case 6: sub64_shift (cpu, shiftType, count); break; |
| case 7: subs64_shift (cpu, shiftType, count); break; |
| } |
| } |
| |
| static void |
| dexAddSubtractExtendedRegister (sim_cpu *cpu) |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30] = op : 0 ==> ADD, 1 ==> SUB |
| instr[29] = set? : 0 ==> no flags, 1 ==> set flags |
| instr[28,24] = 01011 |
| instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC |
| instr[21] = 1 |
| instr[20,16] = Rm |
| instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH, |
| 000 ==> LSL|UXTW, 001 ==> UXTZ, |
| 000 ==> SXTB, 001 ==> SXTH, |
| 000 ==> SXTW, 001 ==> SXTX, |
| instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| Extension extensionType = INSTR (15, 13); |
| uint32_t shift = INSTR (12, 10); |
| |
| NYI_assert (28, 24, 0x0B); |
| NYI_assert (21, 21, 1); |
| |
| /* Shift may not exceed 4. */ |
| if (shift > 4) |
| HALT_UNALLOC; |
| |
| /* Dispatch on size:op:set?. */ |
| switch (INSTR (31, 29)) |
| { |
| case 0: add32_ext (cpu, extensionType, shift); break; |
| case 1: adds32_ext (cpu, extensionType, shift); break; |
| case 2: sub32_ext (cpu, extensionType, shift); break; |
| case 3: subs32_ext (cpu, extensionType, shift); break; |
| case 4: add64_ext (cpu, extensionType, shift); break; |
| case 5: adds64_ext (cpu, extensionType, shift); break; |
| case 6: sub64_ext (cpu, extensionType, shift); break; |
| case 7: subs64_ext (cpu, extensionType, shift); break; |
| } |
| } |
| |
| /* Conditional data processing |
| Condition register is implicit 3rd source. */ |
| |
| /* 32 bit add with carry. */ |
| /* N.B register args may not be SP. */ |
| |
| static void |
| adc32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| + aarch64_get_reg_u32 (cpu, rm, NO_SP) |
| + IS_SET (C)); |
| } |
| |
| /* 64 bit add with carry */ |
| static void |
| adc64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| + aarch64_get_reg_u64 (cpu, rm, NO_SP) |
| + IS_SET (C)); |
| } |
| |
| /* 32 bit add with carry setting flags. */ |
| static void |
| adcs32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); |
| uint32_t carry = IS_SET (C); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); |
| set_flags_for_add32 (cpu, value1, value2 + carry); |
| } |
| |
| /* 64 bit add with carry setting flags. */ |
| static void |
| adcs64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); |
| uint64_t carry = IS_SET (C); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry); |
| set_flags_for_add64 (cpu, value1, value2 + carry); |
| } |
| |
| /* 32 bit sub with carry. */ |
| static void |
| sbc32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */ |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| - aarch64_get_reg_u32 (cpu, rm, NO_SP) |
| - 1 + IS_SET (C)); |
| } |
| |
| /* 64 bit sub with carry */ |
| static void |
| sbc64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| - aarch64_get_reg_u64 (cpu, rm, NO_SP) |
| - 1 + IS_SET (C)); |
| } |
| |
| /* 32 bit sub with carry setting flags */ |
| static void |
| sbcs32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP); |
| uint32_t carry = IS_SET (C); |
| uint32_t result = value1 - value2 + 1 - carry; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| set_flags_for_sub32 (cpu, value1, value2 + 1 - carry); |
| } |
| |
| /* 64 bit sub with carry setting flags */ |
| static void |
| sbcs64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); |
| uint64_t carry = IS_SET (C); |
| uint64_t result = value1 - value2 + 1 - carry; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| set_flags_for_sub64 (cpu, value1, value2 + 1 - carry); |
| } |
| |
| static void |
| dexAddSubtractWithCarry (sim_cpu *cpu) |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30] = op : 0 ==> ADC, 1 ==> SBC |
| instr[29] = set? : 0 ==> no flags, 1 ==> set flags |
| instr[28,21] = 1 1010 000 |
| instr[20,16] = Rm |
| instr[15,10] = op2 : 00000 ==> ok, ow ==> UNALLOC |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| uint32_t op2 = INSTR (15, 10); |
| |
| NYI_assert (28, 21, 0xD0); |
| |
| if (op2 != 0) |
| HALT_UNALLOC; |
| |
| /* Dispatch on size:op:set?. */ |
| switch (INSTR (31, 29)) |
| { |
| case 0: adc32 (cpu); break; |
| case 1: adcs32 (cpu); break; |
| case 2: sbc32 (cpu); break; |
| case 3: sbcs32 (cpu); break; |
| case 4: adc64 (cpu); break; |
| case 5: adcs64 (cpu); break; |
| case 6: sbc64 (cpu); break; |
| case 7: sbcs64 (cpu); break; |
| } |
| } |
| |
| static uint32_t |
| testConditionCode (sim_cpu *cpu, CondCode cc) |
| { |
| /* This should be reduceable to branchless logic |
| by some careful testing of bits in CC followed |
| by the requisite masking and combining of bits |
| from the flag register. |
| |
| For now we do it with a switch. */ |
| int res; |
| |
| switch (cc) |
| { |
| case EQ: res = IS_SET (Z); break; |
| case NE: res = IS_CLEAR (Z); break; |
| case CS: res = IS_SET (C); break; |
| case CC: res = IS_CLEAR (C); break; |
| case MI: res = IS_SET (N); break; |
| case PL: res = IS_CLEAR (N); break; |
| case VS: res = IS_SET (V); break; |
| case VC: res = IS_CLEAR (V); break; |
| case HI: res = IS_SET (C) && IS_CLEAR (Z); break; |
| case LS: res = IS_CLEAR (C) || IS_SET (Z); break; |
| case GE: res = IS_SET (N) == IS_SET (V); break; |
| case LT: res = IS_SET (N) != IS_SET (V); break; |
| case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break; |
| case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break; |
| case AL: |
| case NV: |
| default: |
| res = 1; |
| break; |
| } |
| return res; |
| } |
| |
| static void |
| CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */ |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30] = compare with positive (1) or negative value (0) |
| instr[29,21] = 1 1101 0010 |
| instr[20,16] = Rm or const |
| instr[15,12] = cond |
| instr[11] = compare reg (0) or const (1) |
| instr[10] = 0 |
| instr[9,5] = Rn |
| instr[4] = 0 |
| instr[3,0] = value for CPSR bits if the comparison does not take place. */ |
| signed int negate; |
| unsigned rm; |
| unsigned rn; |
| |
| NYI_assert (29, 21, 0x1d2); |
| NYI_assert (10, 10, 0); |
| NYI_assert (4, 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (! testConditionCode (cpu, INSTR (15, 12))) |
| { |
| aarch64_set_CPSR (cpu, INSTR (3, 0)); |
| return; |
| } |
| |
| negate = INSTR (30, 30) ? 1 : -1; |
| rm = INSTR (20, 16); |
| rn = INSTR ( 9, 5); |
| |
| if (INSTR (31, 31)) |
| { |
| if (INSTR (11, 11)) |
| set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), |
| negate * (uint64_t) rm); |
| else |
| set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK), |
| negate * aarch64_get_reg_u64 (cpu, rm, SP_OK)); |
| } |
| else |
| { |
| if (INSTR (11, 11)) |
| set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), |
| negate * rm); |
| else |
| set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK), |
| negate * aarch64_get_reg_u32 (cpu, rm, SP_OK)); |
| } |
| } |
| |
| static void |
| do_vec_MOV_whole_vector (sim_cpu *cpu) |
| { |
| /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm) |
| |
| instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,21] = 001110101 |
| instr[20,16] = Vs |
| instr[15,10] = 000111 |
| instr[9,5] = Vs |
| instr[4,0] = Vd */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| |
| NYI_assert (29, 21, 0x075); |
| NYI_assert (15, 10, 0x07); |
| |
| if (INSTR (20, 16) != vs) |
| HALT_NYI; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (30, 30)) |
| aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1)); |
| |
| aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0)); |
| } |
| |
| static void |
| do_vec_SMOV_into_scalar (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = word(0)/long(1) |
| instr[29,21] = 00 1110 000 |
| instr[20,16] = element size and index |
| instr[15,10] = 00 0010 11 |
| instr[9,5] = V source |
| instr[4,0] = R dest */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned imm5 = INSTR (20, 16); |
| unsigned full = INSTR (30, 30); |
| int size, index; |
| |
| NYI_assert (29, 21, 0x070); |
| NYI_assert (15, 10, 0x0B); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| |
| if (imm5 & 0x1) |
| { |
| size = 0; |
| index = (imm5 >> 1) & 0xF; |
| } |
| else if (imm5 & 0x2) |
| { |
| size = 1; |
| index = (imm5 >> 2) & 0x7; |
| } |
| else if (full && (imm5 & 0x4)) |
| { |
| size = 2; |
| index = (imm5 >> 3) & 0x3; |
| } |
| else |
| HALT_UNALLOC; |
| |
| switch (size) |
| { |
| case 0: |
| if (full) |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, |
| aarch64_get_vec_s8 (cpu, vs, index)); |
| else |
| aarch64_set_reg_s32 (cpu, rd, NO_SP, |
| aarch64_get_vec_s8 (cpu, vs, index)); |
| break; |
| |
| case 1: |
| if (full) |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, |
| aarch64_get_vec_s16 (cpu, vs, index)); |
| else |
| aarch64_set_reg_s32 (cpu, rd, NO_SP, |
| aarch64_get_vec_s16 (cpu, vs, index)); |
| break; |
| |
| case 2: |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, |
| aarch64_get_vec_s32 (cpu, vs, index)); |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_vec_UMOV_into_scalar (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = word(0)/long(1) |
| instr[29,21] = 00 1110 000 |
| instr[20,16] = element size and index |
| instr[15,10] = 00 0011 11 |
| instr[9,5] = V source |
| instr[4,0] = R dest */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned imm5 = INSTR (20, 16); |
| unsigned full = INSTR (30, 30); |
| int size, index; |
| |
| NYI_assert (29, 21, 0x070); |
| NYI_assert (15, 10, 0x0F); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| |
| if (!full) |
| { |
| if (imm5 & 0x1) |
| { |
| size = 0; |
| index = (imm5 >> 1) & 0xF; |
| } |
| else if (imm5 & 0x2) |
| { |
| size = 1; |
| index = (imm5 >> 2) & 0x7; |
| } |
| else if (imm5 & 0x4) |
| { |
| size = 2; |
| index = (imm5 >> 3) & 0x3; |
| } |
| else |
| HALT_UNALLOC; |
| } |
| else if (imm5 & 0x8) |
| { |
| size = 3; |
| index = (imm5 >> 4) & 0x1; |
| } |
| else |
| HALT_UNALLOC; |
| |
| switch (size) |
| { |
| case 0: |
| aarch64_set_reg_u32 (cpu, rd, NO_SP, |
| aarch64_get_vec_u8 (cpu, vs, index)); |
| break; |
| |
| case 1: |
| aarch64_set_reg_u32 (cpu, rd, NO_SP, |
| aarch64_get_vec_u16 (cpu, vs, index)); |
| break; |
| |
| case 2: |
| aarch64_set_reg_u32 (cpu, rd, NO_SP, |
| aarch64_get_vec_u32 (cpu, vs, index)); |
| break; |
| |
| case 3: |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_vec_u64 (cpu, vs, index)); |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_vec_INS (sim_cpu *cpu) |
| { |
| /* instr[31,21] = 01001110000 |
| instr[20,16] = element size and index |
| instr[15,10] = 000111 |
| instr[9,5] = W source |
| instr[4,0] = V dest */ |
| |
| int index; |
| unsigned rs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| |
| NYI_assert (31, 21, 0x270); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (16, 16)) |
| { |
| index = INSTR (20, 17); |
| aarch64_set_vec_u8 (cpu, vd, index, |
| aarch64_get_reg_u8 (cpu, rs, NO_SP)); |
| } |
| else if (INSTR (17, 17)) |
| { |
| index = INSTR (20, 18); |
| aarch64_set_vec_u16 (cpu, vd, index, |
| aarch64_get_reg_u16 (cpu, rs, NO_SP)); |
| } |
| else if (INSTR (18, 18)) |
| { |
| index = INSTR (20, 19); |
| aarch64_set_vec_u32 (cpu, vd, index, |
| aarch64_get_reg_u32 (cpu, rs, NO_SP)); |
| } |
| else if (INSTR (19, 19)) |
| { |
| index = INSTR (20, 20); |
| aarch64_set_vec_u64 (cpu, vd, index, |
| aarch64_get_reg_u64 (cpu, rs, NO_SP)); |
| } |
| else |
| HALT_NYI; |
| } |
| |
| static void |
| do_vec_DUP_vector_into_vector (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,21] = 00 1110 000 |
| instr[20,16] = element size and index |
| instr[15,10] = 0000 01 |
| instr[9,5] = V source |
| instr[4,0] = V dest. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| int i, index; |
| |
| NYI_assert (29, 21, 0x070); |
| NYI_assert (15, 10, 0x01); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (16, 16)) |
| { |
| index = INSTR (20, 17); |
| |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index)); |
| } |
| else if (INSTR (17, 17)) |
| { |
| index = INSTR (20, 18); |
| |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index)); |
| } |
| else if (INSTR (18, 18)) |
| { |
| index = INSTR (20, 19); |
| |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index)); |
| } |
| else |
| { |
| if (INSTR (19, 19) == 0) |
| HALT_UNALLOC; |
| |
| if (! full) |
| HALT_UNALLOC; |
| |
| index = INSTR (20, 20); |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index)); |
| } |
| } |
| |
| static void |
| do_vec_TBL (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,21] = 00 1110 000 |
| instr[20,16] = Vm |
| instr[15] = 0 |
| instr[14,13] = vec length |
| instr[12,10] = 000 |
| instr[9,5] = V start |
| instr[4,0] = V dest */ |
| |
| int full = INSTR (30, 30); |
| int len = INSTR (14, 13) + 1; |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 21, 0x070); |
| NYI_assert (12, 10, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| { |
| unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i); |
| uint8_t val; |
| |
| if (selector < 16) |
| val = aarch64_get_vec_u8 (cpu, vn, selector); |
| else if (selector < 32) |
| val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16); |
| else if (selector < 48) |
| val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32); |
| else if (selector < 64) |
| val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48); |
| else |
| val = 0; |
| |
| aarch64_set_vec_u8 (cpu, vd, i, val); |
| } |
| } |
| |
| static void |
| do_vec_TRN (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size |
| instr[21] = 0 |
| instr[20,16] = Vm |
| instr[15] = 0 |
| instr[14] = TRN1 (0) / TRN2 (1) |
| instr[13,10] = 1010 |
| instr[9,5] = V source |
| instr[4,0] = V dest. */ |
| |
| int full = INSTR (30, 30); |
| int second = INSTR (14, 14); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (13, 10, 0xA); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| { |
| aarch64_set_vec_u8 |
| (cpu, vd, i * 2, |
| aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2)); |
| aarch64_set_vec_u8 |
| (cpu, vd, 1 * 2 + 1, |
| aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1)); |
| } |
| break; |
| |
| case 1: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| aarch64_set_vec_u16 |
| (cpu, vd, i * 2, |
| aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2)); |
| aarch64_set_vec_u16 |
| (cpu, vd, 1 * 2 + 1, |
| aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1)); |
| } |
| break; |
| |
| case 2: |
| aarch64_set_vec_u32 |
| (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0)); |
| aarch64_set_vec_u32 |
| (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1)); |
| aarch64_set_vec_u32 |
| (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2)); |
| aarch64_set_vec_u32 |
| (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3)); |
| break; |
| |
| case 3: |
| if (! full) |
| HALT_UNALLOC; |
| |
| aarch64_set_vec_u64 (cpu, vd, 0, |
| aarch64_get_vec_u64 (cpu, second ? vm : vn, 0)); |
| aarch64_set_vec_u64 (cpu, vd, 1, |
| aarch64_get_vec_u64 (cpu, second ? vn : vm, 1)); |
| break; |
| } |
| } |
| |
| static void |
| do_vec_DUP_scalar_into_vector (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits |
| [must be 1 for 64-bit xfer] |
| instr[29,20] = 00 1110 0000 |
| instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits, |
| 0100=> 32-bits. 1000=>64-bits |
| instr[15,10] = 0000 11 |
| instr[9,5] = W source |
| instr[4,0] = V dest. */ |
| |
| unsigned i; |
| unsigned Vd = INSTR (4, 0); |
| unsigned Rs = INSTR (9, 5); |
| int both = INSTR (30, 30); |
| |
| NYI_assert (29, 20, 0x0E0); |
| NYI_assert (15, 10, 0x03); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (19, 16)) |
| { |
| case 1: |
| for (i = 0; i < (both ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP)); |
| break; |
| |
| case 2: |
| for (i = 0; i < (both ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP)); |
| break; |
| |
| case 4: |
| for (i = 0; i < (both ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP)); |
| break; |
| |
| case 8: |
| if (!both) |
| HALT_NYI; |
| aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP)); |
| aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP)); |
| break; |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
/* UZP1 / UZP2: unzip vectors.  The result is built from every second
   element of the two sources, starting at the even elements when
   instr[14] ("upper") is 0 and at the odd ones when it is 1.  Elements
   taken from Vn fill the low part of the result and elements taken
   from Vm the high part.  The work is done on the two 64 bit halves of
   each register with shifts and masks rather than element by
   element.  */
static void
do_vec_UZP (sim_cpu *cpu)
{
  /* instr[31]    = 0
     instr[30]    = half(0)/full(1)
     instr[29,24] = 00 1110
     instr[23,22] = size: byte(00), half(01), word (10), long (11)
     instr[21]    = 0
     instr[20,16] = Vm
     instr[15]    = 0
     instr[14]    = lower (0) / upper (1)
     instr[13,10] = 0110
     instr[9,5]   = Vn
     instr[4,0]   = Vd.  */

  int full = INSTR (30, 30);
  int upper = INSTR (14, 14);

  unsigned vm = INSTR (20, 16);
  unsigned vn = INSTR (9, 5);
  unsigned vd = INSTR (4, 0);

  /* Both 64 bit halves of both sources are read up front, so writing
     Vd at the end cannot disturb the inputs.  */
  uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
  uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
  uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
  uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);

  /* val1 becomes the low 64 bits of the result, val2 the high 64 bits
     (val2 is only computed and stored for the full form).  */
  uint64_t val1;
  uint64_t val2;

  /* Source for the upper half of val1: for the full form the low
     result half comes entirely from Vn, so it is Vn's high half; for
     the half form it is Vm's low half.  */
  uint64_t input2 = full ? val_n2 : val_m1;

  NYI_assert (29, 24, 0x0E);
  NYI_assert (21, 21, 0);
  NYI_assert (15, 15, 0);
  NYI_assert (13, 10, 6);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (INSTR (23, 22))
    {
    case 0:
      /* Bytes: pick source bytes upper, upper + 2, upper + 4 and
         upper + 6 of val_n1 into result bytes 0..3 ...  */
      val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
      val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
      val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
      val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;

      /* ... and the same bytes of input2 into result bytes 4..7.  */
      val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
      val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
      val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
      val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;

      if (full)
        {
          /* High result half: same selection applied to Vm.  */
          val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
          val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
          val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
          val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;

          val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
          val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
          val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
          val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
        }
      break;

    case 1:
      /* Half-words: source half-words upper and upper + 2 of val_n1
         go to result half-words 0 and 1, those of input2 to result
         half-words 2 and 3.  */
      val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
      val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;

      val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;;
      val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;

      if (full)
        {
          val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
          val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;

          val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
          val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
        }
      break;

    case 2:
      /* Words: word `upper' of val_n1 goes to result word 0 and word
         `upper' of input2 to result word 1.  */
      val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
      val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;

      if (full)
        {
          val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
          val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
        }
      break;

    case 3:
      /* 64 bit elements only exist in the full form.  */
      if (! full)
        HALT_UNALLOC;

      val1 = upper ? val_n2 : val_n1;
      val2 = upper ? val_m2 : val_m1;
      break;
    }

  aarch64_set_vec_u64 (cpu, vd, 0, val1);
  if (full)
    aarch64_set_vec_u64 (cpu, vd, 1, val2);
}
| |
| static void |
| do_vec_ZIP (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: byte(00), hald(01), word (10), long (11) |
| instr[21] = 0 |
| instr[20,16] = Vm |
| instr[15] = 0 |
| instr[14] = lower (0) / upper (1) |
| instr[13,10] = 1110 |
| instr[9,5] = Vn |
| instr[4,0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| int upper = INSTR (14, 14); |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| |
| uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0); |
| uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1); |
| uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0); |
| uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1); |
| |
| uint64_t val1 = 0; |
| uint64_t val2 = 0; |
| |
| uint64_t input1 = upper ? val_n1 : val_m1; |
| uint64_t input2 = upper ? val_n2 : val_m2; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 0); |
| NYI_assert (15, 15, 0); |
| NYI_assert (13, 10, 0xE); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 23)) |
| { |
| case 0: |
| val1 = |
| ((input1 << 0) & (0xFF << 0)) |
| | ((input2 << 8) & (0xFF << 8)) |
| | ((input1 << 8) & (0xFF << 16)) |
| | ((input2 << 16) & (0xFF << 24)) |
| | ((input1 << 16) & (0xFFULL << 32)) |
| | ((input2 << 24) & (0xFFULL << 40)) |
| | ((input1 << 24) & (0xFFULL << 48)) |
| | ((input2 << 32) & (0xFFULL << 56)); |
| |
| val2 = |
| ((input1 >> 32) & (0xFF << 0)) |
| | ((input2 >> 24) & (0xFF << 8)) |
| | ((input1 >> 24) & (0xFF << 16)) |
| | ((input2 >> 16) & (0xFF << 24)) |
| | ((input1 >> 16) & (0xFFULL << 32)) |
| | ((input2 >> 8) & (0xFFULL << 40)) |
| | ((input1 >> 8) & (0xFFULL << 48)) |
| | ((input2 >> 0) & (0xFFULL << 56)); |
| break; |
| |
| case 1: |
| val1 = |
| ((input1 << 0) & (0xFFFF << 0)) |
| | ((input2 << 16) & (0xFFFF << 16)) |
| | ((input1 << 16) & (0xFFFFULL << 32)) |
| | ((input2 << 32) & (0xFFFFULL << 48)); |
| |
| val2 = |
| ((input1 >> 32) & (0xFFFF << 0)) |
| | ((input2 >> 16) & (0xFFFF << 16)) |
| | ((input1 >> 16) & (0xFFFFULL << 32)) |
| | ((input2 >> 0) & (0xFFFFULL << 48)); |
| break; |
| |
| case 2: |
| val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32); |
| val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32); |
| break; |
| |
| case 3: |
| val1 = input1; |
| val2 = input2; |
| break; |
| } |
| |
| aarch64_set_vec_u64 (cpu, vd, 0, val1); |
| if (full) |
| aarch64_set_vec_u64 (cpu, vd, 1, val2); |
| } |
| |
| /* Floating point immediates are encoded in 8 bits. |
| fpimm[7] = sign bit. |
| fpimm[6:4] = signed exponent. |
| fpimm[3:0] = fraction (assuming leading 1). |
| i.e. F = s * 1.f * 2^(e - b). */ |
| |
/* Expand the 8 bit floating point immediate IMM8 to a float.
   imm8[7] is the sign, imm8[6,4] the exponent field e and imm8[3,0]
   the fraction f.  The magnitude is (16 + f) / 16 scaled by 2^(e + 1)
   for e in 0..3 and by 2^(e - 7) for e in 4..7.  */
static float
fp_immediate_for_encoding_32 (uint32_t imm8)
{
  /* Power of two scale factor for each value of the exponent field.  */
  static const float scale[8] =
    { 2.0f, 4.0f, 8.0f, 16.0f, 0.125f, 0.25f, 0.5f, 1.0f };

  uint32_t negative = (imm8 >> 7) & 0x1;
  uint32_t exponent = (imm8 >> 4) & 0x7;
  uint32_t fraction = imm8 & 0xf;
  float magnitude = (float) ((16.0 + fraction) / 16.0);

  /* Scaling by a power of two is exact, so this matches repeated
     doubling or halving.  */
  magnitude *= scale[exponent];

  return negative ? - magnitude : magnitude;
}
| |
/* Expand the 8 bit floating point immediate IMM8 to a double.
   imm8[7] is the sign, imm8[6,4] the exponent field e and imm8[3,0]
   the fraction f.  The magnitude is (16 + f) / 16 scaled by 2^(e + 1)
   for e in 0..3 and by 2^(e - 7) for e in 4..7.  */
static double
fp_immediate_for_encoding_64 (uint32_t imm8)
{
  /* Power of two scale factor for each value of the exponent field.  */
  static const double scale[8] =
    { 2.0, 4.0, 8.0, 16.0, 0.125, 0.25, 0.5, 1.0 };

  uint32_t negative = (imm8 >> 7) & 0x1;
  uint32_t exponent = (imm8 >> 4) & 0x7;
  uint32_t fraction = imm8 & 0xf;
  double magnitude = (16.0 + fraction) / 16.0;

  /* Scaling by a power of two is exact, so this matches repeated
     doubling or halving.  */
  magnitude *= scale[exponent];

  return negative ? - magnitude : magnitude;
}
| |
| static void |
| do_vec_MOV_immediate (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,19] = 00111100000 |
| instr[18,16] = high 3 bits of uimm8 |
| instr[15,12] = size & shift: |
| 0000 => 32-bit |
| 0010 => 32-bit + LSL#8 |
| 0100 => 32-bit + LSL#16 |
| 0110 => 32-bit + LSL#24 |
| 1010 => 16-bit + LSL#8 |
| 1000 => 16-bit |
| 1101 => 32-bit + MSL#16 |
| 1100 => 32-bit + MSL#8 |
| 1110 => 8-bit |
| 1111 => double |
| instr[11,10] = 01 |
| instr[9,5] = low 5-bits of uimm8 |
| instr[4,0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| unsigned vd = INSTR (4, 0); |
| unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); |
| unsigned i; |
| |
| NYI_assert (29, 19, 0x1E0); |
| NYI_assert (11, 10, 1); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (15, 12)) |
| { |
| case 0x0: /* 32-bit, no shift. */ |
| case 0x2: /* 32-bit, shift by 8. */ |
| case 0x4: /* 32-bit, shift by 16. */ |
| case 0x6: /* 32-bit, shift by 24. */ |
| val <<= (8 * INSTR (14, 13)); |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, val); |
| break; |
| |
| case 0xa: /* 16-bit, shift by 8. */ |
| val <<= 8; |
| /* Fall through. */ |
| case 0x8: /* 16-bit, no shift. */ |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, val); |
| break; |
| |
| case 0xd: /* 32-bit, mask shift by 16. */ |
| val <<= 8; |
| val |= 0xFF; |
| /* Fall through. */ |
| case 0xc: /* 32-bit, mask shift by 8. */ |
| val <<= 8; |
| val |= 0xFF; |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, val); |
| break; |
| |
| case 0xe: /* 8-bit, no shift. */ |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, val); |
| break; |
| |
| case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */ |
| { |
| float u = fp_immediate_for_encoding_32 (val); |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, u); |
| break; |
| } |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| do_vec_MVNI (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,19] = 10111100000 |
| instr[18,16] = high 3 bits of uimm8 |
| instr[15,12] = selector |
| instr[11,10] = 01 |
| instr[9,5] = low 5-bits of uimm8 |
| instr[4,0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| unsigned vd = INSTR (4, 0); |
| unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5); |
| unsigned i; |
| |
| NYI_assert (29, 19, 0x5E0); |
| NYI_assert (11, 10, 1); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (15, 12)) |
| { |
| case 0x0: /* 32-bit, no shift. */ |
| case 0x2: /* 32-bit, shift by 8. */ |
| case 0x4: /* 32-bit, shift by 16. */ |
| case 0x6: /* 32-bit, shift by 24. */ |
| val <<= (8 * INSTR (14, 13)); |
| val = ~ val; |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, val); |
| return; |
| |
| case 0xa: /* 16-bit, 8 bit shift. */ |
| val <<= 8; |
| case 0x8: /* 16-bit, no shift. */ |
| val = ~ val; |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, val); |
| return; |
| |
| case 0xd: /* 32-bit, mask shift by 16. */ |
| val <<= 8; |
| val |= 0xFF; |
| case 0xc: /* 32-bit, mask shift by 8. */ |
| val <<= 8; |
| val |= 0xFF; |
| val = ~ val; |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, val); |
| return; |
| |
| case 0xE: /* MOVI Dn, #mask64 */ |
| { |
| uint64_t mask = 0; |
| |
| for (i = 0; i < 8; i++) |
| if (val & (1 << i)) |
| mask |= (0xFFUL << (i * 8)); |
| aarch64_set_vec_u64 (cpu, vd, 0, mask); |
| aarch64_set_vec_u64 (cpu, vd, 1, mask); |
| return; |
| } |
| |
| case 0xf: /* FMOV Vd.2D, #fpimm. */ |
| { |
| double u = fp_immediate_for_encoding_64 (val); |
| |
| if (! full) |
| HALT_UNALLOC; |
| |
| aarch64_set_vec_double (cpu, vd, 0, u); |
| aarch64_set_vec_double (cpu, vd, 1, u); |
| return; |
| } |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| #define ABS(A) ((A) < 0 ? - (A) : (A)) |
| |
| static void |
| do_vec_ABS (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit |
| instr[21,10] = 10 0000 1011 10 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 10, 0x82E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_s8 (cpu, vd, i, |
| ABS (aarch64_get_vec_s8 (cpu, vn, i))); |
| break; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_s16 (cpu, vd, i, |
| ABS (aarch64_get_vec_s16 (cpu, vn, i))); |
| break; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_s32 (cpu, vd, i, |
| ABS (aarch64_get_vec_s32 (cpu, vn, i))); |
| break; |
| |
| case 3: |
| if (! full) |
| HALT_NYI; |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_s64 (cpu, vd, i, |
| ABS (aarch64_get_vec_s64 (cpu, vn, i))); |
| break; |
| } |
| } |
| |
| static void |
| do_vec_ADDV (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit |
| instr[21,10] = 11 0001 1011 10 |
| instr[9,5] = Vm |
| instr[4.0] = Rd. */ |
| |
| unsigned vm = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 10, 0xC6E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| { |
| uint8_t val = 0; |
| for (i = 0; i < (full ? 16 : 8); i++) |
| val += aarch64_get_vec_u8 (cpu, vm, i); |
| aarch64_set_vec_u64 (cpu, rd, 0, val); |
| return; |
| } |
| |
| case 1: |
| { |
| uint16_t val = 0; |
| for (i = 0; i < (full ? 8 : 4); i++) |
| val += aarch64_get_vec_u16 (cpu, vm, i); |
| aarch64_set_vec_u64 (cpu, rd, 0, val); |
| return; |
| } |
| |
| case 2: |
| { |
| uint32_t val = 0; |
| if (! full) |
| HALT_UNALLOC; |
| for (i = 0; i < 4; i++) |
| val += aarch64_get_vec_u32 (cpu, vm, i); |
| aarch64_set_vec_u64 (cpu, rd, 0, val); |
| return; |
| } |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_vec_ins_2 (sim_cpu *cpu) |
| { |
| /* instr[31,21] = 01001110000 |
| instr[20,18] = size & element selector |
| instr[17,14] = 0000 |
| instr[13] = direction: to vec(0), from vec (1) |
| instr[12,10] = 111 |
| instr[9,5] = Vm |
| instr[4,0] = Vd. */ |
| |
| unsigned elem; |
| unsigned vm = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| |
| NYI_assert (31, 21, 0x270); |
| NYI_assert (17, 14, 0); |
| NYI_assert (12, 10, 7); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (13, 13) == 1) |
| { |
| if (INSTR (18, 18) == 1) |
| { |
| /* 32-bit moves. */ |
| elem = INSTR (20, 19); |
| aarch64_set_reg_u64 (cpu, vd, NO_SP, |
| aarch64_get_vec_u32 (cpu, vm, elem)); |
| } |
| else |
| { |
| /* 64-bit moves. */ |
| if (INSTR (19, 19) != 1) |
| HALT_NYI; |
| |
| elem = INSTR (20, 20); |
| aarch64_set_reg_u64 (cpu, vd, NO_SP, |
| aarch64_get_vec_u64 (cpu, vm, elem)); |
| } |
| } |
| else |
| { |
| if (INSTR (18, 18) == 1) |
| { |
| /* 32-bit moves. */ |
| elem = INSTR (20, 19); |
| aarch64_set_vec_u32 (cpu, vd, elem, |
| aarch64_get_reg_u32 (cpu, vm, NO_SP)); |
| } |
| else |
| { |
| /* 64-bit moves. */ |
| if (INSTR (19, 19) != 1) |
| HALT_NYI; |
| |
| elem = INSTR (20, 20); |
| aarch64_set_vec_u64 (cpu, vd, elem, |
| aarch64_get_reg_u64 (cpu, vm, NO_SP)); |
| } |
| } |
| } |
| |
/* Multiply lanes BIAS .. BIAS + N - 1 of Vn by the same lanes of Vm
   and store the N products in lanes 0 .. N - 1 of Vd.  Lanes are read
   with the READ_TYPE accessors, held as DST_TYPE, and written with
   the WRITE_TYPE accessor.  Every source lane is read before any
   result is written, so the destination may be one of the sources.
   The expansion uses the enclosing function's CPU, VN, VM, VD, BIAS
   and I variables.  */
#define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE)          \
  do                                                                     \
    {                                                                    \
      DST_TYPE lhs_[N];                                                  \
      DST_TYPE rhs_[N];                                                  \
                                                                         \
      i = 0;                                                             \
      while (i < (N))                                                    \
        {                                                                \
          lhs_[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias);     \
          rhs_[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias);     \
          i++;                                                           \
        }                                                                \
      i = 0;                                                             \
      while (i < (N))                                                    \
        {                                                                \
          aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, lhs_[i] * rhs_[i]);  \
          i++;                                                           \
        }                                                                \
    }                                                                    \
  while (0)
| |
| static void |
| do_vec_mull (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = lower(0)/upper(1) selector |
| instr[29] = signed(0)/unsigned(1) |
| instr[28,24] = 0 1110 |
| instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10) |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,10] = 11 0000 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| int unsign = INSTR (29, 29); |
| int bias = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR ( 9, 5); |
| unsigned vd = INSTR ( 4, 0); |
| unsigned i; |
| |
| NYI_assert (28, 24, 0x0E); |
| NYI_assert (15, 10, 0x30); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* NB: Read source values before writing results, in case |
| the source and destination vectors are the same. */ |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| if (bias) |
| bias = 8; |
| if (unsign) |
| DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16); |
| else |
| DO_VEC_WIDENING_MUL (8, int16_t, s8, s16); |
| return; |
| |
| case 1: |
| if (bias) |
| bias = 4; |
| if (unsign) |
| DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32); |
| else |
| DO_VEC_WIDENING_MUL (4, int32_t, s16, s32); |
| return; |
| |
| case 2: |
| if (bias) |
| bias = 2; |
| if (unsign) |
| DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64); |
| else |
| DO_VEC_WIDENING_MUL (2, int64_t, s32, s64); |
| return; |
| |
| case 3: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| do_vec_fadd (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 001110 |
| instr[23] = FADD(0)/FSUB(1) |
| instr[22] = float (0)/double(1) |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,10] = 110101 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x35); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (23, 23)) |
| { |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_NYI; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| aarch64_get_vec_double (cpu, vn, i) |
| - aarch64_get_vec_double (cpu, vm, i)); |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| aarch64_get_vec_float (cpu, vn, i) |
| - aarch64_get_vec_float (cpu, vm, i)); |
| } |
| } |
| else |
| { |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_NYI; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| aarch64_get_vec_double (cpu, vm, i) |
| + aarch64_get_vec_double (cpu, vn, i)); |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| aarch64_get_vec_float (cpu, vm, i) |
| + aarch64_get_vec_float (cpu, vn, i)); |
| } |
| } |
| } |
| |
| static void |
| do_vec_add (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,24] = 001110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit |
| instr[21] = 1 |
| instr[20,16] = Vn |
| instr[15,10] = 100001 |
| instr[9,5] = Vm |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x21); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) |
| + aarch64_get_vec_u8 (cpu, vm, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) |
| + aarch64_get_vec_u16 (cpu, vm, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) |
| + aarch64_get_vec_u32 (cpu, vm, i)); |
| return; |
| |
| case 3: |
| if (! full) |
| HALT_UNALLOC; |
| aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0) |
| + aarch64_get_vec_u64 (cpu, vm, 0)); |
| aarch64_set_vec_u64 (cpu, vd, 1, |
| aarch64_get_vec_u64 (cpu, vn, 1) |
| + aarch64_get_vec_u64 (cpu, vm, 1)); |
| return; |
| } |
| } |
| |
| static void |
| do_vec_mul (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit |
| instr[21] = 1 |
| instr[20,16] = Vn |
| instr[15,10] = 10 0111 |
| instr[9,5] = Vm |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| int bias = 0; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x27); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8); |
| return; |
| |
| case 1: |
| DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16); |
| return; |
| |
| case 2: |
| DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32); |
| return; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_vec_MLA (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit |
| instr[21] = 1 |
| instr[20,16] = Vn |
| instr[15,10] = 1001 01 |
| instr[9,5] = Vm |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x25); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vd, i) |
| + (aarch64_get_vec_u8 (cpu, vn, i) |
| * aarch64_get_vec_u8 (cpu, vm, i))); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, |
| aarch64_get_vec_u16 (cpu, vd, i) |
| + (aarch64_get_vec_u16 (cpu, vn, i) |
| * aarch64_get_vec_u16 (cpu, vm, i))); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vd, i) |
| + (aarch64_get_vec_u32 (cpu, vn, i) |
| * aarch64_get_vec_u32 (cpu, vm, i))); |
| return; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
/* Return the larger of A and B, treating a NaN as a missing operand:
   if exactly one argument is a NaN the other one is returned, and if
   both are NaNs then A is returned.  */

static float
fmaxnm (float a, float b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a > b ? a : b;
}
| |
/* Return the smaller of A and B, treating a NaN as a missing operand:
   if exactly one argument is a NaN the other one is returned, and if
   both are NaNs then A is returned.  */

static float
fminnm (float a, float b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a < b ? a : b;
}
| |
/* Double precision version of fmaxnm: return the larger of A and B,
   treating a NaN as a missing operand.  If exactly one argument is a
   NaN the other one is returned; if both are NaNs, A is returned.  */

static double
dmaxnm (double a, double b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a > b ? a : b;
}
| |
/* Double precision version of fminnm: return the smaller of A and B,
   treating a NaN as a missing operand.  If exactly one argument is a
   NaN the other one is returned; if both are NaNs, A is returned.  */

static double
dminnm (double a, double b)
{
  if (isnan (a))
    return isnan (b) ? a : b;

  if (isnan (b))
    return a;

  return a < b ? a : b;
}
| |
| static void |
| do_vec_FminmaxNMP (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half (0)/full (1) |
| instr [29,24] = 10 1110 |
| instr [23] = max(0)/min(1) |
| instr [22] = float (0)/double (1) |
| instr [21] = 1 |
| instr [20,16] = Vn |
| instr [15,10] = 1100 01 |
| instr [9,5] = Vm |
| instr [4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x31); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| double (* fn)(double, double) = INSTR (23, 23) |
| ? dminnm : dmaxnm; |
| |
| if (! full) |
| HALT_NYI; |
| aarch64_set_vec_double (cpu, vd, 0, |
| fn (aarch64_get_vec_double (cpu, vn, 0), |
| aarch64_get_vec_double (cpu, vn, 1))); |
| aarch64_set_vec_double (cpu, vd, 0, |
| fn (aarch64_get_vec_double (cpu, vm, 0), |
| aarch64_get_vec_double (cpu, vm, 1))); |
| } |
| else |
| { |
| float (* fn)(float, float) = INSTR (23, 23) |
| ? fminnm : fmaxnm; |
| |
| aarch64_set_vec_float (cpu, vd, 0, |
| fn (aarch64_get_vec_float (cpu, vn, 0), |
| aarch64_get_vec_float (cpu, vn, 1))); |
| if (full) |
| aarch64_set_vec_float (cpu, vd, 1, |
| fn (aarch64_get_vec_float (cpu, vn, 2), |
| aarch64_get_vec_float (cpu, vn, 3))); |
| |
| aarch64_set_vec_float (cpu, vd, (full ? 2 : 1), |
| fn (aarch64_get_vec_float (cpu, vm, 0), |
| aarch64_get_vec_float (cpu, vm, 1))); |
| if (full) |
| aarch64_set_vec_float (cpu, vd, 3, |
| fn (aarch64_get_vec_float (cpu, vm, 2), |
| aarch64_get_vec_float (cpu, vm, 3))); |
| } |
| } |
| |
| static void |
| do_vec_AND (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,21] = 001110001 |
| instr[20,16] = Vm |
| instr[15,10] = 000111 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 21, 0x071); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vn, i) |
| & aarch64_get_vec_u32 (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_BSL (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,21] = 101110011 |
| instr[20,16] = Vm |
| instr[15,10] = 000111 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 21, 0x173); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| ( aarch64_get_vec_u8 (cpu, vd, i) |
| & aarch64_get_vec_u8 (cpu, vn, i)) |
| | ((~ aarch64_get_vec_u8 (cpu, vd, i)) |
| & aarch64_get_vec_u8 (cpu, vm, i))); |
| } |
| |
| static void |
| do_vec_EOR (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,21] = 10 1110 001 |
| instr[20,16] = Vm |
| instr[15,10] = 000111 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 21, 0x171); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vn, i) |
| ^ aarch64_get_vec_u32 (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_bit (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,23] = 10 1110 1 |
| instr[22] = BIT (0) / BIF (1) |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,10] = 0001 11 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned test_false = INSTR (22, 22); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x5D); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i); |
| uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i); |
| uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i); |
| if (test_false) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| (vd_val & vm_val) | (vn_val & ~vm_val)); |
| else |
| aarch64_set_vec_u32 (cpu, vd, i, |
| (vd_val & ~vm_val) | (vn_val & vm_val)); |
| } |
| } |
| |
| static void |
| do_vec_ORN (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,21] = 00 1110 111 |
| instr[20,16] = Vm |
| instr[15,10] = 00 0111 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 21, 0x077); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vn, i) |
| | ~ aarch64_get_vec_u8 (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_ORR (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,21] = 00 1110 101 |
| instr[20,16] = Vm |
| instr[15,10] = 0001 11 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 21, 0x075); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vn, i) |
| | aarch64_get_vec_u8 (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_BIC (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,21] = 00 1110 011 |
| instr[20,16] = Vm |
| instr[15,10] = 00 0111 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 21, 0x073); |
| NYI_assert (15, 10, 0x07); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vn, i) |
| & ~ aarch64_get_vec_u8 (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_XTN (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = first part (0)/ second part (1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: byte(00), half(01), word (10) |
| instr[21,10] = 1000 0100 1010 |
| instr[9,5] = Vs |
| instr[4,0] = Vd. */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned bias = INSTR (30, 30); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 10, 0x84A); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < 8; i++) |
| aarch64_set_vec_u8 (cpu, vd, i + (bias * 8), |
| aarch64_get_vec_u16 (cpu, vs, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < 4; i++) |
| aarch64_set_vec_u16 (cpu, vd, i + (bias * 4), |
| aarch64_get_vec_u32 (cpu, vs, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_u32 (cpu, vd, i + (bias * 2), |
| aarch64_get_vec_u64 (cpu, vs, i)); |
| return; |
| } |
| } |
| |
/* Return the number of bits set in the input value.  Compilers that
   provide the builtin use it directly; otherwise a small table driven
   fallback is used.  */
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
# define popcount __builtin_popcount
#else
static int
popcount (unsigned char x)
{
  /* Number of set bits in each possible 4-bit value.  */
  static const unsigned char nibble_bits[16] =
    {
      0, 1, 1, 2,
      1, 2, 2, 3,
      1, 2, 2, 3,
      2, 3, 3, 4
    };

  /* Only the low 8 bits of the input are counted, as that is all we
     need: one table lookup for each nibble.  */
  return nibble_bits[x & 0xF] + nibble_bits[x >> 4];
}
#endif
| |
| static void |
| do_vec_CNT (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/ full (1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: byte(00) |
| instr[21,10] = 1000 0001 0110 |
| instr[9,5] = Vs |
| instr[4,0] = Vd. */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| int full = INSTR (30, 30); |
| int size = INSTR (23, 22); |
| int i; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 10, 0x816); |
| |
| if (size != 0) |
| HALT_UNALLOC; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| popcount (aarch64_get_vec_u8 (cpu, vs, i))); |
| } |
| |
| static void |
| do_vec_maxv (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29] = signed (0)/unsigned(1) |
| instr[28,24] = 0 1110 |
| instr[23,22] = size: byte(00), half(01), word (10) |
| instr[21] = 1 |
| instr[20,17] = 1 000 |
| instr[16] = max(0)/min(1) |
| instr[15,10] = 1010 10 |
| instr[9,5] = V source |
| instr[4.0] = R dest. */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| |
| NYI_assert (28, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (20, 17, 8); |
| NYI_assert (15, 10, 0x2A); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch ((INSTR (29, 29) << 1) | INSTR (16, 16)) |
| { |
| case 0: /* SMAXV. */ |
| { |
| int64_t smax; |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| smax = aarch64_get_vec_s8 (cpu, vs, 0); |
| for (i = 1; i < (full ? 16 : 8); i++) |
| smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i)); |
| break; |
| case 1: |
| smax = aarch64_get_vec_s16 (cpu, vs, 0); |
| for (i = 1; i < (full ? 8 : 4); i++) |
| smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i)); |
| break; |
| case 2: |
| smax = aarch64_get_vec_s32 (cpu, vs, 0); |
| for (i = 1; i < (full ? 4 : 2); i++) |
| smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i)); |
| break; |
| case 3: |
| HALT_UNALLOC; |
| } |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, smax); |
| return; |
| } |
| |
| case 1: /* SMINV. */ |
| { |
| int64_t smin; |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| smin = aarch64_get_vec_s8 (cpu, vs, 0); |
| for (i = 1; i < (full ? 16 : 8); i++) |
| smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i)); |
| break; |
| case 1: |
| smin = aarch64_get_vec_s16 (cpu, vs, 0); |
| for (i = 1; i < (full ? 8 : 4); i++) |
| smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i)); |
| break; |
| case 2: |
| smin = aarch64_get_vec_s32 (cpu, vs, 0); |
| for (i = 1; i < (full ? 4 : 2); i++) |
| smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i)); |
| break; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, smin); |
| return; |
| } |
| |
| case 2: /* UMAXV. */ |
| { |
| uint64_t umax; |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| umax = aarch64_get_vec_u8 (cpu, vs, 0); |
| for (i = 1; i < (full ? 16 : 8); i++) |
| umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i)); |
| break; |
| case 1: |
| umax = aarch64_get_vec_u16 (cpu, vs, 0); |
| for (i = 1; i < (full ? 8 : 4); i++) |
| umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i)); |
| break; |
| case 2: |
| umax = aarch64_get_vec_u32 (cpu, vs, 0); |
| for (i = 1; i < (full ? 4 : 2); i++) |
| umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i)); |
| break; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, umax); |
| return; |
| } |
| |
| case 3: /* UMINV. */ |
| { |
| uint64_t umin; |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| umin = aarch64_get_vec_u8 (cpu, vs, 0); |
| for (i = 1; i < (full ? 16 : 8); i++) |
| umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i)); |
| break; |
| case 1: |
| umin = aarch64_get_vec_u16 (cpu, vs, 0); |
| for (i = 1; i < (full ? 8 : 4); i++) |
| umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i)); |
| break; |
| case 2: |
| umin = aarch64_get_vec_u32 (cpu, vs, 0); |
| for (i = 1; i < (full ? 4 : 2); i++) |
| umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i)); |
| break; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, umin); |
| return; |
| } |
| } |
| } |
| |
| static void |
| do_vec_fminmaxV (sim_cpu *cpu) |
| { |
| /* instr[31,24] = 0110 1110 |
| instr[23] = max(0)/min(1) |
| instr[22,14] = 011 0000 11 |
| instr[13,12] = nm(00)/normal(11) |
| instr[11,10] = 10 |
| instr[9,5] = V source |
| instr[4.0] = R dest. */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned i; |
| float res = aarch64_get_vec_float (cpu, vs, 0); |
| |
| NYI_assert (31, 24, 0x6E); |
| NYI_assert (22, 14, 0x0C3); |
| NYI_assert (11, 10, 2); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (23, 23)) |
| { |
| switch (INSTR (13, 12)) |
| { |
| case 0: /* FMNINNMV. */ |
| for (i = 1; i < 4; i++) |
| res = fminnm (res, aarch64_get_vec_float (cpu, vs, i)); |
| break; |
| |
| case 3: /* FMINV. */ |
| for (i = 1; i < 4; i++) |
| res = min (res, aarch64_get_vec_float (cpu, vs, i)); |
| break; |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| else |
| { |
| switch (INSTR (13, 12)) |
| { |
| case 0: /* FMNAXNMV. */ |
| for (i = 1; i < 4; i++) |
| res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i)); |
| break; |
| |
| case 3: /* FMAXV. */ |
| for (i = 1; i < 4; i++) |
| res = max (res, aarch64_get_vec_float (cpu, vs, i)); |
| break; |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| aarch64_set_FP_float (cpu, rd, res); |
| } |
| |
| static void |
| do_vec_Fminmax (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 00 1110 |
| instr[23] = max(0)/min(1) |
| instr[22] = float(0)/double(1) |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,14] = 11 |
| instr[13,12] = nm(00)/normal(11) |
| instr[11,10] = 01 |
| instr[9,5] = Vn |
| instr[4,0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned min = INSTR (23, 23); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 14, 3); |
| NYI_assert (11, 10, 1); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| double (* func)(double, double); |
| |
| if (! full) |
| HALT_NYI; |
| |
| if (INSTR (13, 12) == 0) |
| func = min ? dminnm : dmaxnm; |
| else if (INSTR (13, 12) == 3) |
| func = min ? fmin : fmax; |
| else |
| HALT_NYI; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| func (aarch64_get_vec_double (cpu, vn, i), |
| aarch64_get_vec_double (cpu, vm, i))); |
| } |
| else |
| { |
| float (* func)(float, float); |
| |
| if (INSTR (13, 12) == 0) |
| func = min ? fminnm : fmaxnm; |
| else if (INSTR (13, 12) == 3) |
| func = min ? fminf : fmaxf; |
| else |
| HALT_NYI; |
| |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| func (aarch64_get_vec_float (cpu, vn, i), |
| aarch64_get_vec_float (cpu, vm, i))); |
| } |
| } |
| |
| static void |
| do_vec_SCVTF (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = Q |
| instr[29,23] = 00 1110 0 |
| instr[22] = float(0)/double(1) |
| instr[21,10] = 10 0001 1101 10 |
| instr[9,5] = Vn |
| instr[4,0] = Vd. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned size = INSTR (22, 22); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x1C); |
| NYI_assert (21, 10, 0x876); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| { |
| double val = (double) aarch64_get_vec_u64 (cpu, vn, i); |
| aarch64_set_vec_double (cpu, vd, i, val); |
| } |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| float val = (float) aarch64_get_vec_u32 (cpu, vn, i); |
| aarch64_set_vec_float (cpu, vd, i, val); |
| } |
| } |
| } |
| |
/* Lane-by-lane integer comparison of Vn against Vm.  SOURCE selects the
   element accessors ("s" or "u") and CMP is the C operator applied to
   each pair of lanes.  A lane of Vd is set to all ones when the test
   yields a non-zero value and to zero otherwise.  The macro uses the
   enclosing function's cpu, size, full, vd, vn, vm and i, and returns
   from that function after handling any size in the range 0..3.  */
#define VEC_CMP(SOURCE, CMP) \
  do \
    { \
      if (size == 0) \
        { \
          for (i = 0; i < (full ? 16 : 8); i++) \
            aarch64_set_vec_u8 (cpu, vd, i, \
                                (aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
                                 CMP \
                                 aarch64_get_vec_##SOURCE##8 (cpu, vm, i)) \
                                ? -1 : 0); \
          return; \
        } \
      else if (size == 1) \
        { \
          for (i = 0; i < (full ? 8 : 4); i++) \
            aarch64_set_vec_u16 (cpu, vd, i, \
                                 (aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
                                  CMP \
                                  aarch64_get_vec_##SOURCE##16 (cpu, vm, i)) \
                                 ? -1 : 0); \
          return; \
        } \
      else if (size == 2) \
        { \
          for (i = 0; i < (full ? 4 : 2); i++) \
            aarch64_set_vec_u32 (cpu, vd, i, \
                                 (aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
                                  CMP \
                                  aarch64_get_vec_##SOURCE##32 (cpu, vm, i)) \
                                 ? -1 : 0); \
          return; \
        } \
      else if (size == 3) \
        { \
          if (! full) \
            HALT_UNALLOC; \
          for (i = 0; i < 2; i++) \
            aarch64_set_vec_u64 (cpu, vd, i, \
                                 (aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
                                  CMP \
                                  aarch64_get_vec_##SOURCE##64 (cpu, vm, i)) \
                                 ? -1ULL : 0); \
          return; \
        } \
    } \
  while (0)
| |
/* Lane-by-lane integer comparison of Vn against the constant zero.
   SOURCE selects the element accessors ("s" or "u") and CMP is the C
   operator placed between each lane and 0.  A lane of Vd becomes all
   ones when the test holds and zero otherwise.  The macro uses the
   enclosing function's cpu, size, full, vd, vn and i, and returns from
   that function after handling any size in the range 0..3.  */
#define VEC_CMP0(SOURCE, CMP) \
  do \
    { \
      if (size == 0) \
        { \
          for (i = 0; i < (full ? 16 : 8); i++) \
            aarch64_set_vec_u8 (cpu, vd, i, \
                                (aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
                                 CMP 0) ? -1 : 0); \
          return; \
        } \
      else if (size == 1) \
        { \
          for (i = 0; i < (full ? 8 : 4); i++) \
            aarch64_set_vec_u16 (cpu, vd, i, \
                                 (aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
                                  CMP 0) ? -1 : 0); \
          return; \
        } \
      else if (size == 2) \
        { \
          for (i = 0; i < (full ? 4 : 2); i++) \
            aarch64_set_vec_u32 (cpu, vd, i, \
                                 (aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
                                  CMP 0) ? -1 : 0); \
          return; \
        } \
      else if (size == 3) \
        { \
          if (! full) \
            HALT_UNALLOC; \
          for (i = 0; i < 2; i++) \
            aarch64_set_vec_u64 (cpu, vd, i, \
                                 (aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
                                  CMP 0) ? -1ULL : 0); \
          return; \
        } \
    } \
  while (0)
| |
/* Lane-by-lane floating-point comparison of Vn against 0.0 using the C
   operator CMP.  instr[22] selects float (0) or double (1) lanes; the
   matching 32- or 64-bit lane of Vd is set to all ones when the test
   holds and to zero otherwise.  A non-zero Vm field, or double lanes
   without the full form, is reported through HALT_NYI.  The macro uses
   the enclosing function's cpu, full, vd, vn, vm and i, and always
   returns from that function.  */
#define VEC_FCMP0(CMP) \
  do \
    { \
      if (vm != 0) \
        HALT_NYI; \
      if (INSTR (22, 22) == 0) \
        { \
          for (i = 0; i < (full ? 4 : 2); i++) \
            aarch64_set_vec_u32 (cpu, vd, i, \
                                 (aarch64_get_vec_float (cpu, vn, i) \
                                  CMP 0.0) ? -1 : 0); \
        } \
      else \
        { \
          if (! full) \
            HALT_NYI; \
          for (i = 0; i < 2; i++) \
            aarch64_set_vec_u64 (cpu, vd, i, \
                                 (aarch64_get_vec_double (cpu, vn, i) \
                                  CMP 0.0) ? -1 : 0); \
        } \
      return; \
    } \
  while (0)
| |
/* Lane-by-lane floating-point comparison of Vn against Vm using the C
   operator CMP.  instr[22] selects float (0) or double (1) lanes; the
   matching 32- or 64-bit lane of Vd is set to all ones when the test
   holds and to zero otherwise.  Double lanes without the full form are
   reported through HALT_NYI.  The macro uses the enclosing function's
   cpu, full, vd, vn, vm and i, and always returns from that function.  */
#define VEC_FCMP(CMP) \
  do \
    { \
      if (INSTR (22, 22) == 0) \
        { \
          for (i = 0; i < (full ? 4 : 2); i++) \
            aarch64_set_vec_u32 (cpu, vd, i, \
                                 (aarch64_get_vec_float (cpu, vn, i) \
                                  CMP \
                                  aarch64_get_vec_float (cpu, vm, i)) \
                                 ? -1 : 0); \
        } \
      else \
        { \
          if (! full) \
            HALT_NYI; \
          for (i = 0; i < 2; i++) \
            aarch64_set_vec_u64 (cpu, vd, i, \
                                 (aarch64_get_vec_double (cpu, vn, i) \
                                  CMP \
                                  aarch64_get_vec_double (cpu, vm, i)) \
                                 ? -1 : 0); \
        } \
      return; \
    } \
  while (0)
| |
| static void |
| do_vec_compare (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29] = part-of-comparison-type |
| instr[28,24] = 0 1110 |
| instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11) |
| type of float compares: single (-0) / double (-1) |
| instr[21] = 1 |
| instr[20,16] = Vm or 00000 (compare vs 0) |
| instr[15,10] = part-of-comparison-type |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| int size = INSTR (23, 22); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (28, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if ((INSTR (11, 11) |
| && INSTR (14, 14)) |
| || ((INSTR (11, 11) == 0 |
| && INSTR (10, 10) == 0))) |
| { |
| /* A compare vs 0. */ |
| if (vm != 0) |
| { |
| if (INSTR (15, 10) == 0x2A) |
| do_vec_maxv (cpu); |
| else if (INSTR (15, 10) == 0x32 |
| || INSTR (15, 10) == 0x3E) |
| do_vec_fminmaxV (cpu); |
| else if (INSTR (29, 23) == 0x1C |
| && INSTR (21, 10) == 0x876) |
| do_vec_SCVTF (cpu); |
| else |
| HALT_NYI; |
| return; |
| } |
| } |
| |
| if (INSTR (14, 14)) |
| { |
| /* A floating point compare. */ |
| unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4) |
| | INSTR (13, 10); |
| |
| NYI_assert (15, 15, 1); |
| |
| switch (decode) |
| { |
| case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>); |
| case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=); |
| case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==); |
| case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=); |
| case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<); |
| case /* 0b111001: GT */ 0x39: VEC_FCMP (>); |
| case /* 0b101001: GE */ 0x29: VEC_FCMP (>=); |
| case /* 0b001001: EQ */ 0x09: VEC_FCMP (==); |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| else |
| { |
| unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10); |
| |
| switch (decode) |
| { |
| case 0x0D: /* 0001101 GT */ VEC_CMP (s, > ); |
| case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= ); |
| case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > ); |
| case 0x23: /* 0100011 TST */ VEC_CMP (u, & ); |
| case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == ); |
| case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < ); |
| case 0x4D: /* 1001101 HI */ VEC_CMP (u, > ); |
| case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= ); |
| case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= ); |
| case 0x63: /* 1100011 EQ */ VEC_CMP (u, == ); |
| case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= ); |
| default: |
| if (vm == 0) |
| HALT_NYI; |
| do_vec_maxv (cpu); |
| } |
| } |
| } |
| |
| static void |
| do_vec_SSHL (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = first part (0)/ second part (1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: byte(00), half(01), word (10), long (11) |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,10] = 0100 01 |
| instr[9,5] = Vn |
| instr[4,0] = Vd. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| signed int shift; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x11); |
| |
| /* FIXME: What is a signed shift left in this context ?. */ |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i); |
| if (shift >= 0) |
| aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i * 2); |
| if (shift >= 0) |
| aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i * 4); |
| if (shift >= 0) |
| aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| |
| case 3: |
| if (! full) |
| HALT_UNALLOC; |
| for (i = 0; i < 2; i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i * 8); |
| if (shift >= 0) |
| aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| } |
| } |
| |
| static void |
| do_vec_USHL (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = first part (0)/ second part (1) |
| instr[29,24] = 10 1110 |
| instr[23,22] = size: byte(00), half(01), word (10), long (11) |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,10] = 0100 01 |
| instr[9,5] = Vn |
| instr[4,0] = Vd */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| signed int shift; |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (15, 10, 0x11); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i); |
| if (shift >= 0) |
| aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i * 2); |
| if (shift >= 0) |
| aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i * 4); |
| if (shift >= 0) |
| aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| |
| case 3: |
| if (! full) |
| HALT_UNALLOC; |
| for (i = 0; i < 2; i++) |
| { |
| shift = aarch64_get_vec_s8 (cpu, vm, i * 8); |
| if (shift >= 0) |
| aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) |
| << shift); |
| else |
| aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i) |
| >> - shift); |
| } |
| return; |
| } |
| } |
| |
| static void |
| do_vec_FMLA (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29,23] = 0011100 |
| instr[22] = size: 0=>float, 1=>double |
| instr[21] = 1 |
| instr[20,16] = Vn |
| instr[15,10] = 1100 11 |
| instr[9,5] = Vm |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 23, 0x1C); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x33); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| aarch64_get_vec_double (cpu, vn, i) * |
| aarch64_get_vec_double (cpu, vm, i) + |
| aarch64_get_vec_double (cpu, vd, i)); |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| aarch64_get_vec_float (cpu, vn, i) * |
| aarch64_get_vec_float (cpu, vm, i) + |
| aarch64_get_vec_float (cpu, vd, i)); |
| } |
| } |
| |
| static void |
| do_vec_max (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29] = SMAX (0) / UMAX (1) |
| instr[28,24] = 0 1110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit |
| instr[21] = 1 |
| instr[20,16] = Vn |
| instr[15,10] = 0110 01 |
| instr[9,5] = Vm |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (28, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x19); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (29, 29)) |
| { |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vn, i) |
| > aarch64_get_vec_u8 (cpu, vm, i) |
| ? aarch64_get_vec_u8 (cpu, vn, i) |
| : aarch64_get_vec_u8 (cpu, vm, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, |
| aarch64_get_vec_u16 (cpu, vn, i) |
| > aarch64_get_vec_u16 (cpu, vm, i) |
| ? aarch64_get_vec_u16 (cpu, vn, i) |
| : aarch64_get_vec_u16 (cpu, vm, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vn, i) |
| > aarch64_get_vec_u32 (cpu, vm, i) |
| ? aarch64_get_vec_u32 (cpu, vn, i) |
| : aarch64_get_vec_u32 (cpu, vm, i)); |
| return; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| } |
| else |
| { |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_s8 (cpu, vd, i, |
| aarch64_get_vec_s8 (cpu, vn, i) |
| > aarch64_get_vec_s8 (cpu, vm, i) |
| ? aarch64_get_vec_s8 (cpu, vn, i) |
| : aarch64_get_vec_s8 (cpu, vm, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_s16 (cpu, vd, i, |
| aarch64_get_vec_s16 (cpu, vn, i) |
| > aarch64_get_vec_s16 (cpu, vm, i) |
| ? aarch64_get_vec_s16 (cpu, vn, i) |
| : aarch64_get_vec_s16 (cpu, vm, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_s32 (cpu, vd, i, |
| aarch64_get_vec_s32 (cpu, vn, i) |
| > aarch64_get_vec_s32 (cpu, vm, i) |
| ? aarch64_get_vec_s32 (cpu, vn, i) |
| : aarch64_get_vec_s32 (cpu, vm, i)); |
| return; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| } |
| } |
| |
| static void |
| do_vec_min (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half selector |
| instr[29] = SMIN (0) / UMIN (1) |
| instr[28,24] = 0 1110 |
| instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit |
| instr[21] = 1 |
| instr[20,16] = Vn |
| instr[15,10] = 0110 11 |
| instr[9,5] = Vm |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (28, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x1B); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (29, 29)) |
| { |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vn, i) |
| < aarch64_get_vec_u8 (cpu, vm, i) |
| ? aarch64_get_vec_u8 (cpu, vn, i) |
| : aarch64_get_vec_u8 (cpu, vm, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, |
| aarch64_get_vec_u16 (cpu, vn, i) |
| < aarch64_get_vec_u16 (cpu, vm, i) |
| ? aarch64_get_vec_u16 (cpu, vn, i) |
| : aarch64_get_vec_u16 (cpu, vm, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vn, i) |
| < aarch64_get_vec_u32 (cpu, vm, i) |
| ? aarch64_get_vec_u32 (cpu, vn, i) |
| : aarch64_get_vec_u32 (cpu, vm, i)); |
| return; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| } |
| else |
| { |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_s8 (cpu, vd, i, |
| aarch64_get_vec_s8 (cpu, vn, i) |
| < aarch64_get_vec_s8 (cpu, vm, i) |
| ? aarch64_get_vec_s8 (cpu, vn, i) |
| : aarch64_get_vec_s8 (cpu, vm, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_s16 (cpu, vd, i, |
| aarch64_get_vec_s16 (cpu, vn, i) |
| < aarch64_get_vec_s16 (cpu, vm, i) |
| ? aarch64_get_vec_s16 (cpu, vn, i) |
| : aarch64_get_vec_s16 (cpu, vm, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_s32 (cpu, vd, i, |
| aarch64_get_vec_s32 (cpu, vn, i) |
| < aarch64_get_vec_s32 (cpu, vm, i) |
| ? aarch64_get_vec_s32 (cpu, vn, i) |
| : aarch64_get_vec_s32 (cpu, vm, i)); |
| return; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| } |
| } |
| |
| static void |
| do_vec_sub_long (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = lower (0) / upper (1) |
| instr[29] = signed (0) / unsigned (1) |
| instr[28,24] = 0 1110 |
| instr[23,22] = size: bytes (00), half (01), word (10) |
| instr[21] = 1 |
| insrt[20,16] = Vm |
| instr[15,10] = 0010 00 |
| instr[9,5] = Vn |
| instr[4,0] = V dest. */ |
| |
| unsigned size = INSTR (23, 22); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned bias = 0; |
| unsigned i; |
| |
| NYI_assert (28, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x08); |
| |
| if (size == 3) |
| HALT_UNALLOC; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (30, 29)) |
| { |
| case 2: /* SSUBL2. */ |
| bias = 2; |
| case 0: /* SSUBL. */ |
| switch (size) |
| { |
| case 0: |
| bias *= 3; |
| for (i = 0; i < 8; i++) |
| aarch64_set_vec_s16 (cpu, vd, i, |
| aarch64_get_vec_s8 (cpu, vn, i + bias) |
| - aarch64_get_vec_s8 (cpu, vm, i + bias)); |
| break; |
| |
| case 1: |
| bias *= 2; |
| for (i = 0; i < 4; i++) |
| aarch64_set_vec_s32 (cpu, vd, i, |
| aarch64_get_vec_s16 (cpu, vn, i + bias) |
| - aarch64_get_vec_s16 (cpu, vm, i + bias)); |
| break; |
| |
| case 2: |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_s64 (cpu, vd, i, |
| aarch64_get_vec_s32 (cpu, vn, i + bias) |
| - aarch64_get_vec_s32 (cpu, vm, i + bias)); |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| break; |
| |
| case 3: /* USUBL2. */ |
| bias = 2; |
| case 1: /* USUBL. */ |
| switch (size) |
| { |
| case 0: |
| bias *= 3; |
| for (i = 0; i < 8; i++) |
| aarch64_set_vec_u16 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vn, i + bias) |
| - aarch64_get_vec_u8 (cpu, vm, i + bias)); |
| break; |
| |
| case 1: |
| bias *= 2; |
| for (i = 0; i < 4; i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u16 (cpu, vn, i + bias) |
| - aarch64_get_vec_u16 (cpu, vm, i + bias)); |
| break; |
| |
| case 2: |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_u64 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vn, i + bias) |
| - aarch64_get_vec_u32 (cpu, vm, i + bias)); |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| break; |
| } |
| } |
| |
| static void |
| do_vec_ADDP (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 00 1110 |
| instr[23,22] = size: bytes (00), half (01), word (10), long (11) |
| instr[21] = 1 |
| insrt[20,16] = Vm |
| instr[15,10] = 1011 11 |
| instr[9,5] = Vn |
| instr[4,0] = V dest. */ |
| |
| FRegister copy_vn; |
| FRegister copy_vm; |
| unsigned full = INSTR (30, 30); |
| unsigned size = INSTR (23, 22); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i, range; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x2F); |
| |
| /* Make copies of the source registers in case vd == vn/vm. */ |
| copy_vn = cpu->fr[vn]; |
| copy_vm = cpu->fr[vm]; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (size) |
| { |
| case 0: |
| range = full ? 8 : 4; |
| for (i = 0; i < range; i++) |
| { |
| aarch64_set_vec_u8 (cpu, vd, i, |
| copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]); |
| aarch64_set_vec_u8 (cpu, vd, i + range, |
| copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]); |
| } |
| return; |
| |
| case 1: |
| range = full ? 4 : 2; |
| for (i = 0; i < range; i++) |
| { |
| aarch64_set_vec_u16 (cpu, vd, i, |
| copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]); |
| aarch64_set_vec_u16 (cpu, vd, i + range, |
| copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]); |
| } |
| return; |
| |
| case 2: |
| range = full ? 2 : 1; |
| for (i = 0; i < range; i++) |
| { |
| aarch64_set_vec_u32 (cpu, vd, i, |
| copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]); |
| aarch64_set_vec_u32 (cpu, vd, i + range, |
| copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]); |
| } |
| return; |
| |
| case 3: |
| if (! full) |
| HALT_UNALLOC; |
| aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]); |
| aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]); |
| return; |
| } |
| } |
| |
| /* Float point vector convert to longer (precision). */ |
| static void |
| do_vec_FCVTL (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0) / all (1) |
| instr[29,23] = 00 1110 0 |
| instr[22] = single (0) / double (1) |
| instr[21,10] = 10 0001 0111 10 |
| instr[9,5] = Rn |
| instr[4,0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| |
| NYI_assert (31, 31, 0); |
| NYI_assert (29, 23, 0x1C); |
| NYI_assert (21, 10, 0x85E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, rd, i, |
| aarch64_get_vec_float (cpu, rn, i + 2*full)); |
| } |
| else |
| { |
| HALT_NYI; |
| |
| #if 0 |
| /* TODO: Implement missing half-float support. */ |
| for (i = 0; i < 4; i++) |
| aarch64_set_vec_float (cpu, rd, i, |
| aarch64_get_vec_halffloat (cpu, rn, i + 4*full)); |
| #endif |
| } |
| } |
| |
| static void |
| do_vec_FABS (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,23] = 00 1110 1 |
| instr[22] = float(0)/double(1) |
| instr[21,16] = 10 0000 |
| instr[15,10] = 1111 10 |
| instr[9,5] = Vn |
| instr[4,0] = Vd. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x1D); |
| NYI_assert (21, 10, 0x83E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_NYI; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| fabs (aarch64_get_vec_double (cpu, vn, i))); |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| fabsf (aarch64_get_vec_float (cpu, vn, i))); |
| } |
| } |
| |
| static void |
| do_vec_FCVTZS (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0) / all (1) |
| instr[29,23] = 00 1110 1 |
| instr[22] = single (0) / double (1) |
| instr[21,10] = 10 0001 1011 10 |
| instr[9,5] = Rn |
| instr[4,0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| |
| NYI_assert (31, 31, 0); |
| NYI_assert (29, 23, 0x1D); |
| NYI_assert (21, 10, 0x86E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_s64 (cpu, rd, i, |
| (int64_t) aarch64_get_vec_double (cpu, rn, i)); |
| } |
| else |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_s32 (cpu, rd, i, |
| (int32_t) aarch64_get_vec_float (cpu, rn, i)); |
| } |
| |
| static void |
| do_vec_REV64 (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half |
| instr[29,24] = 00 1110 |
| instr[23,22] = size |
| instr[21,10] = 10 0000 0000 10 |
| instr[9,5] = Rn |
| instr[4,0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned size = INSTR (23, 22); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| FRegister val; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 10, 0x802); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (size) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i); |
| break; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i); |
| break; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i); |
| break; |
| |
| case 3: |
| HALT_UNALLOC; |
| } |
| |
| aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); |
| if (full) |
| aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); |
| } |
| |
| static void |
| do_vec_REV16 (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half |
| instr[29,24] = 00 1110 |
| instr[23,22] = size |
| instr[21,10] = 10 0000 0001 10 |
| instr[9,5] = Rn |
| instr[4,0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned size = INSTR (23, 22); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| FRegister val; |
| |
| NYI_assert (29, 24, 0x0E); |
| NYI_assert (21, 10, 0x806); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (size) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i); |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| |
| aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); |
| if (full) |
| aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); |
| } |
| |
| static void |
| do_vec_op1 (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half/full |
| instr[29,24] = 00 1110 |
| instr[23,21] = ??? |
| instr[20,16] = Vm |
| instr[15,10] = sub-opcode |
| instr[9,5] = Vn |
| instr[4,0] = Vd */ |
| NYI_assert (29, 24, 0x0E); |
| |
| if (INSTR (21, 21) == 0) |
| { |
| if (INSTR (23, 22) == 0) |
| { |
| if (INSTR (30, 30) == 1 |
| && INSTR (17, 14) == 0 |
| && INSTR (12, 10) == 7) |
| return do_vec_ins_2 (cpu); |
| |
| switch (INSTR (15, 10)) |
| { |
| case 0x01: do_vec_DUP_vector_into_vector (cpu); return; |
| case 0x03: do_vec_DUP_scalar_into_vector (cpu); return; |
| case 0x07: do_vec_INS (cpu); return; |
| case 0x0B: do_vec_SMOV_into_scalar (cpu); return; |
| case 0x0F: do_vec_UMOV_into_scalar (cpu); return; |
| |
| case 0x00: |
| case 0x08: |
| case 0x10: |
| case 0x18: |
| do_vec_TBL (cpu); return; |
| |
| case 0x06: |
| case 0x16: |
| do_vec_UZP (cpu); return; |
| |
| case 0x0A: do_vec_TRN (cpu); return; |
| |
| case 0x0E: |
| case 0x1E: |
| do_vec_ZIP (cpu); return; |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| switch (INSTR (13, 10)) |
| { |
| case 0x6: do_vec_UZP (cpu); return; |
| case 0xE: do_vec_ZIP (cpu); return; |
| case 0xA: do_vec_TRN (cpu); return; |
| default: HALT_NYI; |
| } |
| } |
| |
| switch (INSTR (15, 10)) |
| { |
| case 0x02: do_vec_REV64 (cpu); return; |
| case 0x06: do_vec_REV16 (cpu); return; |
| |
| case 0x07: |
| switch (INSTR (23, 21)) |
| { |
| case 1: do_vec_AND (cpu); return; |
| case 3: do_vec_BIC (cpu); return; |
| case 5: do_vec_ORR (cpu); return; |
| case 7: do_vec_ORN (cpu); return; |
| default: HALT_NYI; |
| } |
| |
| case 0x08: do_vec_sub_long (cpu); return; |
| case 0x0a: do_vec_XTN (cpu); return; |
| case 0x11: do_vec_SSHL (cpu); return; |
| case 0x16: do_vec_CNT (cpu); return; |
| case 0x19: do_vec_max (cpu); return; |
| case 0x1B: do_vec_min (cpu); return; |
| case 0x21: do_vec_add (cpu); return; |
| case 0x25: do_vec_MLA (cpu); return; |
| case 0x27: do_vec_mul (cpu); return; |
| case 0x2F: do_vec_ADDP (cpu); return; |
| case 0x30: do_vec_mull (cpu); return; |
| case 0x33: do_vec_FMLA (cpu); return; |
| case 0x35: do_vec_fadd (cpu); return; |
| |
| case 0x1E: |
| switch (INSTR (20, 16)) |
| { |
| case 0x01: do_vec_FCVTL (cpu); return; |
| default: HALT_NYI; |
| } |
| |
| case 0x2E: |
| switch (INSTR (20, 16)) |
| { |
| case 0x00: do_vec_ABS (cpu); return; |
| case 0x01: do_vec_FCVTZS (cpu); return; |
| case 0x11: do_vec_ADDV (cpu); return; |
| default: HALT_NYI; |
| } |
| |
| case 0x31: |
| case 0x3B: |
| do_vec_Fminmax (cpu); return; |
| |
| case 0x0D: |
| case 0x0F: |
| case 0x22: |
| case 0x23: |
| case 0x26: |
| case 0x2A: |
| case 0x32: |
| case 0x36: |
| case 0x39: |
| case 0x3A: |
| do_vec_compare (cpu); return; |
| |
| case 0x3E: |
| do_vec_FABS (cpu); return; |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| do_vec_xtl (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11) |
| instr[28,22] = 0 1111 00 |
| instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2) |
| instr[15,10] = 1010 01 |
| instr[9,5] = V source |
| instr[4,0] = V dest. */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i, shift, bias = 0; |
| |
| NYI_assert (28, 22, 0x3C); |
| NYI_assert (15, 10, 0x29); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (30, 29)) |
| { |
| case 2: /* SXTL2, SSHLL2. */ |
| bias = 2; |
| case 0: /* SXTL, SSHLL. */ |
| if (INSTR (21, 21)) |
| { |
| int64_t val1, val2; |
| |
| shift = INSTR (20, 16); |
| /* Get the source values before setting the destination values |
| in case the source and destination are the same. */ |
| val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift; |
| val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift; |
| aarch64_set_vec_s64 (cpu, vd, 0, val1); |
| aarch64_set_vec_s64 (cpu, vd, 1, val2); |
| } |
| else if (INSTR (20, 20)) |
| { |
| int32_t v[4]; |
| int32_t v1,v2,v3,v4; |
| |
| shift = INSTR (19, 16); |
| bias *= 2; |
| for (i = 0; i < 4; i++) |
| v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift; |
| for (i = 0; i < 4; i++) |
| aarch64_set_vec_s32 (cpu, vd, i, v[i]); |
| } |
| else |
| { |
| int16_t v[8]; |
| NYI_assert (19, 19, 1); |
| |
| shift = INSTR (18, 16); |
| bias *= 4; |
| for (i = 0; i < 8; i++) |
| v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift; |
| for (i = 0; i < 8; i++) |
| aarch64_set_vec_s16 (cpu, vd, i, v[i]); |
| } |
| return; |
| |
| case 3: /* UXTL2, USHLL2. */ |
| bias = 2; |
| case 1: /* UXTL, USHLL. */ |
| if (INSTR (21, 21)) |
| { |
| uint64_t v1, v2; |
| shift = INSTR (20, 16); |
| v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift; |
| v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift; |
| aarch64_set_vec_u64 (cpu, vd, 0, v1); |
| aarch64_set_vec_u64 (cpu, vd, 1, v2); |
| } |
| else if (INSTR (20, 20)) |
| { |
| uint32_t v[4]; |
| shift = INSTR (19, 16); |
| bias *= 2; |
| for (i = 0; i < 4; i++) |
| v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift; |
| for (i = 0; i < 4; i++) |
| aarch64_set_vec_u32 (cpu, vd, i, v[i]); |
| } |
| else |
| { |
| uint16_t v[8]; |
| NYI_assert (19, 19, 1); |
| |
| shift = INSTR (18, 16); |
| bias *= 4; |
| for (i = 0; i < 8; i++) |
| v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift; |
| for (i = 0; i < 8; i++) |
| aarch64_set_vec_u16 (cpu, vd, i, v[i]); |
| } |
| return; |
| } |
| } |
| |
| static void |
| do_vec_SHL (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29,23] = 001 1110 |
| instr [22,16] = size and shift amount |
| instr [15,10] = 01 0101 |
| instr [9, 5] = Vs |
| instr [4, 0] = Vd. */ |
| |
| int shift; |
| int full = INSTR (30, 30); |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x1E); |
| NYI_assert (15, 10, 0x15); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| shift = INSTR (21, 16); |
| |
| if (full == 0) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| { |
| uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); |
| aarch64_set_vec_u64 (cpu, vd, i, val << shift); |
| } |
| |
| return; |
| } |
| |
| if (INSTR (21, 21)) |
| { |
| shift = INSTR (20, 16); |
| |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); |
| aarch64_set_vec_u32 (cpu, vd, i, val << shift); |
| } |
| |
| return; |
| } |
| |
| if (INSTR (20, 20)) |
| { |
| shift = INSTR (19, 16); |
| |
| for (i = 0; i < (full ? 8 : 4); i++) |
| { |
| uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); |
| aarch64_set_vec_u16 (cpu, vd, i, val << shift); |
| } |
| |
| return; |
| } |
| |
| if (INSTR (19, 19) == 0) |
| HALT_UNALLOC; |
| |
| shift = INSTR (18, 16); |
| |
| for (i = 0; i < (full ? 16 : 8); i++) |
| { |
| uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); |
| aarch64_set_vec_u8 (cpu, vd, i, val << shift); |
| } |
| } |
| |
| static void |
| do_vec_SSHR_USHR (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29] = signed(0)/unsigned(1) |
| instr [28,23] = 0 1111 0 |
| instr [22,16] = size and shift amount |
| instr [15,10] = 0000 01 |
| instr [9, 5] = Vs |
| instr [4, 0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| int sign = ! INSTR (29, 29); |
| unsigned shift = INSTR (22, 16); |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (28, 23, 0x1E); |
| NYI_assert (15, 10, 0x01); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| shift = 128 - shift; |
| |
| if (full == 0) |
| HALT_UNALLOC; |
| |
| if (sign) |
| for (i = 0; i < 2; i++) |
| { |
| int64_t val = aarch64_get_vec_s64 (cpu, vs, i); |
| aarch64_set_vec_s64 (cpu, vd, i, val >> shift); |
| } |
| else |
| for (i = 0; i < 2; i++) |
| { |
| uint64_t val = aarch64_get_vec_u64 (cpu, vs, i); |
| aarch64_set_vec_u64 (cpu, vd, i, val >> shift); |
| } |
| |
| return; |
| } |
| |
| if (INSTR (21, 21)) |
| { |
| shift = 64 - shift; |
| |
| if (sign) |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| int32_t val = aarch64_get_vec_s32 (cpu, vs, i); |
| aarch64_set_vec_s32 (cpu, vd, i, val >> shift); |
| } |
| else |
| for (i = 0; i < (full ? 4 : 2); i++) |
| { |
| uint32_t val = aarch64_get_vec_u32 (cpu, vs, i); |
| aarch64_set_vec_u32 (cpu, vd, i, val >> shift); |
| } |
| |
| return; |
| } |
| |
| if (INSTR (20, 20)) |
| { |
| shift = 32 - shift; |
| |
| if (sign) |
| for (i = 0; i < (full ? 8 : 4); i++) |
| { |
| int16_t val = aarch64_get_vec_s16 (cpu, vs, i); |
| aarch64_set_vec_s16 (cpu, vd, i, val >> shift); |
| } |
| else |
| for (i = 0; i < (full ? 8 : 4); i++) |
| { |
| uint16_t val = aarch64_get_vec_u16 (cpu, vs, i); |
| aarch64_set_vec_u16 (cpu, vd, i, val >> shift); |
| } |
| |
| return; |
| } |
| |
| if (INSTR (19, 19) == 0) |
| HALT_UNALLOC; |
| |
| shift = 16 - shift; |
| |
| if (sign) |
| for (i = 0; i < (full ? 16 : 8); i++) |
| { |
| int8_t val = aarch64_get_vec_s8 (cpu, vs, i); |
| aarch64_set_vec_s8 (cpu, vd, i, val >> shift); |
| } |
| else |
| for (i = 0; i < (full ? 16 : 8); i++) |
| { |
| uint8_t val = aarch64_get_vec_u8 (cpu, vs, i); |
| aarch64_set_vec_u8 (cpu, vd, i, val >> shift); |
| } |
| } |
| |
| static void |
| do_vec_MUL_by_element (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half/full |
| instr[29,24] = 00 1111 |
| instr[23,22] = size |
| instr[21] = L |
| instr[20] = M |
| instr[19,16] = m |
| instr[15,12] = 1000 |
| instr[11] = H |
| instr[10] = 0 |
| instr[9,5] = Vn |
| instr[4,0] = Vd */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned L = INSTR (21, 21); |
| unsigned H = INSTR (11, 11); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned size = INSTR (23, 22); |
| unsigned index; |
| unsigned vm; |
| unsigned e; |
| |
| NYI_assert (29, 24, 0x0F); |
| NYI_assert (15, 12, 0x8); |
| NYI_assert (10, 10, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (size) |
| { |
| case 1: |
| { |
| /* 16 bit products. */ |
| uint16_t product; |
| uint16_t element1; |
| uint16_t element2; |
| |
| index = (H << 2) | (L << 1) | INSTR (20, 20); |
| vm = INSTR (19, 16); |
| element2 = aarch64_get_vec_u16 (cpu, vm, index); |
| |
| for (e = 0; e < (full ? 8 : 4); e ++) |
| { |
| element1 = aarch64_get_vec_u16 (cpu, vn, e); |
| product = element1 * element2; |
| aarch64_set_vec_u16 (cpu, vd, e, product); |
| } |
| } |
| break; |
| |
| case 2: |
| { |
| /* 32 bit products. */ |
| uint32_t product; |
| uint32_t element1; |
| uint32_t element2; |
| |
| index = (H << 1) | L; |
| vm = INSTR (20, 16); |
| element2 = aarch64_get_vec_u32 (cpu, vm, index); |
| |
| for (e = 0; e < (full ? 4 : 2); e ++) |
| { |
| element1 = aarch64_get_vec_u32 (cpu, vn, e); |
| product = element1 * element2; |
| aarch64_set_vec_u32 (cpu, vd, e, product); |
| } |
| } |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_FMLA_by_element (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half/full |
| instr[29,23] = 00 1111 1 |
| instr[22] = size |
| instr[21] = L |
| instr[20,16] = m |
| instr[15,12] = 0001 |
| instr[11] = H |
| instr[10] = 0 |
| instr[9,5] = Vn |
| instr[4,0] = Vd */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned size = INSTR (22, 22); |
| unsigned L = INSTR (21, 21); |
| unsigned vm = INSTR (20, 16); |
| unsigned H = INSTR (11, 11); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned e; |
| |
| NYI_assert (29, 23, 0x1F); |
| NYI_assert (15, 12, 0x1); |
| NYI_assert (10, 10, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| { |
| double element1, element2; |
| |
| if (! full || L) |
| HALT_UNALLOC; |
| |
| element2 = aarch64_get_vec_double (cpu, vm, H); |
| |
| for (e = 0; e < 2; e++) |
| { |
| element1 = aarch64_get_vec_double (cpu, vn, e); |
| element1 *= element2; |
| element1 += aarch64_get_vec_double (cpu, vd, e); |
| aarch64_set_vec_double (cpu, vd, e, element1); |
| } |
| } |
| else |
| { |
| float element1; |
| float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L); |
| |
| for (e = 0; e < (full ? 4 : 2); e++) |
| { |
| element1 = aarch64_get_vec_float (cpu, vn, e); |
| element1 *= element2; |
| element1 += aarch64_get_vec_float (cpu, vd, e); |
| aarch64_set_vec_float (cpu, vd, e, element1); |
| } |
| } |
| } |
| |
| static void |
| do_vec_op2 (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half/full |
| instr[29,24] = 00 1111 |
| instr[23] = ? |
| instr[22,16] = element size & index |
| instr[15,10] = sub-opcode |
| instr[9,5] = Vm |
| instr[4,0] = Vd */ |
| |
| NYI_assert (29, 24, 0x0F); |
| |
| if (INSTR (23, 23) != 0) |
| { |
| switch (INSTR (15, 10)) |
| { |
| case 0x04: |
| case 0x06: |
| do_FMLA_by_element (cpu); |
| return; |
| |
| case 0x20: |
| case 0x22: |
| do_vec_MUL_by_element (cpu); |
| return; |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| else |
| { |
| switch (INSTR (15, 10)) |
| { |
| case 0x01: do_vec_SSHR_USHR (cpu); return; |
| case 0x15: do_vec_SHL (cpu); return; |
| case 0x20: |
| case 0x22: do_vec_MUL_by_element (cpu); return; |
| case 0x29: do_vec_xtl (cpu); return; |
| default: HALT_NYI; |
| } |
| } |
| } |
| |
| static void |
| do_vec_neg (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full(1)/half(0) |
| instr[29,24] = 10 1110 |
| instr[23,22] = size: byte(00), half (01), word (10), long (11) |
| instr[21,10] = 1000 0010 1110 |
| instr[9,5] = Vs |
| instr[4,0] = Vd */ |
| |
| int full = INSTR (30, 30); |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (21, 10, 0x82E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i)); |
| return; |
| |
| case 3: |
| if (! full) |
| HALT_NYI; |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i)); |
| return; |
| } |
| } |
| |
| static void |
| do_vec_sqrt (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full(1)/half(0) |
| instr[29,23] = 101 1101 |
| instr[22] = single(0)/double(1) |
| instr[21,10] = 1000 0111 1110 |
| instr[9,5] = Vs |
| instr[4,0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x5B); |
| NYI_assert (21, 10, 0x87E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22) == 0) |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| sqrtf (aarch64_get_vec_float (cpu, vs, i))); |
| else |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| sqrt (aarch64_get_vec_double (cpu, vs, i))); |
| } |
| |
| static void |
| do_vec_mls_indexed (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,24] = 10 1111 |
| instr[23,22] = 16-bit(01)/32-bit(10) |
| instr[21,20+11] = index (if 16-bit) |
| instr[21+11] = index (if 32-bit) |
| instr[20,16] = Vm |
| instr[15,12] = 0100 |
| instr[11] = part of index |
| instr[10] = 0 |
| instr[9,5] = Vs |
| instr[4,0] = Vd. */ |
| |
| int full = INSTR (30, 30); |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned vm = INSTR (20, 16); |
| unsigned i; |
| |
| NYI_assert (15, 12, 4); |
| NYI_assert (10, 10, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 1: |
| { |
| unsigned elem; |
| uint32_t val; |
| |
| if (vm > 15) |
| HALT_NYI; |
| |
| elem = (INSTR (21, 20) << 1) | INSTR (11, 11); |
| val = aarch64_get_vec_u16 (cpu, vm, elem); |
| |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vd, i) - |
| (aarch64_get_vec_u32 (cpu, vs, i) * val)); |
| return; |
| } |
| |
| case 2: |
| { |
| unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11); |
| uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem); |
| |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u64 (cpu, vd, i, |
| aarch64_get_vec_u64 (cpu, vd, i) - |
| (aarch64_get_vec_u64 (cpu, vs, i) * val)); |
| return; |
| } |
| |
| case 0: |
| case 3: |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| do_vec_SUB (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29,24] = 10 1110 |
| instr [23,22] = size: byte(00, half(01), word (10), long (11) |
| instr [21] = 1 |
| instr [20,16] = Vm |
| instr [15,10] = 10 0001 |
| instr [9, 5] = Vn |
| instr [4, 0] = Vd. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x21); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_s8 (cpu, vd, i, |
| aarch64_get_vec_s8 (cpu, vn, i) |
| - aarch64_get_vec_s8 (cpu, vm, i)); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_s16 (cpu, vd, i, |
| aarch64_get_vec_s16 (cpu, vn, i) |
| - aarch64_get_vec_s16 (cpu, vm, i)); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_s32 (cpu, vd, i, |
| aarch64_get_vec_s32 (cpu, vn, i) |
| - aarch64_get_vec_s32 (cpu, vm, i)); |
| return; |
| |
| case 3: |
| if (full == 0) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_s64 (cpu, vd, i, |
| aarch64_get_vec_s64 (cpu, vn, i) |
| - aarch64_get_vec_s64 (cpu, vm, i)); |
| return; |
| } |
| } |
| |
| static void |
| do_vec_MLS (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29,24] = 10 1110 |
| instr [23,22] = size: byte(00, half(01), word (10) |
| instr [21] = 1 |
| instr [20,16] = Vm |
| instr [15,10] = 10 0101 |
| instr [9, 5] = Vn |
| instr [4, 0] = Vd. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x25); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, |
| aarch64_get_vec_u8 (cpu, vd, i) |
| - (aarch64_get_vec_u8 (cpu, vn, i) |
| * aarch64_get_vec_u8 (cpu, vm, i))); |
| return; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, |
| aarch64_get_vec_u16 (cpu, vd, i) |
| - (aarch64_get_vec_u16 (cpu, vn, i) |
| * aarch64_get_vec_u16 (cpu, vm, i))); |
| return; |
| |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, |
| aarch64_get_vec_u32 (cpu, vd, i) |
| - (aarch64_get_vec_u32 (cpu, vn, i) |
| * aarch64_get_vec_u32 (cpu, vm, i))); |
| return; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_vec_FDIV (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29,23] = 10 1110 0 |
| instr [22] = float()/double(1) |
| instr [21] = 1 |
| instr [20,16] = Vm |
| instr [15,10] = 1111 11 |
| instr [9, 5] = Vn |
| instr [4, 0] = Vd. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x5C); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x3F); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| aarch64_get_vec_double (cpu, vn, i) |
| / aarch64_get_vec_double (cpu, vm, i)); |
| } |
| else |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| aarch64_get_vec_float (cpu, vn, i) |
| / aarch64_get_vec_float (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_FMUL (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29,23] = 10 1110 0 |
| instr [22] = float(0)/double(1) |
| instr [21] = 1 |
| instr [20,16] = Vm |
| instr [15,10] = 1101 11 |
| instr [9, 5] = Vn |
| instr [4, 0] = Vd. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| |
| NYI_assert (29, 23, 0x5C); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x37); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| aarch64_get_vec_double (cpu, vn, i) |
| * aarch64_get_vec_double (cpu, vm, i)); |
| } |
| else |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| aarch64_get_vec_float (cpu, vn, i) |
| * aarch64_get_vec_float (cpu, vm, i)); |
| } |
| |
| static void |
| do_vec_FADDP (sim_cpu *cpu) |
| { |
| /* instr [31] = 0 |
| instr [30] = half(0)/full(1) |
| instr [29,23] = 10 1110 0 |
| instr [22] = float(0)/double(1) |
| instr [21] = 1 |
| instr [20,16] = Vm |
| instr [15,10] = 1101 01 |
| instr [9, 5] = Vn |
| instr [4, 0] = Vd. */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| |
| NYI_assert (29, 23, 0x5C); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x35); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| /* Extract values before adding them incase vd == vn/vm. */ |
| double tmp1 = aarch64_get_vec_double (cpu, vn, 0); |
| double tmp2 = aarch64_get_vec_double (cpu, vn, 1); |
| double tmp3 = aarch64_get_vec_double (cpu, vm, 0); |
| double tmp4 = aarch64_get_vec_double (cpu, vm, 1); |
| |
| if (! full) |
| HALT_UNALLOC; |
| |
| aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2); |
| aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4); |
| } |
| else |
| { |
| /* Extract values before adding them incase vd == vn/vm. */ |
| float tmp1 = aarch64_get_vec_float (cpu, vn, 0); |
| float tmp2 = aarch64_get_vec_float (cpu, vn, 1); |
| float tmp5 = aarch64_get_vec_float (cpu, vm, 0); |
| float tmp6 = aarch64_get_vec_float (cpu, vm, 1); |
| |
| if (full) |
| { |
| float tmp3 = aarch64_get_vec_float (cpu, vn, 2); |
| float tmp4 = aarch64_get_vec_float (cpu, vn, 3); |
| float tmp7 = aarch64_get_vec_float (cpu, vm, 2); |
| float tmp8 = aarch64_get_vec_float (cpu, vm, 3); |
| |
| aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2); |
| aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4); |
| aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6); |
| aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8); |
| } |
| else |
| { |
| aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2); |
| aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6); |
| } |
| } |
| } |
| |
| static void |
| do_vec_FSQRT (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half(0)/full(1) |
| instr[29,23] = 10 1110 1 |
| instr[22] = single(0)/double(1) |
| instr[21,10] = 10 0001 1111 10 |
| instr[9,5] = Vsrc |
| instr[4,0] = Vdest. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| int i; |
| |
| NYI_assert (29, 23, 0x5D); |
| NYI_assert (21, 10, 0x87E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| sqrt (aarch64_get_vec_double (cpu, vn, i))); |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| sqrtf (aarch64_get_vec_float (cpu, vn, i))); |
| } |
| } |
| |
| static void |
| do_vec_FNEG (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,23] = 10 1110 1 |
| instr[22] = single (0)/double (1) |
| instr[21,10] = 10 0000 1111 10 |
| instr[9,5] = Vsrc |
| instr[4,0] = Vdest. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned full = INSTR (30, 30); |
| int i; |
| |
| NYI_assert (29, 23, 0x5D); |
| NYI_assert (21, 10, 0x83E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| if (! full) |
| HALT_UNALLOC; |
| |
| for (i = 0; i < 2; i++) |
| aarch64_set_vec_double (cpu, vd, i, |
| - aarch64_get_vec_double (cpu, vn, i)); |
| } |
| else |
| { |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_float (cpu, vd, i, |
| - aarch64_get_vec_float (cpu, vn, i)); |
| } |
| } |
| |
| static void |
| do_vec_NOT (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,10] = 10 1110 0010 0000 0101 10 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30, 30); |
| |
| NYI_assert (29, 10, 0xB8816); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i)); |
| } |
| |
/* Count the leading zero bits of VAL, treating it as a SIZE-bit wide
   quantity (bits at or above position SIZE are ignored).  Returns SIZE
   when none of the low SIZE bits is set.  SIZE must be in 1..64.  */
static unsigned int
clz (uint64_t val, unsigned size)
{
  /* Start at the most significant bit of the SIZE-bit field and walk
     down until a set bit is found or the probe falls off the end.  */
  uint64_t probe = (uint64_t) 1 << (size - 1);
  unsigned int zeros = 0;

  while (probe != 0 && (val & probe) == 0)
    {
      probe >>= 1;
      zeros++;
    }

  return zeros;
}
| |
| static void |
| do_vec_CLZ (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = half (0)/full (1) |
| instr[29,24] = 10 1110 |
| instr[23,22] = size |
| instr[21,10] = 10 0000 0100 10 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned i; |
| int full = INSTR (30,30); |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (21, 10, 0x812); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (23, 22)) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8)); |
| break; |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16)); |
| break; |
| case 2: |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32)); |
| break; |
| case 3: |
| if (! full) |
| HALT_UNALLOC; |
| aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64)); |
| aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64)); |
| break; |
| } |
| } |
| |
| static void |
| do_vec_MOV_element (sim_cpu *cpu) |
| { |
| /* instr[31,21] = 0110 1110 000 |
| instr[20,16] = size & dest index |
| instr[15] = 0 |
| instr[14,11] = source index |
| instr[10] = 1 |
| instr[9,5] = Vs |
| instr[4.0] = Vd. */ |
| |
| unsigned vs = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned src_index; |
| unsigned dst_index; |
| |
| NYI_assert (31, 21, 0x370); |
| NYI_assert (15, 15, 0); |
| NYI_assert (10, 10, 1); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (16, 16)) |
| { |
| /* Move a byte. */ |
| src_index = INSTR (14, 11); |
| dst_index = INSTR (20, 17); |
| aarch64_set_vec_u8 (cpu, vd, dst_index, |
| aarch64_get_vec_u8 (cpu, vs, src_index)); |
| } |
| else if (INSTR (17, 17)) |
| { |
| /* Move 16-bits. */ |
| NYI_assert (11, 11, 0); |
| src_index = INSTR (14, 12); |
| dst_index = INSTR (20, 18); |
| aarch64_set_vec_u16 (cpu, vd, dst_index, |
| aarch64_get_vec_u16 (cpu, vs, src_index)); |
| } |
| else if (INSTR (18, 18)) |
| { |
| /* Move 32-bits. */ |
| NYI_assert (12, 11, 0); |
| src_index = INSTR (14, 13); |
| dst_index = INSTR (20, 19); |
| aarch64_set_vec_u32 (cpu, vd, dst_index, |
| aarch64_get_vec_u32 (cpu, vs, src_index)); |
| } |
| else |
| { |
| NYI_assert (19, 19, 1); |
| NYI_assert (13, 11, 0); |
| src_index = INSTR (14, 14); |
| dst_index = INSTR (20, 20); |
| aarch64_set_vec_u64 (cpu, vd, dst_index, |
| aarch64_get_vec_u64 (cpu, vs, src_index)); |
| } |
| } |
| |
| static void |
| do_vec_REV32 (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half |
| instr[29,24] = 10 1110 |
| instr[23,22] = size |
| instr[21,10] = 10 0000 0000 10 |
| instr[9,5] = Rn |
| instr[4,0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned size = INSTR (23, 22); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| FRegister val; |
| |
| NYI_assert (29, 24, 0x2E); |
| NYI_assert (21, 10, 0x802); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (size) |
| { |
| case 0: |
| for (i = 0; i < (full ? 16 : 8); i++) |
| val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i); |
| break; |
| |
| case 1: |
| for (i = 0; i < (full ? 8 : 4); i++) |
| val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i); |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| |
| aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]); |
| if (full) |
| aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]); |
| } |
| |
| static void |
| do_vec_EXT (sim_cpu *cpu) |
| { |
| /* instr[31] = 0 |
| instr[30] = full/half |
| instr[29,21] = 10 1110 000 |
| instr[20,16] = Vm |
| instr[15] = 0 |
| instr[14,11] = source index |
| instr[10] = 0 |
| instr[9,5] = Vn |
| instr[4.0] = Vd. */ |
| |
| unsigned vm = INSTR (20, 16); |
| unsigned vn = INSTR (9, 5); |
| unsigned vd = INSTR (4, 0); |
| unsigned src_index = INSTR (14, 11); |
| unsigned full = INSTR (30, 30); |
| unsigned i; |
| unsigned j; |
| FRegister val; |
| |
| NYI_assert (31, 21, 0x370); |
| NYI_assert (15, 15, 0); |
| NYI_assert (10, 10, 0); |
| |
| if (!full && (src_index & 0x8)) |
| HALT_UNALLOC; |
| |
| j = 0; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| for (i = src_index; i < (full ? 16 : 8); i++) |
| val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i); |
| for (i = 0; i < src_index; i++) |
| val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i); |
| |
| aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]); |
| if (full) |
| aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]); |
| } |
| |
| static void |
| dexAdvSIMD0 (sim_cpu *cpu) |
| { |
| /* instr [28,25] = 0 111. */ |
| if ( INSTR (15, 10) == 0x07 |
| && (INSTR (9, 5) == |
| INSTR (20, 16))) |
| { |
| if (INSTR (31, 21) == 0x075 |
| || INSTR (31, 21) == 0x275) |
| { |
| do_vec_MOV_whole_vector (cpu); |
| return; |
| } |
| } |
| |
| if (INSTR (29, 19) == 0x1E0) |
| { |
| do_vec_MOV_immediate (cpu); |
| return; |
| } |
| |
| if (INSTR (29, 19) == 0x5E0) |
| { |
| do_vec_MVNI (cpu); |
| return; |
| } |
| |
| if (INSTR (29, 19) == 0x1C0 |
| || INSTR (29, 19) == 0x1C1) |
| { |
| if (INSTR (15, 10) == 0x03) |
| { |
| do_vec_DUP_scalar_into_vector (cpu); |
| return; |
| } |
| } |
| |
| switch (INSTR (29, 24)) |
| { |
| case 0x0E: do_vec_op1 (cpu); return; |
| case 0x0F: do_vec_op2 (cpu); return; |
| |
| case 0x2E: |
| if (INSTR (21, 21) == 1) |
| { |
| switch (INSTR (15, 10)) |
| { |
| case 0x02: |
| do_vec_REV32 (cpu); |
| return; |
| |
| case 0x07: |
| switch (INSTR (23, 22)) |
| { |
| case 0: do_vec_EOR (cpu); return; |
| case 1: do_vec_BSL (cpu); return; |
| case 2: |
| case 3: do_vec_bit (cpu); return; |
| } |
| break; |
| |
| case 0x08: do_vec_sub_long (cpu); return; |
| case 0x11: do_vec_USHL (cpu); return; |
| case 0x12: do_vec_CLZ (cpu); return; |
| case 0x16: do_vec_NOT (cpu); return; |
| case 0x19: do_vec_max (cpu); return; |
| case 0x1B: do_vec_min (cpu); return; |
| case 0x21: do_vec_SUB (cpu); return; |
| case 0x25: do_vec_MLS (cpu); return; |
| case 0x31: do_vec_FminmaxNMP (cpu); return; |
| case 0x35: do_vec_FADDP (cpu); return; |
| case 0x37: do_vec_FMUL (cpu); return; |
| case 0x3F: do_vec_FDIV (cpu); return; |
| |
| case 0x3E: |
| switch (INSTR (20, 16)) |
| { |
| case 0x00: do_vec_FNEG (cpu); return; |
| case 0x01: do_vec_FSQRT (cpu); return; |
| default: HALT_NYI; |
| } |
| |
| case 0x0D: |
| case 0x0F: |
| case 0x22: |
| case 0x23: |
| case 0x26: |
| case 0x2A: |
| case 0x32: |
| case 0x36: |
| case 0x39: |
| case 0x3A: |
| do_vec_compare (cpu); return; |
| |
| default: |
| break; |
| } |
| } |
| |
| if (INSTR (31, 21) == 0x370) |
| { |
| if (INSTR (10, 10)) |
| do_vec_MOV_element (cpu); |
| else |
| do_vec_EXT (cpu); |
| return; |
| } |
| |
| switch (INSTR (21, 10)) |
| { |
| case 0x82E: do_vec_neg (cpu); return; |
| case 0x87E: do_vec_sqrt (cpu); return; |
| default: |
| if (INSTR (15, 10) == 0x30) |
| { |
| do_vec_mull (cpu); |
| return; |
| } |
| break; |
| } |
| break; |
| |
| case 0x2f: |
| switch (INSTR (15, 10)) |
| { |
| case 0x01: do_vec_SSHR_USHR (cpu); return; |
| case 0x10: |
| case 0x12: do_vec_mls_indexed (cpu); return; |
| case 0x29: do_vec_xtl (cpu); return; |
| default: |
| HALT_NYI; |
| } |
| |
| default: |
| break; |
| } |
| |
| HALT_NYI; |
| } |
| |
| /* 3 sources. */ |
| |
| /* Float multiply add. */ |
| static void |
| fmadds (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) |
| + aarch64_get_FP_float (cpu, sn) |
| * aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double multiply add. */ |
| static void |
| fmaddd (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) |
| + aarch64_get_FP_double (cpu, sn) |
| * aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| /* Float multiply subtract. */ |
| static void |
| fmsubs (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa) |
| - aarch64_get_FP_float (cpu, sn) |
| * aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double multiply subtract. */ |
| static void |
| fmsubd (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa) |
| - aarch64_get_FP_double (cpu, sn) |
| * aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| /* Float negative multiply add. */ |
| static void |
| fnmadds (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) |
| + (- aarch64_get_FP_float (cpu, sn)) |
| * aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double negative multiply add. */ |
| static void |
| fnmaddd (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) |
| + (- aarch64_get_FP_double (cpu, sn)) |
| * aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| /* Float negative multiply subtract. */ |
| static void |
| fnmsubs (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa) |
| + aarch64_get_FP_float (cpu, sn) |
| * aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double negative multiply subtract. */ |
| static void |
| fnmsubd (sim_cpu *cpu) |
| { |
| unsigned sa = INSTR (14, 10); |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa) |
| + aarch64_get_FP_double (cpu, sn) |
| * aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| static void |
| dexSimpleFPDataProc3Source (sim_cpu *cpu) |
| { |
| /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC |
| instr[30] = 0 |
| instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC |
| instr[28,25] = 1111 |
| instr[24] = 1 |
| instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC |
| instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated |
| instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */ |
| |
| uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); |
| /* dispatch on combined type:o1:o2. */ |
| uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15); |
| |
| if (M_S != 0) |
| HALT_UNALLOC; |
| |
| switch (dispatch) |
| { |
| case 0: fmadds (cpu); return; |
| case 1: fmsubs (cpu); return; |
| case 2: fnmadds (cpu); return; |
| case 3: fnmsubs (cpu); return; |
| case 4: fmaddd (cpu); return; |
| case 5: fmsubd (cpu); return; |
| case 6: fnmaddd (cpu); return; |
| case 7: fnmsubd (cpu); return; |
| default: |
| /* type > 1 is currently unallocated. */ |
| HALT_UNALLOC; |
| } |
| } |
| |
/* Decode entry for the floating point <-> fixed point conversion
   group.  Nothing in this group is emulated: every instruction routed
   here halts through HALT_NYI.  */
static void
dexSimpleFPFixedConvert (sim_cpu *cpu)
{
  HALT_NYI;
}
| |
| static void |
| dexSimpleFPCondCompare (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0001 1110 0 |
| instr [22] = type |
| instr [21] = 1 |
| instr [20,16] = Rm |
| instr [15,12] = condition |
| instr [11,10] = 01 |
| instr [9,5] = Rn |
| instr [4] = 0 |
| instr [3,0] = nzcv */ |
| |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| |
| NYI_assert (31, 23, 0x3C); |
| NYI_assert (11, 10, 0x1); |
| NYI_assert (4, 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (! testConditionCode (cpu, INSTR (15, 12))) |
| { |
| aarch64_set_CPSR (cpu, INSTR (3, 0)); |
| return; |
| } |
| |
| if (INSTR (22, 22)) |
| { |
| /* Double precision. */ |
| double val1 = aarch64_get_vec_double (cpu, rn, 0); |
| double val2 = aarch64_get_vec_double (cpu, rm, 0); |
| |
| /* FIXME: Check for NaNs. */ |
| if (val1 == val2) |
| aarch64_set_CPSR (cpu, (Z | C)); |
| else if (val1 < val2) |
| aarch64_set_CPSR (cpu, N); |
| else /* val1 > val2 */ |
| aarch64_set_CPSR (cpu, C); |
| } |
| else |
| { |
| /* Single precision. */ |
| float val1 = aarch64_get_vec_float (cpu, rn, 0); |
| float val2 = aarch64_get_vec_float (cpu, rm, 0); |
| |
| /* FIXME: Check for NaNs. */ |
| if (val1 == val2) |
| aarch64_set_CPSR (cpu, (Z | C)); |
| else if (val1 < val2) |
| aarch64_set_CPSR (cpu, N); |
| else /* val1 > val2 */ |
| aarch64_set_CPSR (cpu, C); |
| } |
| } |
| |
| /* 2 sources. */ |
| |
| /* Float add. */ |
| static void |
| fadds (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) |
| + aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double add. */ |
| static void |
| faddd (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) |
| + aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| /* Float divide. */ |
| static void |
| fdivs (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) |
| / aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double divide. */ |
| static void |
| fdivd (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) |
| / aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| /* Float multiply. */ |
| static void |
| fmuls (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) |
| * aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double multiply. */ |
| static void |
| fmuld (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) |
| * aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| /* Float negate and multiply. */ |
| static void |
| fnmuls (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn) |
| * aarch64_get_FP_float (cpu, sm))); |
| } |
| |
| /* Double negate and multiply. */ |
| static void |
| fnmuld (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn) |
| * aarch64_get_FP_double (cpu, sm))); |
| } |
| |
| /* Float subtract. */ |
| static void |
| fsubs (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn) |
| - aarch64_get_FP_float (cpu, sm)); |
| } |
| |
| /* Double subtract. */ |
| static void |
| fsubd (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn) |
| - aarch64_get_FP_double (cpu, sm)); |
| } |
| |
| static void |
| do_FMINNM (sim_cpu *cpu) |
| { |
| /* instr[31,23] = 0 0011 1100 |
| instr[22] = float(0)/double(1) |
| instr[21] = 1 |
| instr[20,16] = Sm |
| instr[15,10] = 01 1110 |
| instr[9,5] = Sn |
| instr[4,0] = Cpu */ |
| |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| NYI_assert (31, 23, 0x03C); |
| NYI_assert (15, 10, 0x1E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| aarch64_set_FP_double (cpu, sd, |
| dminnm (aarch64_get_FP_double (cpu, sn), |
| aarch64_get_FP_double (cpu, sm))); |
| else |
| aarch64_set_FP_float (cpu, sd, |
| fminnm (aarch64_get_FP_float (cpu, sn), |
| aarch64_get_FP_float (cpu, sm))); |
| } |
| |
| static void |
| do_FMAXNM (sim_cpu *cpu) |
| { |
| /* instr[31,23] = 0 0011 1100 |
| instr[22] = float(0)/double(1) |
| instr[21] = 1 |
| instr[20,16] = Sm |
| instr[15,10] = 01 1010 |
| instr[9,5] = Sn |
| instr[4,0] = Cpu */ |
| |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| |
| NYI_assert (31, 23, 0x03C); |
| NYI_assert (15, 10, 0x1A); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| aarch64_set_FP_double (cpu, sd, |
| dmaxnm (aarch64_get_FP_double (cpu, sn), |
| aarch64_get_FP_double (cpu, sm))); |
| else |
| aarch64_set_FP_float (cpu, sd, |
| fmaxnm (aarch64_get_FP_float (cpu, sn), |
| aarch64_get_FP_float (cpu, sm))); |
| } |
| |
| static void |
| dexSimpleFPDataProc2Source (sim_cpu *cpu) |
| { |
| /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC |
| instr[30] = 0 |
| instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC |
| instr[28,25] = 1111 |
| instr[24] = 0 |
| instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC |
| instr[21] = 1 |
| instr[20,16] = Vm |
| instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV |
| 0010 ==> FADD, 0011 ==> FSUB, |
| 0100 ==> FMAX, 0101 ==> FMIN |
| 0110 ==> FMAXNM, 0111 ==> FMINNM |
| 1000 ==> FNMUL, ow ==> UNALLOC |
| instr[11,10] = 10 |
| instr[9,5] = Vn |
| instr[4,0] = Vd */ |
| |
| uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); |
| uint32_t type = INSTR (23, 22); |
| /* Dispatch on opcode. */ |
| uint32_t dispatch = INSTR (15, 12); |
| |
| if (type > 1) |
| HALT_UNALLOC; |
| |
| if (M_S != 0) |
| HALT_UNALLOC; |
| |
| if (type) |
| switch (dispatch) |
| { |
| case 0: fmuld (cpu); return; |
| case 1: fdivd (cpu); return; |
| case 2: faddd (cpu); return; |
| case 3: fsubd (cpu); return; |
| case 6: do_FMAXNM (cpu); return; |
| case 7: do_FMINNM (cpu); return; |
| case 8: fnmuld (cpu); return; |
| |
| /* Have not yet implemented fmax and fmin. */ |
| case 4: |
| case 5: |
| HALT_NYI; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| else /* type == 0 => floats. */ |
| switch (dispatch) |
| { |
| case 0: fmuls (cpu); return; |
| case 1: fdivs (cpu); return; |
| case 2: fadds (cpu); return; |
| case 3: fsubs (cpu); return; |
| case 6: do_FMAXNM (cpu); return; |
| case 7: do_FMINNM (cpu); return; |
| case 8: fnmuls (cpu); return; |
| |
| case 4: |
| case 5: |
| HALT_NYI; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| dexSimpleFPCondSelect (sim_cpu *cpu) |
| { |
| /* FCSEL |
| instr[31,23] = 0 0011 1100 |
| instr[22] = 0=>single 1=>double |
| instr[21] = 1 |
| instr[20,16] = Sm |
| instr[15,12] = cond |
| instr[11,10] = 11 |
| instr[9,5] = Sn |
| instr[4,0] = Cpu */ |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| unsigned sd = INSTR ( 4, 0); |
| uint32_t set = testConditionCode (cpu, INSTR (15, 12)); |
| |
| NYI_assert (31, 23, 0x03C); |
| NYI_assert (11, 10, 0x3); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn) |
| : aarch64_get_FP_double (cpu, sm))); |
| else |
| aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn) |
| : aarch64_get_FP_float (cpu, sm))); |
| } |
| |
| /* Store 32 bit unscaled signed 9 bit. */ |
| static void |
| fsturs (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset, |
| aarch64_get_vec_u32 (cpu, st, 0)); |
| } |
| |
| /* Store 64 bit unscaled signed 9 bit. */ |
| static void |
| fsturd (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, 1) + offset, |
| aarch64_get_vec_u64 (cpu, st, 0)); |
| } |
| |
| /* Store 128 bit unscaled signed 9 bit. */ |
| static void |
| fsturq (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| FRegister a; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_get_FP_long_double (cpu, st, & a); |
| aarch64_set_mem_long_double (cpu, |
| aarch64_get_reg_u64 (cpu, rn, 1) |
| + offset, a); |
| } |
| |
| /* TODO FP move register. */ |
| |
| /* 32 bit fp to fp move register. */ |
| static void |
| ffmovs (sim_cpu *cpu) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn)); |
| } |
| |
| /* 64 bit fp to fp move register. */ |
| static void |
| ffmovd (sim_cpu *cpu) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn)); |
| } |
| |
| /* 32 bit GReg to Vec move register. */ |
| static void |
| fgmovs (sim_cpu *cpu) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP)); |
| } |
| |
| /* 64 bit g to fp move register. */ |
| static void |
| fgmovd (sim_cpu *cpu) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP)); |
| } |
| |
| /* 32 bit fp to g move register. */ |
| static void |
| gfmovs (sim_cpu *cpu) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0)); |
| } |
| |
| /* 64 bit fp to g move register. */ |
| static void |
| gfmovd (sim_cpu *cpu) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0)); |
| } |
| |
| /* FP move immediate |
| |
| These install an immediate 8 bit value in the target register |
| where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3 |
| bit exponent. */ |
| |
| static void |
| fmovs (sim_cpu *cpu) |
| { |
| unsigned int sd = INSTR (4, 0); |
| uint32_t imm = INSTR (20, 13); |
| float f = fp_immediate_for_encoding_32 (imm); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, f); |
| } |
| |
| static void |
| fmovd (sim_cpu *cpu) |
| { |
| unsigned int sd = INSTR (4, 0); |
| uint32_t imm = INSTR (20, 13); |
| double d = fp_immediate_for_encoding_64 (imm); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, d); |
| } |
| |
| static void |
| dexSimpleFPImmediate (sim_cpu *cpu) |
| { |
| /* instr[31,23] == 00111100 |
| instr[22] == type : single(0)/double(1) |
| instr[21] == 1 |
| instr[20,13] == imm8 |
| instr[12,10] == 100 |
| instr[9,5] == imm5 : 00000 ==> PK, ow ==> UNALLOC |
| instr[4,0] == Rd */ |
| uint32_t imm5 = INSTR (9, 5); |
| |
| NYI_assert (31, 23, 0x3C); |
| |
| if (imm5 != 0) |
| HALT_UNALLOC; |
| |
| if (INSTR (22, 22)) |
| fmovd (cpu); |
| else |
| fmovs (cpu); |
| } |
| |
| /* TODO specific decode and execute for group Load Store. */ |
| |
| /* TODO FP load/store single register (unscaled offset). */ |
| |
| /* TODO load 8 bit unscaled signed 9 bit. */ |
| /* TODO load 16 bit unscaled signed 9 bit. */ |
| |
| /* Load 32 bit unscaled signed 9 bit. */ |
| static void |
| fldurs (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); |
| } |
| |
| /* Load 64 bit unscaled signed 9 bit. */ |
| static void |
| fldurd (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 |
| (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset)); |
| } |
| |
| /* Load 128 bit unscaled signed 9 bit. */ |
| static void |
| fldurq (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int st = INSTR (4, 0); |
| FRegister a; |
| uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_get_mem_long_double (cpu, addr, & a); |
| aarch64_set_FP_long_double (cpu, st, a); |
| } |
| |
| /* TODO store 8 bit unscaled signed 9 bit. */ |
| /* TODO store 16 bit unscaled signed 9 bit. */ |
| |
| |
| /* 1 source. */ |
| |
| /* Float absolute value. */ |
| static void |
| fabss (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| float value = aarch64_get_FP_float (cpu, sn); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, fabsf (value)); |
| } |
| |
| /* Double absolute value. */ |
| static void |
| fabcpu (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| double value = aarch64_get_FP_double (cpu, sn); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, fabs (value)); |
| } |
| |
| /* Float negative value. */ |
| static void |
| fnegs (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn)); |
| } |
| |
| /* Double negative value. */ |
| static void |
| fnegd (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn)); |
| } |
| |
| /* Float square root. */ |
| static void |
| fsqrts (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn))); |
| } |
| |
| /* Double square root. */ |
| static void |
| fsqrtd (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, |
| sqrt (aarch64_get_FP_double (cpu, sn))); |
| } |
| |
| /* Convert double to float. */ |
| static void |
| fcvtds (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn)); |
| } |
| |
| /* Convert float to double. */ |
| static void |
| fcvtcpu (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn)); |
| } |
| |
| static void |
| do_FRINT (sim_cpu *cpu) |
| { |
| /* instr[31,23] = 0001 1110 0 |
| instr[22] = single(0)/double(1) |
| instr[21,18] = 1001 |
| instr[17,15] = rounding mode |
| instr[14,10] = 10000 |
| instr[9,5] = source |
| instr[4,0] = dest */ |
| |
| float val; |
| unsigned rs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned int rmode = INSTR (17, 15); |
| |
| NYI_assert (31, 23, 0x03C); |
| NYI_assert (21, 18, 0x9); |
| NYI_assert (14, 10, 0x10); |
| |
| if (rmode == 6 || rmode == 7) |
| /* FIXME: Add support for rmode == 6 exactness check. */ |
| rmode = uimm (aarch64_get_FPSR (cpu), 23, 22); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| double val = aarch64_get_FP_double (cpu, rs); |
| |
| switch (rmode) |
| { |
| case 0: /* mode N: nearest or even. */ |
| { |
| double rval = round (val); |
| |
| if (val - rval == 0.5) |
| { |
| if (((rval / 2.0) * 2.0) != rval) |
| rval += 1.0; |
| } |
| |
| aarch64_set_FP_double (cpu, rd, round (val)); |
| return; |
| } |
| |
| case 1: /* mode P: towards +inf. */ |
| if (val < 0.0) |
| aarch64_set_FP_double (cpu, rd, trunc (val)); |
| else |
| aarch64_set_FP_double (cpu, rd, round (val)); |
| return; |
| |
| case 2: /* mode M: towards -inf. */ |
| if (val < 0.0) |
| aarch64_set_FP_double (cpu, rd, round (val)); |
| else |
| aarch64_set_FP_double (cpu, rd, trunc (val)); |
| return; |
| |
| case 3: /* mode Z: towards 0. */ |
| aarch64_set_FP_double (cpu, rd, trunc (val)); |
| return; |
| |
| case 4: /* mode A: away from 0. */ |
| aarch64_set_FP_double (cpu, rd, round (val)); |
| return; |
| |
| case 6: /* mode X: use FPCR with exactness check. */ |
| case 7: /* mode I: use FPCR mode. */ |
| HALT_NYI; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| val = aarch64_get_FP_float (cpu, rs); |
| |
| switch (rmode) |
| { |
| case 0: /* mode N: nearest or even. */ |
| { |
| float rval = roundf (val); |
| |
| if (val - rval == 0.5) |
| { |
| if (((rval / 2.0) * 2.0) != rval) |
| rval += 1.0; |
| } |
| |
| aarch64_set_FP_float (cpu, rd, rval); |
| return; |
| } |
| |
| case 1: /* mode P: towards +inf. */ |
| if (val < 0.0) |
| aarch64_set_FP_float (cpu, rd, truncf (val)); |
| else |
| aarch64_set_FP_float (cpu, rd, roundf (val)); |
| return; |
| |
| case 2: /* mode M: towards -inf. */ |
| if (val < 0.0) |
| aarch64_set_FP_float (cpu, rd, truncf (val)); |
| else |
| aarch64_set_FP_float (cpu, rd, roundf (val)); |
| return; |
| |
| case 3: /* mode Z: towards 0. */ |
| aarch64_set_FP_float (cpu, rd, truncf (val)); |
| return; |
| |
| case 4: /* mode A: away from 0. */ |
| aarch64_set_FP_float (cpu, rd, roundf (val)); |
| return; |
| |
| case 6: /* mode X: use FPCR with exactness check. */ |
| case 7: /* mode I: use FPCR mode. */ |
| HALT_NYI; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* Convert half to float. */ |
| static void |
| do_FCVT_half_to_single (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 10, 0x7B890); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn)); |
| } |
| |
| /* Convert half to double. */ |
| static void |
| do_FCVT_half_to_double (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 10, 0x7B8B0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn)); |
| } |
| |
| static void |
| do_FCVT_single_to_half (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 10, 0x788F0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn)); |
| } |
| |
| /* Convert double to half. */ |
| static void |
| do_FCVT_double_to_half (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 10, 0x798F0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn)); |
| } |
| |
| static void |
| dexSimpleFPDataProc1Source (sim_cpu *cpu) |
| { |
| /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC |
| instr[30] = 0 |
| instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC |
| instr[28,25] = 1111 |
| instr[24] = 0 |
| instr[23,22] ==> type : 00 ==> source is single, |
| 01 ==> source is double |
| 10 ==> UNALLOC |
| 11 ==> UNALLOC or source is half |
| instr[21] = 1 |
| instr[20,15] ==> opcode : with type 00 or 01 |
| 000000 ==> FMOV, 000001 ==> FABS, |
| 000010 ==> FNEG, 000011 ==> FSQRT, |
| 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double) |
| 000110 ==> UNALLOC, 000111 ==> FCVT (to half) |
| 001000 ==> FRINTN, 001001 ==> FRINTP, |
| 001010 ==> FRINTM, 001011 ==> FRINTZ, |
| 001100 ==> FRINTA, 001101 ==> UNALLOC |
| 001110 ==> FRINTX, 001111 ==> FRINTI |
| with type 11 |
| 000100 ==> FCVT (half-to-single) |
| 000101 ==> FCVT (half-to-double) |
| instr[14,10] = 10000. */ |
| |
| uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); |
| uint32_t type = INSTR (23, 22); |
| uint32_t opcode = INSTR (20, 15); |
| |
| if (M_S != 0) |
| HALT_UNALLOC; |
| |
| if (type == 3) |
| { |
| if (opcode == 4) |
| do_FCVT_half_to_single (cpu); |
| else if (opcode == 5) |
| do_FCVT_half_to_double (cpu); |
| else |
| HALT_UNALLOC; |
| return; |
| } |
| |
| if (type == 2) |
| HALT_UNALLOC; |
| |
| switch (opcode) |
| { |
| case 0: |
| if (type) |
| ffmovd (cpu); |
| else |
| ffmovs (cpu); |
| return; |
| |
| case 1: |
| if (type) |
| fabcpu (cpu); |
| else |
| fabss (cpu); |
| return; |
| |
| case 2: |
| if (type) |
| fnegd (cpu); |
| else |
| fnegs (cpu); |
| return; |
| |
| case 3: |
| if (type) |
| fsqrtd (cpu); |
| else |
| fsqrts (cpu); |
| return; |
| |
| case 4: |
| if (type) |
| fcvtds (cpu); |
| else |
| HALT_UNALLOC; |
| return; |
| |
| case 5: |
| if (type) |
| HALT_UNALLOC; |
| fcvtcpu (cpu); |
| return; |
| |
| case 8: /* FRINTN etc. */ |
| case 9: |
| case 10: |
| case 11: |
| case 12: |
| case 14: |
| case 15: |
| do_FRINT (cpu); |
| return; |
| |
| case 7: |
| if (INSTR (22, 22)) |
| do_FCVT_double_to_half (cpu); |
| else |
| do_FCVT_single_to_half (cpu); |
| return; |
| |
| case 13: |
| HALT_NYI; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* 32 bit signed int to float. */ |
| static void |
| scvtf32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float |
| (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP)); |
| } |
| |
| /* signed int to float. */ |
| static void |
| scvtf (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_float |
| (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP)); |
| } |
| |
| /* 32 bit signed int to double. */ |
| static void |
| scvtd32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double |
| (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP)); |
| } |
| |
| /* signed int to double. */ |
| static void |
| scvtd (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned sd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_FP_double |
| (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP)); |
| } |
| |
/* Integer saturation bounds converted to float and double.  The names
   follow the pattern <FTYPE>_<ITYPE>_{MIN,MAX} so that
   RAISE_EXCEPTIONS can assemble them by token pasting.

   NOTE(review): the LONG and ULONG bounds are used for conversions to
   64-bit integers, which presumes a host whose long is 64 bits wide --
   confirm for other hosts.  */

/* <limits.h> has no minimum for the unsigned types.  */
#define UINT_MIN 0
#define ULONG_MIN 0

/* Signed 32-bit target.  */
static const float FLOAT_INT_MIN = (float) INT_MIN;
static const float FLOAT_INT_MAX = (float) INT_MAX;
static const double DOUBLE_INT_MIN = (double) INT_MIN;
static const double DOUBLE_INT_MAX = (double) INT_MAX;

/* Signed long target.  */
static const float FLOAT_LONG_MIN = (float) LONG_MIN;
static const float FLOAT_LONG_MAX = (float) LONG_MAX;
static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
static const double DOUBLE_LONG_MAX = (double) LONG_MAX;

/* Unsigned 32-bit target.  */
static const float FLOAT_UINT_MIN = (float) UINT_MIN;
static const float FLOAT_UINT_MAX = (float) UINT_MAX;
static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
static const double DOUBLE_UINT_MAX = (double) UINT_MAX;

/* Unsigned long target.  */
static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
| |
/* Check for FP exception conditions after converting the floating
   point value F to the integer VALUE:
     NaN           raises IO and yields zero
     Infinity      raises IO and saturates towards its own sign
     Out of Range  raises IO and IX and saturates value
     Denormal      raises ID and IX and sets to zero.

   F is the source value (evaluated more than once, so it must have no
   side effects), VALUE is the integer lvalue to patch up, FTYPE is
   FLOAT or DOUBLE and ITYPE is INT, UINT, LONG or ULONG; the last two
   select the <FTYPE>_<ITYPE>_{MIN,MAX} and <ITYPE>_{MIN,MAX} bounds.
   An in-range normal value leaves VALUE and the FPSR untouched.
   Expects a variable named `cpu' to be in scope.  */
#define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE)                        \
  do                                                                    \
    {                                                                   \
      switch (fpclassify (F))                                           \
        {                                                               \
        case FP_NAN:                                                    \
          /* Only set the IO bit; earlier sticky flags must survive.  */ \
          aarch64_set_FPSR_bits (cpu, IO, IO);                          \
          (VALUE) = 0;                                                  \
          break;                                                        \
                                                                        \
        case FP_INFINITE:                                               \
          aarch64_set_FPSR_bits (cpu, IO, IO);                          \
          if (signbit (F))                                              \
            (VALUE) = ITYPE##_MIN;                                      \
          else                                                          \
            (VALUE) = ITYPE##_MAX;                                      \
          break;                                                        \
                                                                        \
        case FP_NORMAL:                                                 \
          if ((F) >= FTYPE##_##ITYPE##_MAX)                             \
            {                                                           \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);            \
              (VALUE) = ITYPE##_MAX;                                    \
            }                                                           \
          else if ((F) <= FTYPE##_##ITYPE##_MIN)                        \
            {                                                           \
              aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX);            \
              (VALUE) = ITYPE##_MIN;                                    \
            }                                                           \
          break;                                                        \
                                                                        \
        case FP_SUBNORMAL:                                              \
          aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID);           \
          (VALUE) = 0;                                                  \
          break;                                                        \
                                                                        \
        default:                                                        \
        case FP_ZERO:                                                   \
          (VALUE) = 0;                                                  \
          break;                                                        \
        }                                                               \
    }                                                                   \
  while (0)
| |
| /* 32 bit convert float to signed int truncate towards zero. */ |
| static void |
| fcvtszs32 (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| /* TODO : check that this rounds toward zero. */ |
| float f = aarch64_get_FP_float (cpu, sn); |
| int32_t value = (int32_t) f; |
| |
| RAISE_EXCEPTIONS (f, value, FLOAT, INT); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* Avoid sign extension to 64 bit. */ |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); |
| } |
| |
| /* 64 bit convert float to signed int truncate towards zero. */ |
| static void |
| fcvtszs (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| float f = aarch64_get_FP_float (cpu, sn); |
| int64_t value = (int64_t) f; |
| |
| RAISE_EXCEPTIONS (f, value, FLOAT, LONG); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, value); |
| } |
| |
| /* 32 bit convert double to signed int truncate towards zero. */ |
| static void |
| fcvtszd32 (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| /* TODO : check that this rounds toward zero. */ |
| double d = aarch64_get_FP_double (cpu, sn); |
| int32_t value = (int32_t) d; |
| |
| RAISE_EXCEPTIONS (d, value, DOUBLE, INT); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* Avoid sign extension to 64 bit. */ |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); |
| } |
| |
| /* 64 bit convert double to signed int truncate towards zero. */ |
| static void |
| fcvtszd (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| /* TODO : check that this rounds toward zero. */ |
| double d = aarch64_get_FP_double (cpu, sn); |
| int64_t value; |
| |
| value = (int64_t) d; |
| |
| RAISE_EXCEPTIONS (d, value, DOUBLE, LONG); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, value); |
| } |
| |
| static void |
| do_fcvtzu (sim_cpu *cpu) |
| { |
| /* instr[31] = size: 32-bit (0), 64-bit (1) |
| instr[30,23] = 00111100 |
| instr[22] = type: single (0)/ double (1) |
| instr[21] = enable (0)/disable(1) precision |
| instr[20,16] = 11001 |
| instr[15,10] = precision |
| instr[9,5] = Rs |
| instr[4,0] = Rd. */ |
| |
| unsigned rs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (30, 23, 0x3C); |
| NYI_assert (20, 16, 0x19); |
| |
| if (INSTR (21, 21) != 1) |
| /* Convert to fixed point. */ |
| HALT_NYI; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (31, 31)) |
| { |
| /* Convert to unsigned 64-bit integer. */ |
| if (INSTR (22, 22)) |
| { |
| double d = aarch64_get_FP_double (cpu, rs); |
| uint64_t value = (uint64_t) d; |
| |
| /* Do not raise an exception if we have reached ULONG_MAX. */ |
| if (value != (1UL << 63)) |
| RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value); |
| } |
| else |
| { |
| float f = aarch64_get_FP_float (cpu, rs); |
| uint64_t value = (uint64_t) f; |
| |
| /* Do not raise an exception if we have reached ULONG_MAX. */ |
| if (value != (1UL << 63)) |
| RAISE_EXCEPTIONS (f, value, FLOAT, ULONG); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value); |
| } |
| } |
| else |
| { |
| uint32_t value; |
| |
| /* Convert to unsigned 32-bit integer. */ |
| if (INSTR (22, 22)) |
| { |
| double d = aarch64_get_FP_double (cpu, rs); |
| |
| value = (uint32_t) d; |
| /* Do not raise an exception if we have reached UINT_MAX. */ |
| if (value != (1UL << 31)) |
| RAISE_EXCEPTIONS (d, value, DOUBLE, UINT); |
| } |
| else |
| { |
| float f = aarch64_get_FP_float (cpu, rs); |
| |
| value = (uint32_t) f; |
| /* Do not raise an exception if we have reached UINT_MAX. */ |
| if (value != (1UL << 31)) |
| RAISE_EXCEPTIONS (f, value, FLOAT, UINT); |
| } |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value); |
| } |
| } |
| |
| static void |
| do_UCVTF (sim_cpu *cpu) |
| { |
| /* instr[31] = size: 32-bit (0), 64-bit (1) |
| instr[30,23] = 001 1110 0 |
| instr[22] = type: single (0)/ double (1) |
| instr[21] = enable (0)/disable(1) precision |
| instr[20,16] = 0 0011 |
| instr[15,10] = precision |
| instr[9,5] = Rs |
| instr[4,0] = Rd. */ |
| |
| unsigned rs = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (30, 23, 0x3C); |
| NYI_assert (20, 16, 0x03); |
| |
| if (INSTR (21, 21) != 1) |
| HALT_NYI; |
| |
| /* FIXME: Add exception raising. */ |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (31, 31)) |
| { |
| uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP); |
| |
| if (INSTR (22, 22)) |
| aarch64_set_FP_double (cpu, rd, (double) value); |
| else |
| aarch64_set_FP_float (cpu, rd, (float) value); |
| } |
| else |
| { |
| uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP); |
| |
| if (INSTR (22, 22)) |
| aarch64_set_FP_double (cpu, rd, (double) value); |
| else |
| aarch64_set_FP_float (cpu, rd, (float) value); |
| } |
| } |
| |
| static void |
| float_vector_move (sim_cpu *cpu) |
| { |
| /* instr[31,17] == 100 1111 0101 0111 |
| instr[16] ==> direction 0=> to GR, 1=> from GR |
| instr[15,10] => ??? |
| instr[9,5] ==> source |
| instr[4,0] ==> dest. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 17, 0x4F57); |
| |
| if (INSTR (15, 10) != 0) |
| HALT_UNALLOC; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (16, 16)) |
| aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP)); |
| else |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1)); |
| } |
| |
| static void |
| dexSimpleFPIntegerConvert (sim_cpu *cpu) |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30 = 0 |
| instr[29] = S : 0 ==> OK, 1 ==> UNALLOC |
| instr[28,25] = 1111 |
| instr[24] = 0 |
| instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC |
| instr[21] = 1 |
| instr[20,19] = rmode |
| instr[18,16] = opcode |
| instr[15,10] = 10 0000 */ |
| |
| uint32_t rmode_opcode; |
| uint32_t size_type; |
| uint32_t type; |
| uint32_t size; |
| uint32_t S; |
| |
| if (INSTR (31, 17) == 0x4F57) |
| { |
| float_vector_move (cpu); |
| return; |
| } |
| |
| size = INSTR (31, 31); |
| S = INSTR (29, 29); |
| if (S != 0) |
| HALT_UNALLOC; |
| |
| type = INSTR (23, 22); |
| if (type > 1) |
| HALT_UNALLOC; |
| |
| rmode_opcode = INSTR (20, 16); |
| size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */ |
| |
| switch (rmode_opcode) |
| { |
| case 2: /* SCVTF. */ |
| switch (size_type) |
| { |
| case 0: scvtf32 (cpu); return; |
| case 1: scvtd32 (cpu); return; |
| case 2: scvtf (cpu); return; |
| case 3: scvtd (cpu); return; |
| } |
| |
| case 6: /* FMOV GR, Vec. */ |
| switch (size_type) |
| { |
| case 0: gfmovs (cpu); return; |
| case 3: gfmovd (cpu); return; |
| default: HALT_UNALLOC; |
| } |
| |
| case 7: /* FMOV vec, GR. */ |
| switch (size_type) |
| { |
| case 0: fgmovs (cpu); return; |
| case 3: fgmovd (cpu); return; |
| default: HALT_UNALLOC; |
| } |
| |
| case 24: /* FCVTZS. */ |
| switch (size_type) |
| { |
| case 0: fcvtszs32 (cpu); return; |
| case 1: fcvtszd32 (cpu); return; |
| case 2: fcvtszs (cpu); return; |
| case 3: fcvtszd (cpu); return; |
| } |
| |
| case 25: do_fcvtzu (cpu); return; |
| case 3: do_UCVTF (cpu); return; |
| |
| case 0: /* FCVTNS. */ |
| case 1: /* FCVTNU. */ |
| case 4: /* FCVTAS. */ |
| case 5: /* FCVTAU. */ |
| case 8: /* FCVPTS. */ |
| case 9: /* FCVTPU. */ |
| case 16: /* FCVTMS. */ |
| case 17: /* FCVTMU. */ |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2) |
| { |
| uint32_t flags; |
| |
| /* FIXME: Add exception raising. */ |
| if (isnan (fvalue1) || isnan (fvalue2)) |
| flags = C|V; |
| else if (isinf (fvalue1) && isinf (fvalue2)) |
| { |
| /* Subtracting two infinities may give a NaN. We only need to compare |
| the signs, which we can get from isinf. */ |
| int result = isinf (fvalue1) - isinf (fvalue2); |
| |
| if (result == 0) |
| flags = Z|C; |
| else if (result < 0) |
| flags = N; |
| else /* (result > 0). */ |
| flags = C; |
| } |
| else |
| { |
| float result = fvalue1 - fvalue2; |
| |
| if (result == 0.0) |
| flags = Z|C; |
| else if (result < 0) |
| flags = N; |
| else /* (result > 0). */ |
| flags = C; |
| } |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| static void |
| fcmps (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| |
| float fvalue1 = aarch64_get_FP_float (cpu, sn); |
| float fvalue2 = aarch64_get_FP_float (cpu, sm); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_float_compare (cpu, fvalue1, fvalue2); |
| } |
| |
| /* Float compare to zero -- Invalid Operation exception |
| only on signaling NaNs. */ |
| static void |
| fcmpzs (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR ( 9, 5); |
| float fvalue1 = aarch64_get_FP_float (cpu, sn); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_float_compare (cpu, fvalue1, 0.0f); |
| } |
| |
| /* Float compare -- Invalid Operation exception on all NaNs. */ |
| static void |
| fcmpes (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| |
| float fvalue1 = aarch64_get_FP_float (cpu, sn); |
| float fvalue2 = aarch64_get_FP_float (cpu, sm); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_float_compare (cpu, fvalue1, fvalue2); |
| } |
| |
| /* Float compare to zero -- Invalid Operation exception on all NaNs. */ |
| static void |
| fcmpzes (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR ( 9, 5); |
| float fvalue1 = aarch64_get_FP_float (cpu, sn); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_float_compare (cpu, fvalue1, 0.0f); |
| } |
| |
| static void |
| set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2) |
| { |
| uint32_t flags; |
| |
| /* FIXME: Add exception raising. */ |
| if (isnan (dval1) || isnan (dval2)) |
| flags = C|V; |
| else if (isinf (dval1) && isinf (dval2)) |
| { |
| /* Subtracting two infinities may give a NaN. We only need to compare |
| the signs, which we can get from isinf. */ |
| int result = isinf (dval1) - isinf (dval2); |
| |
| if (result == 0) |
| flags = Z|C; |
| else if (result < 0) |
| flags = N; |
| else /* (result > 0). */ |
| flags = C; |
| } |
| else |
| { |
| double result = dval1 - dval2; |
| |
| if (result == 0.0) |
| flags = Z|C; |
| else if (result < 0) |
| flags = N; |
| else /* (result > 0). */ |
| flags = C; |
| } |
| |
| aarch64_set_CPSR (cpu, flags); |
| } |
| |
| /* Double compare -- Invalid Operation exception only on signaling NaNs. */ |
| static void |
| fcmpd (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| |
| double dvalue1 = aarch64_get_FP_double (cpu, sn); |
| double dvalue2 = aarch64_get_FP_double (cpu, sm); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_double_compare (cpu, dvalue1, dvalue2); |
| } |
| |
| /* Double compare to zero -- Invalid Operation exception |
| only on signaling NaNs. */ |
| static void |
| fcmpzd (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR ( 9, 5); |
| double dvalue1 = aarch64_get_FP_double (cpu, sn); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_double_compare (cpu, dvalue1, 0.0); |
| } |
| |
| /* Double compare -- Invalid Operation exception on all NaNs. */ |
| static void |
| fcmped (sim_cpu *cpu) |
| { |
| unsigned sm = INSTR (20, 16); |
| unsigned sn = INSTR ( 9, 5); |
| |
| double dvalue1 = aarch64_get_FP_double (cpu, sn); |
| double dvalue2 = aarch64_get_FP_double (cpu, sm); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_double_compare (cpu, dvalue1, dvalue2); |
| } |
| |
| /* Double compare to zero -- Invalid Operation exception on all NaNs. */ |
| static void |
| fcmpzed (sim_cpu *cpu) |
| { |
| unsigned sn = INSTR ( 9, 5); |
| double dvalue1 = aarch64_get_FP_double (cpu, sn); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| set_flags_for_double_compare (cpu, dvalue1, 0.0); |
| } |
| |
| static void |
| dexSimpleFPCompare (sim_cpu *cpu) |
| { |
| /* assert instr[28,25] == 1111 |
| instr[30:24:21:13,10] = 0011000 |
| instr[31] = M : 0 ==> OK, 1 ==> UNALLOC |
| instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC |
| instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC |
| instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC |
| instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE, |
| 01000 ==> FCMPZ, 11000 ==> FCMPEZ, |
| ow ==> UNALLOC */ |
| uint32_t dispatch; |
| uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29); |
| uint32_t type = INSTR (23, 22); |
| uint32_t op = INSTR (15, 14); |
| uint32_t op2_2_0 = INSTR (2, 0); |
| |
| if (op2_2_0 != 0) |
| HALT_UNALLOC; |
| |
| if (M_S != 0) |
| HALT_UNALLOC; |
| |
| if (type > 1) |
| HALT_UNALLOC; |
| |
| if (op != 0) |
| HALT_UNALLOC; |
| |
| /* dispatch on type and top 2 bits of opcode. */ |
| dispatch = (type << 2) | INSTR (4, 3); |
| |
| switch (dispatch) |
| { |
| case 0: fcmps (cpu); return; |
| case 1: fcmpzs (cpu); return; |
| case 2: fcmpes (cpu); return; |
| case 3: fcmpzes (cpu); return; |
| case 4: fcmpd (cpu); return; |
| case 5: fcmpzd (cpu); return; |
| case 6: fcmped (cpu); return; |
| case 7: fcmpzed (cpu); return; |
| } |
| } |
| |
| static void |
| do_scalar_FADDP (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0111 1110 0 |
| instr [22] = single(0)/double(1) |
| instr [21,10] = 11 0000 1101 10 |
| instr [9,5] = Fn |
| instr [4,0] = Fd. */ |
| |
| unsigned Fn = INSTR (9, 5); |
| unsigned Fd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0FC); |
| NYI_assert (21, 10, 0xC36); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| double val1 = aarch64_get_vec_double (cpu, Fn, 0); |
| double val2 = aarch64_get_vec_double (cpu, Fn, 1); |
| |
| aarch64_set_FP_double (cpu, Fd, val1 + val2); |
| } |
| else |
| { |
| float val1 = aarch64_get_vec_float (cpu, Fn, 0); |
| float val2 = aarch64_get_vec_float (cpu, Fn, 1); |
| |
| aarch64_set_FP_float (cpu, Fd, val1 + val2); |
| } |
| } |
| |
| /* Floating point absolute difference. */ |
| |
| static void |
| do_scalar_FABD (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0111 1110 1 |
| instr [22] = float(0)/double(1) |
| instr [21] = 1 |
| instr [20,16] = Rm |
| instr [15,10] = 1101 01 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0FD); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 10, 0x35); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| aarch64_set_FP_double (cpu, rd, |
| fabs (aarch64_get_FP_double (cpu, rn) |
| - aarch64_get_FP_double (cpu, rm))); |
| else |
| aarch64_set_FP_float (cpu, rd, |
| fabsf (aarch64_get_FP_float (cpu, rn) |
| - aarch64_get_FP_float (cpu, rm))); |
| } |
| |
| static void |
| do_scalar_CMGT (sim_cpu *cpu) |
| { |
| /* instr [31,21] = 0101 1110 111 |
| instr [20,16] = Rm |
| instr [15,10] = 00 1101 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 21, 0x2F7); |
| NYI_assert (15, 10, 0x0D); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_u64 (cpu, rn, 0) > |
| aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L); |
| } |
| |
| static void |
| do_scalar_USHR (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0111 1111 0 |
| instr [22,16] = shift amount |
| instr [15,10] = 0000 01 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned amount = 128 - INSTR (22, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0FE); |
| NYI_assert (15, 10, 0x01); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_u64 (cpu, rn, 0) >> amount); |
| } |
| |
| static void |
| do_scalar_SSHL (sim_cpu *cpu) |
| { |
| /* instr [31,21] = 0101 1110 111 |
| instr [20,16] = Rm |
| instr [15,10] = 0100 01 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); |
| |
| NYI_assert (31, 21, 0x2F7); |
| NYI_assert (15, 10, 0x11); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (shift >= 0) |
| aarch64_set_vec_s64 (cpu, rd, 0, |
| aarch64_get_vec_s64 (cpu, rn, 0) << shift); |
| else |
| aarch64_set_vec_s64 (cpu, rd, 0, |
| aarch64_get_vec_s64 (cpu, rn, 0) >> - shift); |
| } |
| |
| /* Floating point scalar compare greater than or equal to 0. */ |
| static void |
| do_scalar_FCMGE_zero (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0111 1110 1 |
| instr [22,22] = size |
| instr [21,16] = 1000 00 |
| instr [15,10] = 1100 10 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned size = INSTR (22, 22); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0FD); |
| NYI_assert (21, 16, 0x20); |
| NYI_assert (15, 10, 0x32); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0); |
| else |
| aarch64_set_vec_u32 (cpu, rd, 0, |
| aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0); |
| } |
| |
| /* Floating point scalar compare less than or equal to 0. */ |
| static void |
| do_scalar_FCMLE_zero (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0111 1110 1 |
| instr [22,22] = size |
| instr [21,16] = 1000 00 |
| instr [15,10] = 1101 10 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned size = INSTR (22, 22); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0FD); |
| NYI_assert (21, 16, 0x20); |
| NYI_assert (15, 10, 0x36); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0); |
| else |
| aarch64_set_vec_u32 (cpu, rd, 0, |
| aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0); |
| } |
| |
| /* Floating point scalar compare greater than 0. */ |
| static void |
| do_scalar_FCMGT_zero (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0101 1110 1 |
| instr [22,22] = size |
| instr [21,16] = 1000 00 |
| instr [15,10] = 1100 10 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned size = INSTR (22, 22); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0BD); |
| NYI_assert (21, 16, 0x20); |
| NYI_assert (15, 10, 0x32); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0); |
| else |
| aarch64_set_vec_u32 (cpu, rd, 0, |
| aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0); |
| } |
| |
| /* Floating point scalar compare equal to 0. */ |
| static void |
| do_scalar_FCMEQ_zero (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0101 1110 1 |
| instr [22,22] = size |
| instr [21,16] = 1000 00 |
| instr [15,10] = 1101 10 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned size = INSTR (22, 22); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0BD); |
| NYI_assert (21, 16, 0x20); |
| NYI_assert (15, 10, 0x36); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0); |
| else |
| aarch64_set_vec_u32 (cpu, rd, 0, |
| aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0); |
| } |
| |
| /* Floating point scalar compare less than 0. */ |
| static void |
| do_scalar_FCMLT_zero (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0101 1110 1 |
| instr [22,22] = size |
| instr [21,16] = 1000 00 |
| instr [15,10] = 1110 10 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned size = INSTR (22, 22); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0BD); |
| NYI_assert (21, 16, 0x20); |
| NYI_assert (15, 10, 0x3A); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (size) |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0); |
| else |
| aarch64_set_vec_u32 (cpu, rd, 0, |
| aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0); |
| } |
| |
| static void |
| do_scalar_shift (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0101 1111 0 |
| instr [22,16] = shift amount |
| instr [15,10] = 0101 01 [SHL] |
| instr [15,10] = 0000 01 [SSHR] |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned amount; |
| |
| NYI_assert (31, 23, 0x0BE); |
| |
| if (INSTR (22, 22) == 0) |
| HALT_UNALLOC; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| switch (INSTR (15, 10)) |
| { |
| case 0x01: /* SSHR */ |
| amount = 128 - INSTR (22, 16); |
| aarch64_set_vec_s64 (cpu, rd, 0, |
| aarch64_get_vec_s64 (cpu, rn, 0) >> amount); |
| return; |
| case 0x15: /* SHL */ |
| amount = INSTR (22, 16) - 64; |
| aarch64_set_vec_u64 (cpu, rd, 0, |
| aarch64_get_vec_u64 (cpu, rn, 0) << amount); |
| return; |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| /* FCMEQ FCMGT FCMGE. */ |
| static void |
| do_scalar_FCM (sim_cpu *cpu) |
| { |
| /* instr [31,30] = 01 |
| instr [29] = U |
| instr [28,24] = 1 1110 |
| instr [23] = E |
| instr [22] = size |
| instr [21] = 1 |
| instr [20,16] = Rm |
| instr [15,12] = 1110 |
| instr [11] = AC |
| instr [10] = 1 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11); |
| unsigned result; |
| float val1; |
| float val2; |
| |
| NYI_assert (31, 30, 1); |
| NYI_assert (28, 24, 0x1E); |
| NYI_assert (21, 21, 1); |
| NYI_assert (15, 12, 0xE); |
| NYI_assert (10, 10, 1); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| double val1 = aarch64_get_FP_double (cpu, rn); |
| double val2 = aarch64_get_FP_double (cpu, rm); |
| |
| switch (EUac) |
| { |
| case 0: /* 000 */ |
| result = val1 == val2; |
| break; |
| |
| case 3: /* 011 */ |
| val1 = fabs (val1); |
| val2 = fabs (val2); |
| /* Fall through. */ |
| case 2: /* 010 */ |
| result = val1 >= val2; |
| break; |
| |
| case 7: /* 111 */ |
| val1 = fabs (val1); |
| val2 = fabs (val2); |
| /* Fall through. */ |
| case 6: /* 110 */ |
| result = val1 > val2; |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| |
| aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); |
| return; |
| } |
| |
| val1 = aarch64_get_FP_float (cpu, rn); |
| val2 = aarch64_get_FP_float (cpu, rm); |
| |
| switch (EUac) |
| { |
| case 0: /* 000 */ |
| result = val1 == val2; |
| break; |
| |
| case 3: /* 011 */ |
| val1 = fabsf (val1); |
| val2 = fabsf (val2); |
| /* Fall through. */ |
| case 2: /* 010 */ |
| result = val1 >= val2; |
| break; |
| |
| case 7: /* 111 */ |
| val1 = fabsf (val1); |
| val2 = fabsf (val2); |
| /* Fall through. */ |
| case 6: /* 110 */ |
| result = val1 > val2; |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| |
| aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0); |
| } |
| |
| /* An alias of DUP. */ |
| static void |
| do_scalar_MOV (sim_cpu *cpu) |
| { |
| /* instr [31,21] = 0101 1110 000 |
| instr [20,16] = imm5 |
| instr [15,10] = 0000 01 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| unsigned index; |
| |
| NYI_assert (31, 21, 0x2F0); |
| NYI_assert (15, 10, 0x01); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (16, 16)) |
| { |
| /* 8-bit. */ |
| index = INSTR (20, 17); |
| aarch64_set_vec_u8 |
| (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index)); |
| } |
| else if (INSTR (17, 17)) |
| { |
| /* 16-bit. */ |
| index = INSTR (20, 18); |
| aarch64_set_vec_u16 |
| (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index)); |
| } |
| else if (INSTR (18, 18)) |
| { |
| /* 32-bit. */ |
| index = INSTR (20, 19); |
| aarch64_set_vec_u32 |
| (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index)); |
| } |
| else if (INSTR (19, 19)) |
| { |
| /* 64-bit. */ |
| index = INSTR (20, 20); |
| aarch64_set_vec_u64 |
| (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index)); |
| } |
| else |
| HALT_UNALLOC; |
| } |
| |
| static void |
| do_scalar_NEG (sim_cpu *cpu) |
| { |
| /* instr [31,10] = 0111 1110 1110 0000 1011 10 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 10, 0x1FB82E); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0)); |
| } |
| |
| static void |
| do_scalar_USHL (sim_cpu *cpu) |
| { |
| /* instr [31,21] = 0111 1110 111 |
| instr [20,16] = Rm |
| instr [15,10] = 0100 01 |
| instr [9, 5] = Rn |
| instr [4, 0] = Rd. */ |
| |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| signed int shift = aarch64_get_vec_s8 (cpu, rm, 0); |
| |
| NYI_assert (31, 21, 0x3F7); |
| NYI_assert (15, 10, 0x11); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (shift >= 0) |
| aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift); |
| else |
| aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift); |
| } |
| |
| static void |
| do_double_add (sim_cpu *cpu) |
| { |
| /* instr [31,21] = 0101 1110 111 |
| instr [20,16] = Fn |
| instr [15,10] = 1000 01 |
| instr [9,5] = Fm |
| instr [4,0] = Fd. */ |
| unsigned Fd; |
| unsigned Fm; |
| unsigned Fn; |
| double val1; |
| double val2; |
| |
| NYI_assert (31, 21, 0x2F7); |
| NYI_assert (15, 10, 0x21); |
| |
| Fd = INSTR (4, 0); |
| Fm = INSTR (9, 5); |
| Fn = INSTR (20, 16); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| val1 = aarch64_get_FP_double (cpu, Fm); |
| val2 = aarch64_get_FP_double (cpu, Fn); |
| |
| aarch64_set_FP_double (cpu, Fd, val1 + val2); |
| } |
| |
| static void |
| do_scalar_UCVTF (sim_cpu *cpu) |
| { |
| /* instr [31,23] = 0111 1110 0 |
| instr [22] = single(0)/double(1) |
| instr [21,10] = 10 0001 1101 10 |
| instr [9,5] = rn |
| instr [4,0] = rd. */ |
| |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| NYI_assert (31, 23, 0x0FC); |
| NYI_assert (21, 10, 0x876); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (INSTR (22, 22)) |
| { |
| uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0); |
| |
| aarch64_set_vec_double (cpu, rd, 0, (double) val); |
| } |
| else |
| { |
| uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0); |
| |
| aarch64_set_vec_float (cpu, rd, 0, (float) val); |
| } |
| } |
| |
| static void |
| do_scalar_vec (sim_cpu *cpu) |
| { |
| /* instr [30] = 1. */ |
| /* instr [28,25] = 1111. */ |
| switch (INSTR (31, 23)) |
| { |
| case 0xBC: |
| switch (INSTR (15, 10)) |
| { |
| case 0x01: do_scalar_MOV (cpu); return; |
| case 0x39: do_scalar_FCM (cpu); return; |
| case 0x3B: do_scalar_FCM (cpu); return; |
| } |
| break; |
| |
| case 0xBE: do_scalar_shift (cpu); return; |
| |
| case 0xFC: |
| switch (INSTR (15, 10)) |
| { |
| case 0x36: |
| switch (INSTR (21, 16)) |
| { |
| case 0x30: do_scalar_FADDP (cpu); return; |
| case 0x21: do_scalar_UCVTF (cpu); return; |
| } |
| HALT_NYI; |
| case 0x39: do_scalar_FCM (cpu); return; |
| case 0x3B: do_scalar_FCM (cpu); return; |
| } |
| break; |
| |
| case 0xFD: |
| switch (INSTR (15, 10)) |
| { |
| case 0x0D: do_scalar_CMGT (cpu); return; |
| case 0x11: do_scalar_USHL (cpu); return; |
| case 0x2E: do_scalar_NEG (cpu); return; |
| case 0x32: do_scalar_FCMGE_zero (cpu); return; |
| case 0x35: do_scalar_FABD (cpu); return; |
| case 0x36: do_scalar_FCMLE_zero (cpu); return; |
| case 0x39: do_scalar_FCM (cpu); return; |
| case 0x3B: do_scalar_FCM (cpu); return; |
| default: |
| HALT_NYI; |
| } |
| |
| case 0xFE: do_scalar_USHR (cpu); return; |
| |
| case 0xBD: |
| switch (INSTR (15, 10)) |
| { |
| case 0x21: do_double_add (cpu); return; |
| case 0x11: do_scalar_SSHL (cpu); return; |
| case 0x32: do_scalar_FCMGT_zero (cpu); return; |
| case 0x36: do_scalar_FCMEQ_zero (cpu); return; |
| case 0x3A: do_scalar_FCMLT_zero (cpu); return; |
| default: |
| HALT_NYI; |
| } |
| |
| default: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| dexAdvSIMD1 (sim_cpu *cpu) |
| { |
| /* instr [28,25] = 1 111. */ |
| |
| /* We are currently only interested in the basic |
| scalar fp routines which all have bit 30 = 0. */ |
| if (INSTR (30, 30)) |
| do_scalar_vec (cpu); |
| |
| /* instr[24] is set for FP data processing 3-source and clear for |
| all other basic scalar fp instruction groups. */ |
| else if (INSTR (24, 24)) |
| dexSimpleFPDataProc3Source (cpu); |
| |
| /* instr[21] is clear for floating <-> fixed conversions and set for |
| all other basic scalar fp instruction groups. */ |
| else if (!INSTR (21, 21)) |
| dexSimpleFPFixedConvert (cpu); |
| |
| /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source |
| 11 ==> cond select, 00 ==> other. */ |
| else |
| switch (INSTR (11, 10)) |
| { |
| case 1: dexSimpleFPCondCompare (cpu); return; |
| case 2: dexSimpleFPDataProc2Source (cpu); return; |
| case 3: dexSimpleFPCondSelect (cpu); return; |
| |
| default: |
| /* Now an ordered cascade of tests. |
| FP immediate has instr [12] == 1. |
| FP compare has instr [13] == 1. |
| FP Data Proc 1 Source has instr [14] == 1. |
| FP floating <--> integer conversions has instr [15] == 0. */ |
| if (INSTR (12, 12)) |
| dexSimpleFPImmediate (cpu); |
| |
| else if (INSTR (13, 13)) |
| dexSimpleFPCompare (cpu); |
| |
| else if (INSTR (14, 14)) |
| dexSimpleFPDataProc1Source (cpu); |
| |
| else if (!INSTR (15, 15)) |
| dexSimpleFPIntegerConvert (cpu); |
| |
| else |
| /* If we get here then instr[15] == 1 which means UNALLOC. */ |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* PC relative addressing. */ |
| |
| static void |
| pcadr (sim_cpu *cpu) |
| { |
| /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP |
| instr[30,29] = immlo |
| instr[23,5] = immhi. */ |
| uint64_t address; |
| unsigned rd = INSTR (4, 0); |
| uint32_t isPage = INSTR (31, 31); |
| union { int64_t u64; uint64_t s64; } imm; |
| uint64_t offset; |
| |
| imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5); |
| offset = imm.u64; |
| offset = (offset << 2) | INSTR (30, 29); |
| |
| address = aarch64_get_PC (cpu); |
| |
| if (isPage) |
| { |
| offset <<= 12; |
| address &= ~0xfff; |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset); |
| } |
| |
/* Specific decode and execute for group Data Processing Immediate.  */

/* Decode and execute a PC relative addressing instruction.  No field
   is examined here; the instruction is handed straight to pcadr.  */
static void
dexPCRelAddressing (sim_cpu *cpu)
{
  /* assert instr[28,24] = 10000.  */
  pcadr (cpu);
}
| |
/* Immediate logical.
   The bimm32/64 argument is constructed by replicating a 2, 4, 8,
   16, 32 or 64 bit sequence pulled out at decode and possibly
   inverting it.

   N.B. the output register (dest) can normally be Xn or SP.
   The exception is the flag setting instructions, which may
   only use Xn for the output (dest).  The input register can
   never be SP.  */
| |
| /* 32 bit and immediate. */ |
| static void |
| and32 (sim_cpu *cpu, uint32_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm); |
| } |
| |
| /* 64 bit and immediate. */ |
| static void |
| and64 (sim_cpu *cpu, uint64_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm); |
| } |
| |
| /* 32 bit and immediate set flags. */ |
| static void |
| ands32 (sim_cpu *cpu, uint32_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = bimm; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); |
| set_flags_for_binop32 (cpu, value1 & value2); |
| } |
| |
| /* 64 bit and immediate set flags. */ |
| static void |
| ands64 (sim_cpu *cpu, uint64_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = bimm; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); |
| set_flags_for_binop64 (cpu, value1 & value2); |
| } |
| |
| /* 32 bit exclusive or immediate. */ |
| static void |
| eor32 (sim_cpu *cpu, uint32_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm); |
| } |
| |
| /* 64 bit exclusive or immediate. */ |
| static void |
| eor64 (sim_cpu *cpu, uint64_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm); |
| } |
| |
| /* 32 bit or immediate. */ |
| static void |
| orr32 (sim_cpu *cpu, uint32_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm); |
| } |
| |
| /* 64 bit or immediate. */ |
| static void |
| orr64 (sim_cpu *cpu, uint64_t bimm) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm); |
| } |
| |
| /* Logical shifted register. |
| These allow an optional LSL, ASR, LSR or ROR to the second source |
| register with a count up to the register bit count. |
| N.B register args may not be SP. */ |
| |
| /* 32 bit AND shifted register. */ |
| static void |
| and32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 64 bit AND shifted register. */ |
| static void |
| and64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 32 bit AND shifted register setting flags. */ |
| static void |
| ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); |
| set_flags_for_binop32 (cpu, value1 & value2); |
| } |
| |
| /* 64 bit AND shifted register setting flags. */ |
| static void |
| ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); |
| set_flags_for_binop64 (cpu, value1 & value2); |
| } |
| |
| /* 32 bit BIC shifted register. */ |
| static void |
| bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 64 bit BIC shifted register. */ |
| static void |
| bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 32 bit BIC shifted register setting flags. */ |
| static void |
| bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); |
| set_flags_for_binop32 (cpu, value1 & value2); |
| } |
| |
| /* 64 bit BIC shifted register setting flags. */ |
| static void |
| bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), |
| shift, count); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2); |
| set_flags_for_binop64 (cpu, value1 & value2); |
| } |
| |
| /* 32 bit EON shifted register. */ |
| static void |
| eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 64 bit EON shifted register. */ |
| static void |
| eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 32 bit EOR shifted register. */ |
| static void |
| eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 64 bit EOR shifted register. */ |
| static void |
| eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 32 bit ORR shifted register. */ |
| static void |
| orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 64 bit ORR shifted register. */ |
| static void |
| orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 32 bit ORN shifted register. */ |
| static void |
| orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| /* 64 bit ORN shifted register. */ |
| static void |
| orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count)); |
| } |
| |
| static void |
| dexLogicalImmediate (sim_cpu *cpu) |
| { |
| /* assert instr[28,23] = 1001000 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS |
| instr[22] = N : used to construct immediate mask |
| instr[21,16] = immr |
| instr[15,10] = imms |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ |
| uint32_t size = INSTR (31, 31); |
| uint32_t N = INSTR (22, 22); |
| /* uint32_t immr = INSTR (21, 16);. */ |
| /* uint32_t imms = INSTR (15, 10);. */ |
| uint32_t index = INSTR (22, 10); |
| uint64_t bimm64 = LITable [index]; |
| uint32_t dispatch = INSTR (30, 29); |
| |
| if (~size & N) |
| HALT_UNALLOC; |
| |
| if (!bimm64) |
| HALT_UNALLOC; |
| |
| if (size == 0) |
| { |
| uint32_t bimm = (uint32_t) bimm64; |
| |
| switch (dispatch) |
| { |
| case 0: and32 (cpu, bimm); return; |
| case 1: orr32 (cpu, bimm); return; |
| case 2: eor32 (cpu, bimm); return; |
| case 3: ands32 (cpu, bimm); return; |
| } |
| } |
| else |
| { |
| switch (dispatch) |
| { |
| case 0: and64 (cpu, bimm64); return; |
| case 1: orr64 (cpu, bimm64); return; |
| case 2: eor64 (cpu, bimm64); return; |
| case 3: ands64 (cpu, bimm64); return; |
| } |
| } |
| HALT_UNALLOC; |
| } |
| |
/* Immediate move.
   The uimm argument is a 16 bit value to be inserted into the
   target register.  The pos argument locates the 16 bit word in the
   dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2, 3}
   for 64 bit.
   N.B. the register arg may not be SP, so it should be
   accessed using the setGZRegisterXXX accessors.  */
| |
| /* 32 bit move 16 bit immediate zero remaining shorts. */ |
| static void |
| movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16)); |
| } |
| |
| /* 64 bit move 16 bit immediate zero remaining shorts. */ |
| static void |
| movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16)); |
| } |
| |
| /* 32 bit move 16 bit immediate negated. */ |
| static void |
| movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU)); |
| } |
| |
| /* 64 bit move 16 bit immediate negated. */ |
| static void |
| movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos) |
| { |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16)) |
| ^ 0xffffffffffffffffULL)); |
| } |
| |
| /* 32 bit move 16 bit immediate keep remaining shorts. */ |
| static void |
| movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos) |
| { |
| unsigned rd = INSTR (4, 0); |
| uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP); |
| uint32_t value = val << (pos * 16); |
| uint32_t mask = ~(0xffffU << (pos * 16)); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); |
| } |
| |
| /* 64 bit move 16 it immediate keep remaining shorts. */ |
| static void |
| movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos) |
| { |
| unsigned rd = INSTR (4, 0); |
| uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP); |
| uint64_t value = (uint64_t) val << (pos * 16); |
| uint64_t mask = ~(0xffffULL << (pos * 16)); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask))); |
| } |
| |
| static void |
| dexMoveWideImmediate (sim_cpu *cpu) |
| { |
| /* assert instr[28:23] = 100101 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK |
| instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48 |
| instr[20,5] = uimm16 |
| instr[4,0] = Rd */ |
| |
| /* N.B. the (multiple of 16) shift is applied by the called routine, |
| we just pass the multiplier. */ |
| |
| uint32_t imm; |
| uint32_t size = INSTR (31, 31); |
| uint32_t op = INSTR (30, 29); |
| uint32_t shift = INSTR (22, 21); |
| |
| /* 32 bit can only shift 0 or 1 lot of 16. |
| anything else is an unallocated instruction. */ |
| if (size == 0 && (shift > 1)) |
| HALT_UNALLOC; |
| |
| if (op == 1) |
| HALT_UNALLOC; |
| |
| imm = INSTR (20, 5); |
| |
| if (size == 0) |
| { |
| if (op == 0) |
| movn32 (cpu, imm, shift); |
| else if (op == 2) |
| movz32 (cpu, imm, shift); |
| else |
| movk32 (cpu, imm, shift); |
| } |
| else |
| { |
| if (op == 0) |
| movn64 (cpu, imm, shift); |
| else if (op == 2) |
| movz64 (cpu, imm, shift); |
| else |
| movk64 (cpu, imm, shift); |
| } |
| } |
| |
| /* Bitfield operations. |
| These take a pair of bit positions r and s which are in {0..31} |
| or {0..63} depending on the instruction word size. |
| N.B register args may not be SP. */ |
| |
/* We start with ubfm, which just needs to pick some bits out of
   the source, zero the rest, and write the result to the dest.
   This needs just two logical shifts.  */
| |
| /* 32 bit bitfield move, left and right of affected zeroed |
| if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ |
| static void |
| ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) |
| { |
| unsigned rd; |
| unsigned rn = INSTR (9, 5); |
| uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| |
| /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ |
| if (r <= s) |
| { |
| /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0. |
| We want only bits s:xxx:r at the bottom of the word |
| so we LSL bit s up to bit 31 i.e. by 31 - s |
| and then we LSR to bring bit 31 down to bit s - r |
| i.e. by 31 + r - s. */ |
| value <<= 31 - s; |
| value >>= 31 + r - s; |
| } |
| else |
| { |
| /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0 |
| We want only bits s:xxx:0 starting at it 31-(r-1) |
| so we LSL bit s up to bit 31 i.e. by 31 - s |
| and then we LSL to bring bit 31 down to 31-(r-1)+s |
| i.e. by r - (s + 1). */ |
| value <<= 31 - s; |
| value >>= r - (s + 1); |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| rd = INSTR (4, 0); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value); |
| } |
| |
| /* 64 bit bitfield move, left and right of affected zeroed |
| if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ |
| static void |
| ubfm (sim_cpu *cpu, uint32_t r, uint32_t s) |
| { |
| unsigned rd; |
| unsigned rn = INSTR (9, 5); |
| uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| |
| if (r <= s) |
| { |
| /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0. |
| We want only bits s:xxx:r at the bottom of the word. |
| So we LSL bit s up to bit 63 i.e. by 63 - s |
| and then we LSR to bring bit 63 down to bit s - r |
| i.e. by 63 + r - s. */ |
| value <<= 63 - s; |
| value >>= 63 + r - s; |
| } |
| else |
| { |
| /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0. |
| We want only bits s:xxx:0 starting at it 63-(r-1). |
| So we LSL bit s up to bit 63 i.e. by 63 - s |
| and then we LSL to bring bit 63 down to 63-(r-1)+s |
| i.e. by r - (s + 1). */ |
| value <<= 63 - s; |
| value >>= r - (s + 1); |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| rd = INSTR (4, 0); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, value); |
| } |
| |
| /* The signed versions need to insert sign bits |
| on the left of the inserted bit field. so we do |
| much the same as the unsigned version except we |
| use an arithmetic shift right -- this just means |
| we need to operate on signed values. */ |
| |
| /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */ |
| /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ |
| static void |
| sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) |
| { |
| unsigned rd; |
| unsigned rn = INSTR (9, 5); |
| /* as per ubfm32 but use an ASR instead of an LSR. */ |
| int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP); |
| |
| if (r <= s) |
| { |
| value <<= 31 - s; |
| value >>= 31 + r - s; |
| } |
| else |
| { |
| value <<= 31 - s; |
| value >>= r - (s + 1); |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| rd = INSTR (4, 0); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value); |
| } |
| |
| /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */ |
| /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ |
| static void |
| sbfm (sim_cpu *cpu, uint32_t r, uint32_t s) |
| { |
| unsigned rd; |
| unsigned rn = INSTR (9, 5); |
| /* acpu per ubfm but use an ASR instead of an LSR. */ |
| int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP); |
| |
| if (r <= s) |
| { |
| value <<= 63 - s; |
| value >>= 63 + r - s; |
| } |
| else |
| { |
| value <<= 63 - s; |
| value >>= r - (s + 1); |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| rd = INSTR (4, 0); |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, value); |
| } |
| |
| /* Finally, these versions leave non-affected bits |
| as is. so we need to generate the bits as per |
| ubfm and also generate a mask to pick the |
| bits from the original and computed values. */ |
| |
| /* 32 bit bitfield move, non-affected bits left as is. |
| If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */ |
| static void |
| bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s) |
| { |
| unsigned rn = INSTR (9, 5); |
| uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t mask = -1; |
| unsigned rd; |
| uint32_t value2; |
| |
| /* Pick either s+1-r or s+1 consecutive bits out of the original word. */ |
| if (r <= s) |
| { |
| /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0. |
| We want only bits s:xxx:r at the bottom of the word |
| so we LSL bit s up to bit 31 i.e. by 31 - s |
| and then we LSR to bring bit 31 down to bit s - r |
| i.e. by 31 + r - s. */ |
| value <<= 31 - s; |
| value >>= 31 + r - s; |
| /* the mask must include the same bits. */ |
| mask <<= 31 - s; |
| mask >>= 31 + r - s; |
| } |
| else |
| { |
| /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0. |
| We want only bits s:xxx:0 starting at it 31-(r-1) |
| so we LSL bit s up to bit 31 i.e. by 31 - s |
| and then we LSL to bring bit 31 down to 31-(r-1)+s |
| i.e. by r - (s + 1). */ |
| value <<= 31 - s; |
| value >>= r - (s + 1); |
| /* The mask must include the same bits. */ |
| mask <<= 31 - s; |
| mask >>= r - (s + 1); |
| } |
| |
| rd = INSTR (4, 0); |
| value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP); |
| |
| value2 &= ~mask; |
| value2 |= value; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, (aarch64_get_reg_u32 (cpu, rd, NO_SP) & ~mask) | value); |
| } |
| |
| /* 64 bit bitfield move, non-affected bits left as is. |
| If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */ |
| static void |
| bfm (sim_cpu *cpu, uint32_t r, uint32_t s) |
| { |
| unsigned rd; |
| unsigned rn = INSTR (9, 5); |
| uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t mask = 0xffffffffffffffffULL; |
| |
| if (r <= s) |
| { |
| /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0. |
| We want only bits s:xxx:r at the bottom of the word |
| so we LSL bit s up to bit 63 i.e. by 63 - s |
| and then we LSR to bring bit 63 down to bit s - r |
| i.e. by 63 + r - s. */ |
| value <<= 63 - s; |
| value >>= 63 + r - s; |
| /* The mask must include the same bits. */ |
| mask <<= 63 - s; |
| mask >>= 63 + r - s; |
| } |
| else |
| { |
| /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0 |
| We want only bits s:xxx:0 starting at it 63-(r-1) |
| so we LSL bit s up to bit 63 i.e. by 63 - s |
| and then we LSL to bring bit 63 down to 63-(r-1)+s |
| i.e. by r - (s + 1). */ |
| value <<= 63 - s; |
| value >>= r - (s + 1); |
| /* The mask must include the same bits. */ |
| mask <<= 63 - s; |
| mask >>= r - (s + 1); |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| rd = INSTR (4, 0); |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value); |
| } |
| |
| static void |
| dexBitfieldImmediate (sim_cpu *cpu) |
| { |
| /* assert instr[28:23] = 100110 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC |
| instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC |
| instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit |
| instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ |
| uint32_t dispatch; |
| uint32_t imms; |
| uint32_t size = INSTR (31, 31); |
| uint32_t N = INSTR (22, 22); |
| /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */ |
| /* or else we have an UNALLOC. */ |
| uint32_t immr = INSTR (21, 16); |
| |
| if (~size & N) |
| HALT_UNALLOC; |
| |
| if (!size && uimm (immr, 5, 5)) |
| HALT_UNALLOC; |
| |
| imms = INSTR (15, 10); |
| if (!size && uimm (imms, 5, 5)) |
| HALT_UNALLOC; |
| |
| /* Switch on combined size and op. */ |
| dispatch = INSTR (31, 29); |
| switch (dispatch) |
| { |
| case 0: sbfm32 (cpu, immr, imms); return; |
| case 1: bfm32 (cpu, immr, imms); return; |
| case 2: ubfm32 (cpu, immr, imms); return; |
| case 4: sbfm (cpu, immr, imms); return; |
| case 5: bfm (cpu, immr, imms); return; |
| case 6: ubfm (cpu, immr, imms); return; |
| default: HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_EXTR_32 (sim_cpu *cpu) |
| { |
| /* instr[31:21] = 00010011100 |
| instr[20,16] = Rm |
| instr[15,10] = imms : 0xxxxx for 32 bit |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| unsigned rm = INSTR (20, 16); |
| unsigned imms = INSTR (15, 10) & 31; |
| unsigned rn = INSTR ( 9, 5); |
| unsigned rd = INSTR ( 4, 0); |
| uint64_t val1; |
| uint64_t val2; |
| |
| val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP); |
| val1 >>= imms; |
| val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| val2 <<= (32 - imms); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, val1 | val2); |
| } |
| |
| static void |
| do_EXTR_64 (sim_cpu *cpu) |
| { |
| /* instr[31:21] = 10010011100 |
| instr[20,16] = Rm |
| instr[15,10] = imms |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| unsigned rm = INSTR (20, 16); |
| unsigned imms = INSTR (15, 10) & 63; |
| unsigned rn = INSTR ( 9, 5); |
| unsigned rd = INSTR ( 4, 0); |
| uint64_t val; |
| |
| val = aarch64_get_reg_u64 (cpu, rm, NO_SP); |
| val >>= imms; |
| val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, val); |
| } |
| |
| static void |
| dexExtractImmediate (sim_cpu *cpu) |
| { |
| /* assert instr[28:23] = 100111 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC |
| instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC |
| instr[21] = op0 : must be 0 or UNALLOC |
| instr[20,16] = Rm |
| instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| /* 32 bit operations must have N = 0 or else we have an UNALLOC. */ |
| /* 64 bit operations must have N = 1 or else we have an UNALLOC. */ |
| uint32_t dispatch; |
| uint32_t size = INSTR (31, 31); |
| uint32_t N = INSTR (22, 22); |
| /* 32 bit operations must have imms[5] = 0 |
| or else we have an UNALLOC. */ |
| uint32_t imms = INSTR (15, 10); |
| |
| if (size ^ N) |
| HALT_UNALLOC; |
| |
| if (!size && uimm (imms, 5, 5)) |
| HALT_UNALLOC; |
| |
| /* Switch on combined size and op. */ |
| dispatch = INSTR (31, 29); |
| |
| if (dispatch == 0) |
| do_EXTR_32 (cpu); |
| |
| else if (dispatch == 4) |
| do_EXTR_64 (cpu); |
| |
| else if (dispatch == 1) |
| HALT_NYI; |
| else |
| HALT_UNALLOC; |
| } |
| |
| static void |
| dexDPImm (sim_cpu *cpu) |
| { |
| /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); |
| assert group == GROUP_DPIMM_1000 || grpoup == GROUP_DPIMM_1001 |
| bits [25,23] of a DPImm are the secondary dispatch vector. */ |
| uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu)); |
| |
| switch (group2) |
| { |
| case DPIMM_PCADR_000: |
| case DPIMM_PCADR_001: |
| dexPCRelAddressing (cpu); |
| return; |
| |
| case DPIMM_ADDSUB_010: |
| case DPIMM_ADDSUB_011: |
| dexAddSubtractImmediate (cpu); |
| return; |
| |
| case DPIMM_LOG_100: |
| dexLogicalImmediate (cpu); |
| return; |
| |
| case DPIMM_MOV_101: |
| dexMoveWideImmediate (cpu); |
| return; |
| |
| case DPIMM_BITF_110: |
| dexBitfieldImmediate (cpu); |
| return; |
| |
| case DPIMM_EXTR_111: |
| dexExtractImmediate (cpu); |
| return; |
| |
| default: |
| /* Should never reach here. */ |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| dexLoadUnscaledImmediate (sim_cpu *cpu) |
| { |
| /* instr[29,24] == 111_00 |
| instr[21] == 0 |
| instr[11,10] == 00 |
| instr[31,30] = size |
| instr[26] = V |
| instr[23,22] = opc |
| instr[20,12] = simm9 |
| instr[9,5] = rn may be SP. */ |
| /* unsigned rt = INSTR (4, 0); */ |
| uint32_t V = INSTR (26, 26); |
| uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); |
| int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); |
| |
| if (!V) |
| { |
| /* GReg operations. */ |
| switch (dispatch) |
| { |
| case 0: sturb (cpu, imm); return; |
| case 1: ldurb32 (cpu, imm); return; |
| case 2: ldursb64 (cpu, imm); return; |
| case 3: ldursb32 (cpu, imm); return; |
| case 4: sturh (cpu, imm); return; |
| case 5: ldurh32 (cpu, imm); return; |
| case 6: ldursh64 (cpu, imm); return; |
| case 7: ldursh32 (cpu, imm); return; |
| case 8: stur32 (cpu, imm); return; |
| case 9: ldur32 (cpu, imm); return; |
| case 10: ldursw (cpu, imm); return; |
| case 12: stur64 (cpu, imm); return; |
| case 13: ldur64 (cpu, imm); return; |
| |
| case 14: |
| /* PRFUM NYI. */ |
| HALT_NYI; |
| |
| default: |
| case 11: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* FReg operations. */ |
| switch (dispatch) |
| { |
| case 2: fsturq (cpu, imm); return; |
| case 3: fldurq (cpu, imm); return; |
| case 8: fsturs (cpu, imm); return; |
| case 9: fldurs (cpu, imm); return; |
| case 12: fsturd (cpu, imm); return; |
| case 13: fldurd (cpu, imm); return; |
| |
| case 0: /* STUR 8 bit FP. */ |
| case 1: /* LDUR 8 bit FP. */ |
| case 4: /* STUR 16 bit FP. */ |
| case 5: /* LDUR 8 bit FP. */ |
| HALT_NYI; |
| |
| default: |
| case 6: |
| case 7: |
| case 10: |
| case 11: |
| case 14: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
/* N.B. A preliminary note regarding all the ldrs<x>32
   instructions.

   The signed value loaded by these instructions is cast to unsigned
   before being assigned to aarch64_get_reg_u64 (cpu, N), i.e. to the
   64 bit element of the GReg union.  This performs a 32 bit sign
   extension (as required) but avoids a 64 bit sign extension, thus
   ensuring that the top half of the register word is zero.  This is
   what the spec demands when a 32 bit load occurs.  */
| |
| /* 32 bit load sign-extended byte scaled unsigned 12 bit. */ |
| static void |
| ldrsb32_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned int rn = INSTR (9, 5); |
| unsigned int rt = INSTR (4, 0); |
| |
| /* The target register may not be SP but the source may be |
| there is no scaling required for a byte load. */ |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset; |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| (int64_t) aarch64_get_mem_s8 (cpu, address)); |
| } |
| |
| /* 32 bit load sign-extended byte scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned int rm = INSTR (20, 16); |
| unsigned int rn = INSTR (9, 5); |
| unsigned int rt = INSTR (4, 0); |
| |
| /* rn may reference SP, rm and rt must reference ZR. */ |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| |
| /* There is no scaling required for a byte load. */ |
| aarch64_set_reg_u64 |
| (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address |
| + displacement)); |
| } |
| |
| /* 32 bit load sign-extended byte unscaled signed 9 bit with |
| pre- or post-writeback. */ |
| static void |
| ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| uint64_t address; |
| unsigned int rn = INSTR (9, 5); |
| unsigned int rt = INSTR (4, 0); |
| |
| if (rn == rt && wb != NoWriteBack) |
| HALT_UNALLOC; |
| |
| address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb == Pre) |
| address += offset; |
| |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| (int64_t) aarch64_get_mem_s8 (cpu, address)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, NO_SP, address); |
| } |
| |
| /* 8 bit store scaled. */ |
| static void |
| fstrb_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned st = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| |
| aarch64_set_mem_u8 (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset, |
| aarch64_get_vec_u8 (cpu, st, 0)); |
| } |
| |
| /* 8 bit store scaled or unscaled zero- or |
| sign-extended 8-bit register offset. */ |
| static void |
| fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| uint64_t displacement = scaling == Scaled ? extended : 0; |
| |
| aarch64_set_mem_u8 |
| (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0)); |
| } |
| |
| /* 16 bit store scaled. */ |
| static void |
| fstrh_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned st = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| |
| aarch64_set_mem_u16 |
| (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16), |
| aarch64_get_vec_u16 (cpu, st, 0)); |
| } |
| |
| /* 16 bit store scaled or unscaled zero- |
| or sign-extended 16-bit register offset. */ |
| static void |
| fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| uint64_t displacement = OPT_SCALE (extended, 16, scaling); |
| |
| aarch64_set_mem_u16 |
| (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0)); |
| } |
| |
| /* 32 bit store scaled unsigned 12 bit. */ |
| static void |
| fstrs_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned st = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| |
| aarch64_set_mem_u32 |
| (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32), |
| aarch64_get_vec_u32 (cpu, st, 0)); |
| } |
| |
| /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 32 bit store scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| uint64_t displacement = OPT_SCALE (extended, 32, scaling); |
| |
| aarch64_set_mem_u32 |
| (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0)); |
| } |
| |
| /* 64 bit store scaled unsigned 12 bit. */ |
| static void |
| fstrd_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| unsigned st = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| |
| aarch64_set_mem_u64 |
| (cpu, |
| aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64), |
| aarch64_get_vec_u64 (cpu, st, 0)); |
| } |
| |
| /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 64 bit store scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| uint64_t displacement = OPT_SCALE (extended, 64, scaling); |
| |
| aarch64_set_mem_u64 |
| (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0)); |
| } |
| |
| /* 128 bit store scaled unsigned 12 bit. */ |
| static void |
| fstrq_abs (sim_cpu *cpu, uint32_t offset) |
| { |
| FRegister a; |
| unsigned st = INSTR (4, 0); |
| unsigned rn = INSTR (9, 5); |
| uint64_t addr; |
| |
| aarch64_get_FP_long_double (cpu, st, & a); |
| |
| addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128); |
| aarch64_set_mem_long_double (cpu, addr, a); |
| } |
| |
| /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */ |
| static void |
| fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| FRegister a; |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_get_FP_long_double (cpu, st, & a); |
| aarch64_set_mem_long_double (cpu, address, a); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, address); |
| } |
| |
| /* 128 bit store scaled or unscaled zero- |
| or sign-extended 32-bit register offset. */ |
| static void |
| fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned st = INSTR (4, 0); |
| |
| uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK); |
| int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), |
| extension); |
| uint64_t displacement = OPT_SCALE (extended, 128, scaling); |
| |
| FRegister a; |
| |
| aarch64_get_FP_long_double (cpu, st, & a); |
| aarch64_set_mem_long_double (cpu, address + displacement, a); |
| } |
| |
| static void |
| dexLoadImmediatePrePost (sim_cpu *cpu) |
| { |
| /* instr[31,30] = size |
| instr[29,27] = 111 |
| instr[26] = V |
| instr[25,24] = 00 |
| instr[23,22] = opc |
| instr[21] = 0 |
| instr[20,12] = simm9 |
| instr[11] = wb : 0 ==> Post, 1 ==> Pre |
| instr[10] = 0 |
| instr[9,5] = Rn may be SP. |
| instr[4,0] = Rt */ |
| |
| uint32_t V = INSTR (26, 26); |
| uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); |
| int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12); |
| WriteBack wb = INSTR (11, 11); |
| |
| if (!V) |
| { |
| /* GReg operations. */ |
| switch (dispatch) |
| { |
| case 0: strb_wb (cpu, imm, wb); return; |
| case 1: ldrb32_wb (cpu, imm, wb); return; |
| case 2: ldrsb_wb (cpu, imm, wb); return; |
| case 3: ldrsb32_wb (cpu, imm, wb); return; |
| case 4: strh_wb (cpu, imm, wb); return; |
| case 5: ldrh32_wb (cpu, imm, wb); return; |
| case 6: ldrsh64_wb (cpu, imm, wb); return; |
| case 7: ldrsh32_wb (cpu, imm, wb); return; |
| case 8: str32_wb (cpu, imm, wb); return; |
| case 9: ldr32_wb (cpu, imm, wb); return; |
| case 10: ldrsw_wb (cpu, imm, wb); return; |
| case 12: str_wb (cpu, imm, wb); return; |
| case 13: ldr_wb (cpu, imm, wb); return; |
| |
| default: |
| case 11: |
| case 14: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* FReg operations. */ |
| switch (dispatch) |
| { |
| case 2: fstrq_wb (cpu, imm, wb); return; |
| case 3: fldrq_wb (cpu, imm, wb); return; |
| case 8: fstrs_wb (cpu, imm, wb); return; |
| case 9: fldrs_wb (cpu, imm, wb); return; |
| case 12: fstrd_wb (cpu, imm, wb); return; |
| case 13: fldrd_wb (cpu, imm, wb); return; |
| |
| case 0: /* STUR 8 bit FP. */ |
| case 1: /* LDUR 8 bit FP. */ |
| case 4: /* STUR 16 bit FP. */ |
| case 5: /* LDUR 8 bit FP. */ |
| HALT_NYI; |
| |
| default: |
| case 6: |
| case 7: |
| case 10: |
| case 11: |
| case 14: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| dexLoadRegisterOffset (sim_cpu *cpu) |
| { |
| /* instr[31,30] = size |
| instr[29,27] = 111 |
| instr[26] = V |
| instr[25,24] = 00 |
| instr[23,22] = opc |
| instr[21] = 1 |
| instr[20,16] = rm |
| instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL, |
| 110 ==> SXTW, 111 ==> SXTX, |
| ow ==> RESERVED |
| instr[12] = scaled |
| instr[11,10] = 10 |
| instr[9,5] = rn |
| instr[4,0] = rt. */ |
| |
| uint32_t V = INSTR (26, 26); |
| uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); |
| Scaling scale = INSTR (12, 12); |
| Extension extensionType = INSTR (15, 13); |
| |
| /* Check for illegal extension types. */ |
| if (uimm (extensionType, 1, 1) == 0) |
| HALT_UNALLOC; |
| |
| if (extensionType == UXTX || extensionType == SXTX) |
| extensionType = NoExtension; |
| |
| if (!V) |
| { |
| /* GReg operations. */ |
| switch (dispatch) |
| { |
| case 0: strb_scale_ext (cpu, scale, extensionType); return; |
| case 1: ldrb32_scale_ext (cpu, scale, extensionType); return; |
| case 2: ldrsb_scale_ext (cpu, scale, extensionType); return; |
| case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return; |
| case 4: strh_scale_ext (cpu, scale, extensionType); return; |
| case 5: ldrh32_scale_ext (cpu, scale, extensionType); return; |
| case 6: ldrsh_scale_ext (cpu, scale, extensionType); return; |
| case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return; |
| case 8: str32_scale_ext (cpu, scale, extensionType); return; |
| case 9: ldr32_scale_ext (cpu, scale, extensionType); return; |
| case 10: ldrsw_scale_ext (cpu, scale, extensionType); return; |
| case 12: str_scale_ext (cpu, scale, extensionType); return; |
| case 13: ldr_scale_ext (cpu, scale, extensionType); return; |
| case 14: prfm_scale_ext (cpu, scale, extensionType); return; |
| |
| default: |
| case 11: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* FReg operations. */ |
| switch (dispatch) |
| { |
| case 1: /* LDUR 8 bit FP. */ |
| HALT_NYI; |
| case 3: fldrq_scale_ext (cpu, scale, extensionType); return; |
| case 5: /* LDUR 8 bit FP. */ |
| HALT_NYI; |
| case 9: fldrs_scale_ext (cpu, scale, extensionType); return; |
| case 13: fldrd_scale_ext (cpu, scale, extensionType); return; |
| |
| case 0: fstrb_scale_ext (cpu, scale, extensionType); return; |
| case 2: fstrq_scale_ext (cpu, scale, extensionType); return; |
| case 4: fstrh_scale_ext (cpu, scale, extensionType); return; |
| case 8: fstrs_scale_ext (cpu, scale, extensionType); return; |
| case 12: fstrd_scale_ext (cpu, scale, extensionType); return; |
| |
| default: |
| case 6: |
| case 7: |
| case 10: |
| case 11: |
| case 14: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| dexLoadUnsignedImmediate (sim_cpu *cpu) |
| { |
| /* instr[29,24] == 111_01 |
| instr[31,30] = size |
| instr[26] = V |
| instr[23,22] = opc |
| instr[21,10] = uimm12 : unsigned immediate offset |
| instr[9,5] = rn may be SP. |
| instr[4,0] = rt. */ |
| |
| uint32_t V = INSTR (26,26); |
| uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22)); |
| uint32_t imm = INSTR (21, 10); |
| |
| if (!V) |
| { |
| /* GReg operations. */ |
| switch (dispatch) |
| { |
| case 0: strb_abs (cpu, imm); return; |
| case 1: ldrb32_abs (cpu, imm); return; |
| case 2: ldrsb_abs (cpu, imm); return; |
| case 3: ldrsb32_abs (cpu, imm); return; |
| case 4: strh_abs (cpu, imm); return; |
| case 5: ldrh32_abs (cpu, imm); return; |
| case 6: ldrsh_abs (cpu, imm); return; |
| case 7: ldrsh32_abs (cpu, imm); return; |
| case 8: str32_abs (cpu, imm); return; |
| case 9: ldr32_abs (cpu, imm); return; |
| case 10: ldrsw_abs (cpu, imm); return; |
| case 12: str_abs (cpu, imm); return; |
| case 13: ldr_abs (cpu, imm); return; |
| case 14: prfm_abs (cpu, imm); return; |
| |
| default: |
| case 11: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* FReg operations. */ |
| switch (dispatch) |
| { |
| case 0: fstrb_abs (cpu, imm); return; |
| case 4: fstrh_abs (cpu, imm); return; |
| case 8: fstrs_abs (cpu, imm); return; |
| case 12: fstrd_abs (cpu, imm); return; |
| case 2: fstrq_abs (cpu, imm); return; |
| |
| case 1: fldrb_abs (cpu, imm); return; |
| case 5: fldrh_abs (cpu, imm); return; |
| case 9: fldrs_abs (cpu, imm); return; |
| case 13: fldrd_abs (cpu, imm); return; |
| case 3: fldrq_abs (cpu, imm); return; |
| |
| default: |
| case 6: |
| case 7: |
| case 10: |
| case 11: |
| case 14: |
| case 15: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| dexLoadExclusive (sim_cpu *cpu) |
| { |
| /* assert instr[29:24] = 001000; |
| instr[31,30] = size |
| instr[23] = 0 if exclusive |
| instr[22] = L : 1 if load, 0 if store |
| instr[21] = 1 if pair |
| instr[20,16] = Rs |
| instr[15] = o0 : 1 if ordered |
| instr[14,10] = Rt2 |
| instr[9,5] = Rn |
| instr[4.0] = Rt. */ |
| |
| switch (INSTR (22, 21)) |
| { |
| case 2: ldxr (cpu); return; |
| case 0: stxr (cpu); return; |
| default: HALT_NYI; |
| } |
| } |
| |
| static void |
| dexLoadOther (sim_cpu *cpu) |
| { |
| uint32_t dispatch; |
| |
| /* instr[29,25] = 111_0 |
| instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate |
| instr[21:11,10] is the secondary dispatch. */ |
| if (INSTR (24, 24)) |
| { |
| dexLoadUnsignedImmediate (cpu); |
| return; |
| } |
| |
| dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10)); |
| switch (dispatch) |
| { |
| case 0: dexLoadUnscaledImmediate (cpu); return; |
| case 1: dexLoadImmediatePrePost (cpu); return; |
| case 3: dexLoadImmediatePrePost (cpu); return; |
| case 6: dexLoadRegisterOffset (cpu); return; |
| |
| default: |
| case 2: |
| case 4: |
| case 5: |
| case 7: |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| if ((rn == rd || rm == rd) && wb != NoWriteBack) |
| HALT_UNALLOC; /* ??? */ |
| |
| offset <<= 2; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_mem_u32 (cpu, address, |
| aarch64_get_reg_u32 (cpu, rm, NO_SP)); |
| aarch64_set_mem_u32 (cpu, address + 4, |
| aarch64_get_reg_u32 (cpu, rn, NO_SP)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| if ((rn == rd || rm == rd) && wb != NoWriteBack) |
| HALT_UNALLOC; /* ??? */ |
| |
| offset <<= 3; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_mem_u64 (cpu, address, |
| aarch64_get_reg_u64 (cpu, rm, NO_SP)); |
| aarch64_set_mem_u64 (cpu, address + 8, |
| aarch64_get_reg_u64 (cpu, rn, NO_SP)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| /* Treat this as unalloc to make sure we don't do it. */ |
| if (rn == rm) |
| HALT_UNALLOC; |
| |
| offset <<= 2; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address)); |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| /* Treat this as unalloc to make sure we don't do it. */ |
| if (rn == rm) |
| HALT_UNALLOC; |
| |
| offset <<= 2; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address)); |
| aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| /* Treat this as unalloc to make sure we don't do it. */ |
| if (rn == rm) |
| HALT_UNALLOC; |
| |
| offset <<= 3; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address)); |
| aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| dex_load_store_pair_gr (sim_cpu *cpu) |
| { |
| /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit) |
| instr[29,25] = instruction encoding: 101_0 |
| instr[26] = V : 1 if fp 0 if gp |
| instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre) |
| instr[22] = load/store (1=> load) |
| instr[21,15] = signed, scaled, offset |
| instr[14,10] = Rn |
| instr[ 9, 5] = Rd |
| instr[ 4, 0] = Rm. */ |
| |
| uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22)); |
| int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); |
| |
| switch (dispatch) |
| { |
| case 2: store_pair_u32 (cpu, offset, Post); return; |
| case 3: load_pair_u32 (cpu, offset, Post); return; |
| case 4: store_pair_u32 (cpu, offset, NoWriteBack); return; |
| case 5: load_pair_u32 (cpu, offset, NoWriteBack); return; |
| case 6: store_pair_u32 (cpu, offset, Pre); return; |
| case 7: load_pair_u32 (cpu, offset, Pre); return; |
| |
| case 11: load_pair_s32 (cpu, offset, Post); return; |
| case 13: load_pair_s32 (cpu, offset, NoWriteBack); return; |
| case 15: load_pair_s32 (cpu, offset, Pre); return; |
| |
| case 18: store_pair_u64 (cpu, offset, Post); return; |
| case 19: load_pair_u64 (cpu, offset, Post); return; |
| case 20: store_pair_u64 (cpu, offset, NoWriteBack); return; |
| case 21: load_pair_u64 (cpu, offset, NoWriteBack); return; |
| case 22: store_pair_u64 (cpu, offset, Pre); return; |
| case 23: load_pair_u64 (cpu, offset, Pre); return; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| offset <<= 2; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0)); |
| aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| offset <<= 3; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0)); |
| aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| FRegister a; |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| offset <<= 4; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_get_FP_long_double (cpu, rm, & a); |
| aarch64_set_mem_long_double (cpu, address, a); |
| aarch64_get_FP_long_double (cpu, rn, & a); |
| aarch64_set_mem_long_double (cpu, address + 16, a); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| if (rm == rn) |
| HALT_UNALLOC; |
| |
| offset <<= 2; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address)); |
| aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| if (rm == rn) |
| HALT_UNALLOC; |
| |
| offset <<= 3; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address)); |
| aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8)); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb) |
| { |
| FRegister a; |
| unsigned rn = INSTR (14, 10); |
| unsigned rd = INSTR (9, 5); |
| unsigned rm = INSTR (4, 0); |
| uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK); |
| |
| if (rm == rn) |
| HALT_UNALLOC; |
| |
| offset <<= 4; |
| |
| if (wb != Post) |
| address += offset; |
| |
| aarch64_get_mem_long_double (cpu, address, & a); |
| aarch64_set_FP_long_double (cpu, rm, a); |
| aarch64_get_mem_long_double (cpu, address + 16, & a); |
| aarch64_set_FP_long_double (cpu, rn, a); |
| |
| if (wb == Post) |
| address += offset; |
| |
| if (wb != NoWriteBack) |
| aarch64_set_reg_u64 (cpu, rd, SP_OK, address); |
| } |
| |
| static void |
| dex_load_store_pair_fp (sim_cpu *cpu) |
| { |
| /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit) |
| instr[29,25] = instruction encoding |
| instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre) |
| instr[22] = load/store (1=> load) |
| instr[21,15] = signed, scaled, offset |
| instr[14,10] = Rn |
| instr[ 9, 5] = Rd |
| instr[ 4, 0] = Rm */ |
| |
| uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22)); |
| int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15); |
| |
| switch (dispatch) |
| { |
| case 2: store_pair_float (cpu, offset, Post); return; |
| case 3: load_pair_float (cpu, offset, Post); return; |
| case 4: store_pair_float (cpu, offset, NoWriteBack); return; |
| case 5: load_pair_float (cpu, offset, NoWriteBack); return; |
| case 6: store_pair_float (cpu, offset, Pre); return; |
| case 7: load_pair_float (cpu, offset, Pre); return; |
| |
| case 10: store_pair_double (cpu, offset, Post); return; |
| case 11: load_pair_double (cpu, offset, Post); return; |
| case 12: store_pair_double (cpu, offset, NoWriteBack); return; |
| case 13: load_pair_double (cpu, offset, NoWriteBack); return; |
| case 14: store_pair_double (cpu, offset, Pre); return; |
| case 15: load_pair_double (cpu, offset, Pre); return; |
| |
| case 18: store_pair_long_double (cpu, offset, Post); return; |
| case 19: load_pair_long_double (cpu, offset, Post); return; |
| case 20: store_pair_long_double (cpu, offset, NoWriteBack); return; |
| case 21: load_pair_long_double (cpu, offset, NoWriteBack); return; |
| case 22: store_pair_long_double (cpu, offset, Pre); return; |
| case 23: load_pair_long_double (cpu, offset, Pre); return; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
/* Return the number of the vector register O places after V.  There
   are 32 vector registers, so the result wraps from 31 back to 0 and is
   always a valid register number.  */
static inline unsigned
vec_reg (unsigned v, unsigned o)
{
  return (v + o) & 0x1F;
}
| |
| /* Load multiple N-element structures to M consecutive registers. */ |
| static void |
| vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M) |
| { |
| int all = INSTR (30, 30); |
| unsigned size = INSTR (11, 10); |
| unsigned vd = INSTR (4, 0); |
| unsigned rpt = (N == M) ? 1 : M; |
| unsigned selem = N; |
| unsigned i, j, k; |
| |
| switch (size) |
| { |
| case 0: /* 8-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (8 + (8 * all)); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j, |
| aarch64_get_mem_u8 (cpu, address)); |
| address += 1; |
| } |
| return; |
| |
| case 1: /* 16-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (4 + (4 * all)); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j, |
| aarch64_get_mem_u16 (cpu, address)); |
| address += 2; |
| } |
| return; |
| |
| case 2: /* 32-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (2 + (2 * all)); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j, |
| aarch64_get_mem_u32 (cpu, address)); |
| address += 4; |
| } |
| return; |
| |
| case 3: /* 64-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (1 + all); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j, |
| aarch64_get_mem_u64 (cpu, address)); |
| address += 8; |
| } |
| return; |
| } |
| } |
| |
| /* Load multiple 4-element structures into four consecutive registers. */ |
| static void |
| LD4 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 4, 4); |
| } |
| |
| /* Load multiple 3-element structures into three consecutive registers. */ |
| static void |
| LD3 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 3, 3); |
| } |
| |
| /* Load multiple 2-element structures into two consecutive registers. */ |
| static void |
| LD2 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 2, 2); |
| } |
| |
| /* Load multiple 1-element structures into one register. */ |
| static void |
| LD1_1 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 1, 1); |
| } |
| |
| /* Load multiple 1-element structures into two registers. */ |
| static void |
| LD1_2 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 1, 2); |
| } |
| |
| /* Load multiple 1-element structures into three registers. */ |
| static void |
| LD1_3 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 1, 3); |
| } |
| |
| /* Load multiple 1-element structures into four registers. */ |
| static void |
| LD1_4 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_load (cpu, address, 1, 4); |
| } |
| |
| /* Store multiple N-element structures from M consecutive registers. */ |
| static void |
| vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M) |
| { |
| int all = INSTR (30, 30); |
| unsigned size = INSTR (11, 10); |
| unsigned vd = INSTR (4, 0); |
| unsigned rpt = (N == M) ? 1 : M; |
| unsigned selem = N; |
| unsigned i, j, k; |
| |
| switch (size) |
| { |
| case 0: /* 8-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (8 + (8 * all)); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_mem_u8 |
| (cpu, address, |
| aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j)); |
| address += 1; |
| } |
| return; |
| |
| case 1: /* 16-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (4 + (4 * all)); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_mem_u16 |
| (cpu, address, |
| aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j)); |
| address += 2; |
| } |
| return; |
| |
| case 2: /* 32-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (2 + (2 * all)); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_mem_u32 |
| (cpu, address, |
| aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j)); |
| address += 4; |
| } |
| return; |
| |
| case 3: /* 64-bit operations. */ |
| for (i = 0; i < rpt; i++) |
| for (j = 0; j < (1 + all); j++) |
| for (k = 0; k < selem; k++) |
| { |
| aarch64_set_mem_u64 |
| (cpu, address, |
| aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j)); |
| address += 8; |
| } |
| return; |
| } |
| } |
| |
| /* Store multiple 4-element structure from four consecutive registers. */ |
| static void |
| ST4 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 4, 4); |
| } |
| |
| /* Store multiple 3-element structures from three consecutive registers. */ |
| static void |
| ST3 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 3, 3); |
| } |
| |
| /* Store multiple 2-element structures from two consecutive registers. */ |
| static void |
| ST2 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 2, 2); |
| } |
| |
| /* Store multiple 1-element structures from one register. */ |
| static void |
| ST1_1 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 1, 1); |
| } |
| |
| /* Store multiple 1-element structures from two registers. */ |
| static void |
| ST1_2 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 1, 2); |
| } |
| |
| /* Store multiple 1-element structures from three registers. */ |
| static void |
| ST1_3 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 1, 3); |
| } |
| |
| /* Store multiple 1-element structures from four registers. */ |
| static void |
| ST1_4 (sim_cpu *cpu, uint64_t address) |
| { |
| vec_store (cpu, address, 1, 4); |
| } |
| |
/* Work out the lane number and element size of a single structure
   load/store.  Expects the variables `full', `s', `size' and `lane' in
   the enclosing scope, with `size' holding instr[11,10] on entry.  On
   exit `lane' is the element index and `size' is 0, 1, 2 or 3 for
   8, 16, 32 or 64 bit elements.  Encodings rejected below halt as
   unallocated.  */
#define LDn_STn_SINGLE_LANE_AND_SIZE()					\
  do									\
    {									\
      unsigned ldst_scale = INSTR (15, 14);				\
									\
      if (ldst_scale == 0)						\
	{								\
	  /* Bytes: lane is full:s:size.  */				\
	  lane = (full << 3) | (s << 2) | size;				\
	  size = 0;							\
	}								\
      else if (ldst_scale == 1)						\
	{								\
	  /* Half words: size<0> must be clear.  */			\
	  if ((size & 1) != 0)						\
	    HALT_UNALLOC;						\
	  lane = (full << 2) | (s << 1) | (size >> 1);			\
	  size = 1;							\
	}								\
      else if (ldst_scale == 2)						\
	{								\
	  /* Words or double words: size<1> must be clear.  */		\
	  if ((size & 2) != 0)						\
	    HALT_UNALLOC;						\
									\
	  if ((size & 1) != 0)						\
	    {								\
	      /* Double words: s must be clear.  */			\
	      if (s)							\
		HALT_UNALLOC;						\
	      lane = full;						\
	      size = 3;							\
	    }								\
	  else								\
	    {								\
	      lane = (full << 1) | s;					\
	      size = 2;							\
	    }								\
	}								\
      else								\
	HALT_UNALLOC;							\
    }									\
  while (0)
| |
| /* Load single structure into one lane of N registers. */ |
| static void |
| do_vec_LDn_single (sim_cpu *cpu, uint64_t address) |
| { |
| /* instr[31] = 0 |
| instr[30] = element selector 0=>half, 1=>all elements |
| instr[29,24] = 00 1101 |
| instr[23] = 0=>simple, 1=>post |
| instr[22] = 1 |
| instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1) |
| instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), |
| 11111 (immediate post inc) |
| instr[15,13] = opcode |
| instr[12] = S, used for lane number |
| instr[11,10] = size, also used for lane number |
| instr[9,5] = address |
| instr[4,0] = Vd */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vd = INSTR (4, 0); |
| unsigned size = INSTR (11, 10); |
| unsigned s = INSTR (12, 12); |
| int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; |
| int lane = 0; |
| int i; |
| |
| NYI_assert (29, 24, 0x0D); |
| NYI_assert (22, 22, 1); |
| |
| /* Compute the lane number first (using size), and then compute size. */ |
| LDn_STn_SINGLE_LANE_AND_SIZE (); |
| |
| for (i = 0; i < nregs; i++) |
| switch (size) |
| { |
| case 0: |
| { |
| uint8_t val = aarch64_get_mem_u8 (cpu, address + i); |
| aarch64_set_vec_u8 (cpu, vd + i, lane, val); |
| break; |
| } |
| |
| case 1: |
| { |
| uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2)); |
| aarch64_set_vec_u16 (cpu, vd + i, lane, val); |
| break; |
| } |
| |
| case 2: |
| { |
| uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4)); |
| aarch64_set_vec_u32 (cpu, vd + i, lane, val); |
| break; |
| } |
| |
| case 3: |
| { |
| uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8)); |
| aarch64_set_vec_u64 (cpu, vd + i, lane, val); |
| break; |
| } |
| } |
| } |
| |
| /* Store single structure from one lane from N registers. */ |
| static void |
| do_vec_STn_single (sim_cpu *cpu, uint64_t address) |
| { |
| /* instr[31] = 0 |
| instr[30] = element selector 0=>half, 1=>all elements |
| instr[29,24] = 00 1101 |
| instr[23] = 0=>simple, 1=>post |
| instr[22] = 0 |
| instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1) |
| instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), |
| 11111 (immediate post inc) |
| instr[15,13] = opcode |
| instr[12] = S, used for lane number |
| instr[11,10] = size, also used for lane number |
| instr[9,5] = address |
| instr[4,0] = Vd */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vd = INSTR (4, 0); |
| unsigned size = INSTR (11, 10); |
| unsigned s = INSTR (12, 12); |
| int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; |
| int lane = 0; |
| int i; |
| |
| NYI_assert (29, 24, 0x0D); |
| NYI_assert (22, 22, 0); |
| |
| /* Compute the lane number first (using size), and then compute size. */ |
| LDn_STn_SINGLE_LANE_AND_SIZE (); |
| |
| for (i = 0; i < nregs; i++) |
| switch (size) |
| { |
| case 0: |
| { |
| uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane); |
| aarch64_set_mem_u8 (cpu, address + i, val); |
| break; |
| } |
| |
| case 1: |
| { |
| uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane); |
| aarch64_set_mem_u16 (cpu, address + (i * 2), val); |
| break; |
| } |
| |
| case 2: |
| { |
| uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane); |
| aarch64_set_mem_u32 (cpu, address + (i * 4), val); |
| break; |
| } |
| |
| case 3: |
| { |
| uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane); |
| aarch64_set_mem_u64 (cpu, address + (i * 8), val); |
| break; |
| } |
| } |
| } |
| |
| /* Load single structure into all lanes of N registers. */ |
| static void |
| do_vec_LDnR (sim_cpu *cpu, uint64_t address) |
| { |
| /* instr[31] = 0 |
| instr[30] = element selector 0=>half, 1=>all elements |
| instr[29,24] = 00 1101 |
| instr[23] = 0=>simple, 1=>post |
| instr[22] = 1 |
| instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1) |
| instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP), |
| 11111 (immediate post inc) |
| instr[15,14] = 11 |
| instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1) |
| instr[12] = 0 |
| instr[11,10] = element size 00=> byte(b), 01=> half(h), |
| 10=> word(s), 11=> double(d) |
| instr[9,5] = address |
| instr[4,0] = Vd */ |
| |
| unsigned full = INSTR (30, 30); |
| unsigned vd = INSTR (4, 0); |
| unsigned size = INSTR (11, 10); |
| int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; |
| int i, n; |
| |
| NYI_assert (29, 24, 0x0D); |
| NYI_assert (22, 22, 1); |
| NYI_assert (15, 14, 3); |
| NYI_assert (12, 12, 0); |
| |
| for (n = 0; n < nregs; n++) |
| switch (size) |
| { |
| case 0: |
| { |
| uint8_t val = aarch64_get_mem_u8 (cpu, address + n); |
| for (i = 0; i < (full ? 16 : 8); i++) |
| aarch64_set_vec_u8 (cpu, vd + n, i, val); |
| break; |
| } |
| |
| case 1: |
| { |
| uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2)); |
| for (i = 0; i < (full ? 8 : 4); i++) |
| aarch64_set_vec_u16 (cpu, vd + n, i, val); |
| break; |
| } |
| |
| case 2: |
| { |
| uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4)); |
| for (i = 0; i < (full ? 4 : 2); i++) |
| aarch64_set_vec_u32 (cpu, vd + n, i, val); |
| break; |
| } |
| |
| case 3: |
| { |
| uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8)); |
| for (i = 0; i < (full ? 2 : 1); i++) |
| aarch64_set_vec_u64 (cpu, vd + n, i, val); |
| break; |
| } |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| do_vec_load_store (sim_cpu *cpu) |
| { |
| /* {LD|ST}<N> {Vd..Vd+N}, vaddr |
| |
| instr[31] = 0 |
| instr[30] = element selector 0=>half, 1=>all elements |
| instr[29,25] = 00110 |
| instr[24] = 0=>multiple struct, 1=>single struct |
| instr[23] = 0=>simple, 1=>post |
| instr[22] = 0=>store, 1=>load |
| instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR) |
| instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP), |
| 11111 (immediate post inc) |
| instr[15,12] = elements and destinations. eg for load: |
| 0000=>LD4 => load multiple 4-element to |
| four consecutive registers |
| 0100=>LD3 => load multiple 3-element to |
| three consecutive registers |
| 1000=>LD2 => load multiple 2-element to |
| two consecutive registers |
| 0010=>LD1 => load multiple 1-element to |
| four consecutive registers |
| 0110=>LD1 => load multiple 1-element to |
| three consecutive registers |
| 1010=>LD1 => load multiple 1-element to |
| two consecutive registers |
| 0111=>LD1 => load multiple 1-element to |
| one register |
| 1100=>LDR1,LDR2 |
| 1110=>LDR3,LDR4 |
| instr[11,10] = element size 00=> byte(b), 01=> half(h), |
| 10=> word(s), 11=> double(d) |
| instr[9,5] = Vn, can be SP |
| instr[4,0] = Vd */ |
| |
| int single; |
| int post; |
| int load; |
| unsigned vn; |
| uint64_t address; |
| int type; |
| |
| if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06) |
| HALT_NYI; |
| |
| single = INSTR (24, 24); |
| post = INSTR (23, 23); |
| load = INSTR (22, 22); |
| type = INSTR (15, 12); |
| vn = INSTR (9, 5); |
| address = aarch64_get_reg_u64 (cpu, vn, SP_OK); |
| |
| if (! single && INSTR (21, 21) != 0) |
| HALT_UNALLOC; |
| |
| if (post) |
| { |
| unsigned vm = INSTR (20, 16); |
| |
| if (vm == R31) |
| { |
| unsigned sizeof_operation; |
| |
| if (single) |
| { |
| if ((type >= 0) && (type <= 11)) |
| { |
| int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1; |
| switch (INSTR (15, 14)) |
| { |
| case 0: |
| sizeof_operation = nregs * 1; |
| break; |
| case 1: |
| sizeof_operation = nregs * 2; |
| break; |
| case 2: |
| if (INSTR (10, 10) == 0) |
| sizeof_operation = nregs * 4; |
| else |
| sizeof_operation = nregs * 8; |
| break; |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| else if (type == 0xC) |
| { |
| sizeof_operation = INSTR (21, 21) ? 2 : 1; |
| sizeof_operation <<= INSTR (11, 10); |
| } |
| else if (type == 0xE) |
| { |
| sizeof_operation = INSTR (21, 21) ? 4 : 3; |
| sizeof_operation <<= INSTR (11, 10); |
| } |
| else |
| HALT_UNALLOC; |
| } |
| else |
| { |
| switch (type) |
| { |
| case 0: sizeof_operation = 32; break; |
| case 4: sizeof_operation = 24; break; |
| case 8: sizeof_operation = 16; break; |
| |
| case 7: |
| /* One register, immediate offset variant. */ |
| sizeof_operation = 8; |
| break; |
| |
| case 10: |
| /* Two registers, immediate offset variant. */ |
| sizeof_operation = 16; |
| break; |
| |
| case 6: |
| /* Three registers, immediate offset variant. */ |
| sizeof_operation = 24; |
| break; |
| |
| case 2: |
| /* Four registers, immediate offset variant. */ |
| sizeof_operation = 32; |
| break; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| |
| if (INSTR (30, 30)) |
| sizeof_operation *= 2; |
| } |
| |
| aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation); |
| } |
| else |
| aarch64_set_reg_u64 (cpu, vn, SP_OK, |
| address + aarch64_get_reg_u64 (cpu, vm, NO_SP)); |
| } |
| else |
| { |
| NYI_assert (20, 16, 0); |
| } |
| |
| if (single) |
| { |
| if (load) |
| { |
| if ((type >= 0) && (type <= 11)) |
| do_vec_LDn_single (cpu, address); |
| else if ((type == 0xC) || (type == 0xE)) |
| do_vec_LDnR (cpu, address); |
| else |
| HALT_UNALLOC; |
| return; |
| } |
| |
| /* Stores. */ |
| if ((type >= 0) && (type <= 11)) |
| { |
| do_vec_STn_single (cpu, address); |
| return; |
| } |
| |
| HALT_UNALLOC; |
| } |
| |
| if (load) |
| { |
| switch (type) |
| { |
| case 0: LD4 (cpu, address); return; |
| case 4: LD3 (cpu, address); return; |
| case 8: LD2 (cpu, address); return; |
| case 2: LD1_4 (cpu, address); return; |
| case 6: LD1_3 (cpu, address); return; |
| case 10: LD1_2 (cpu, address); return; |
| case 7: LD1_1 (cpu, address); return; |
| |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| /* Stores. */ |
| switch (type) |
| { |
| case 0: ST4 (cpu, address); return; |
| case 4: ST3 (cpu, address); return; |
| case 8: ST2 (cpu, address); return; |
| case 2: ST1_4 (cpu, address); return; |
| case 6: ST1_3 (cpu, address); return; |
| case 10: ST1_2 (cpu, address); return; |
| case 7: ST1_1 (cpu, address); return; |
| default: |
| HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| dexLdSt (sim_cpu *cpu) |
| { |
| /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); |
| assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 || |
| group == GROUP_LDST_1100 || group == GROUP_LDST_1110 |
| bits [29,28:26] of a LS are the secondary dispatch vector. */ |
| uint32_t group2 = dispatchLS (aarch64_get_instr (cpu)); |
| |
| switch (group2) |
| { |
| case LS_EXCL_000: |
| dexLoadExclusive (cpu); return; |
| |
| case LS_LIT_010: |
| case LS_LIT_011: |
| dexLoadLiteral (cpu); return; |
| |
| case LS_OTHER_110: |
| case LS_OTHER_111: |
| dexLoadOther (cpu); return; |
| |
| case LS_ADVSIMD_001: |
| do_vec_load_store (cpu); return; |
| |
| case LS_PAIR_100: |
| dex_load_store_pair_gr (cpu); return; |
| |
| case LS_PAIR_101: |
| dex_load_store_pair_fp (cpu); return; |
| |
| default: |
| /* Should never reach here. */ |
| HALT_NYI; |
| } |
| } |
| |
| /* Specific decode and execute for group Data Processing Register. */ |
| |
| static void |
| dexLogicalShiftedRegister (sim_cpu *cpu) |
| { |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30,29] = op |
| instr[28:24] = 01010 |
| instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR |
| instr[21] = N |
| instr[20,16] = Rm |
| instr[15,10] = count : must be 0xxxxx for 32 bit |
| instr[9,5] = Rn |
| instr[4,0] = Rd */ |
| |
| uint32_t size = INSTR (31, 31); |
| Shift shiftType = INSTR (23, 22); |
| uint32_t count = INSTR (15, 10); |
| |
| /* 32 bit operations must have count[5] = 0. |
| or else we have an UNALLOC. */ |
| if (size == 0 && uimm (count, 5, 5)) |
| HALT_UNALLOC; |
| |
| /* Dispatch on size:op:N. */ |
| switch ((INSTR (31, 29) << 1) | INSTR (21, 21)) |
| { |
| case 0: and32_shift (cpu, shiftType, count); return; |
| case 1: bic32_shift (cpu, shiftType, count); return; |
| case 2: orr32_shift (cpu, shiftType, count); return; |
| case 3: orn32_shift (cpu, shiftType, count); return; |
| case 4: eor32_shift (cpu, shiftType, count); return; |
| case 5: eon32_shift (cpu, shiftType, count); return; |
| case 6: ands32_shift (cpu, shiftType, count); return; |
| case 7: bics32_shift (cpu, shiftType, count); return; |
| case 8: and64_shift (cpu, shiftType, count); return; |
| case 9: bic64_shift (cpu, shiftType, count); return; |
| case 10:orr64_shift (cpu, shiftType, count); return; |
| case 11:orn64_shift (cpu, shiftType, count); return; |
| case 12:eor64_shift (cpu, shiftType, count); return; |
| case 13:eon64_shift (cpu, shiftType, count); return; |
| case 14:ands64_shift (cpu, shiftType, count); return; |
| case 15:bics64_shift (cpu, shiftType, count); return; |
| } |
| } |
| |
| /* 32 bit conditional select. */ |
| static void |
| csel32 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| : aarch64_get_reg_u32 (cpu, rm, NO_SP)); |
| } |
| |
| /* 64 bit conditional select. */ |
| static void |
| csel64 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| : aarch64_get_reg_u64 (cpu, rm, NO_SP)); |
| } |
| |
| /* 32 bit conditional increment. */ |
| static void |
| csinc32 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1); |
| } |
| |
| /* 64 bit conditional increment. */ |
| static void |
| csinc64 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1); |
| } |
| |
| /* 32 bit conditional invert. */ |
| static void |
| csinv32 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP)); |
| } |
| |
| /* 64 bit conditional invert. */ |
| static void |
| csinv64 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP)); |
| } |
| |
| /* 32 bit conditional negate. */ |
| static void |
| csneg32 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| : - aarch64_get_reg_u32 (cpu, rm, NO_SP)); |
| } |
| |
| /* 64 bit conditional negate. */ |
| static void |
| csneg64 (sim_cpu *cpu, CondCode cc) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| testConditionCode (cpu, cc) |
| ? aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| : - aarch64_get_reg_u64 (cpu, rm, NO_SP)); |
| } |
| |
| static void |
| dexCondSelect (sim_cpu *cpu) |
| { |
| /* instr[28,21] = 11011011 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC, |
| 100 ==> CSINV, 101 ==> CSNEG, |
| _1_ ==> UNALLOC |
| instr[29] = S : 0 ==> ok, 1 ==> UNALLOC |
| instr[15,12] = cond |
| instr[29] = S : 0 ==> ok, 1 ==> UNALLOC */ |
| |
| CondCode cc = INSTR (15, 12); |
| uint32_t S = INSTR (29, 29); |
| uint32_t op2 = INSTR (11, 10); |
| |
| if (S == 1) |
| HALT_UNALLOC; |
| |
| if (op2 & 0x2) |
| HALT_UNALLOC; |
| |
| switch ((INSTR (31, 30) << 1) | op2) |
| { |
| case 0: csel32 (cpu, cc); return; |
| case 1: csinc32 (cpu, cc); return; |
| case 2: csinv32 (cpu, cc); return; |
| case 3: csneg32 (cpu, cc); return; |
| case 4: csel64 (cpu, cc); return; |
| case 5: csinc64 (cpu, cc); return; |
| case 6: csinv64 (cpu, cc); return; |
| case 7: csneg64 (cpu, cc); return; |
| } |
| } |
| |
| /* Some helpers for counting leading 1 or 0 bits. */ |
| |
/* Counts the number of leading bits which are the same
   in a 32 bit value in the range 1 to 32.

   The top bit always counts, so the result is at least 1; a value of
   all zeros or all ones yields 32.  Only unsigned arithmetic is used,
   so the result does not depend on how the host shifts negative
   signed values.  */
static uint32_t
leading32 (uint32_t value)
{
  uint32_t top = value >> 31;
  uint32_t count = 1;
  int bit;

  /* Walk down from bit 30 until a bit differs from the top bit.  */
  for (bit = 30; bit >= 0; bit--)
    {
      if (((value >> bit) & 1) != top)
	break;
      count++;
    }

  return count;
}
| |
/* Counts the number of leading bits which are the same
   in a 64 bit value in the range 1 to 64.

   The top bit always counts, so the result is at least 1; a value of
   all zeros or all ones yields 64.  Only unsigned arithmetic is used,
   so the result does not depend on how the host shifts negative
   signed values.  */
static uint64_t
leading64 (uint64_t value)
{
  uint64_t top = value >> 63;
  uint64_t count = 1;
  int bit;

  /* Walk down from bit 62 until a bit differs from the top bit.  */
  for (bit = 62; bit >= 0; bit--)
    {
      if (((value >> bit) & 1) != top)
	break;
      count++;
    }

  return count;
}
| |
| /* Bit operations. */ |
| /* N.B register args may not be SP. */ |
| |
| /* 32 bit count leading sign bits. */ |
| static void |
| cls32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* N.B. the result needs to exclude the leading bit. */ |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1); |
| } |
| |
| /* 64 bit count leading sign bits. */ |
| static void |
| cls64 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* N.B. the result needs to exclude the leading bit. */ |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1); |
| } |
| |
| /* 32 bit count leading zero bits. */ |
| static void |
| clz32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| |
| /* if the sign (top) bit is set then the count is 0. */ |
| if (pick32 (value, 31, 31)) |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L); |
| else |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value)); |
| } |
| |
| /* 64 bit count leading zero bits. */ |
| static void |
| clz64 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| |
| /* if the sign (top) bit is set then the count is 0. */ |
| if (pick64 (value, 63, 63)) |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L); |
| else |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value)); |
| } |
| |
| /* 32 bit reverse bits. */ |
| static void |
| rbit32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t result = 0; |
| int i; |
| |
| for (i = 0; i < 32; i++) |
| { |
| result <<= 1; |
| result |= (value & 1); |
| value >>= 1; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| } |
| |
| /* 64 bit reverse bits. */ |
| static void |
| rbit64 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t result = 0; |
| int i; |
| |
| for (i = 0; i < 64; i++) |
| { |
| result <<= 1; |
| result |= (value & 1UL); |
| value >>= 1; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| } |
| |
| /* 32 bit reverse bytes. */ |
| static void |
| rev32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t result = 0; |
| int i; |
| |
| for (i = 0; i < 4; i++) |
| { |
| result <<= 8; |
| result |= (value & 0xff); |
| value >>= 8; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| } |
| |
| /* 64 bit reverse bytes. */ |
| static void |
| rev64 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t result = 0; |
| int i; |
| |
| for (i = 0; i < 8; i++) |
| { |
| result <<= 8; |
| result |= (value & 0xffULL); |
| value >>= 8; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| } |
| |
| /* 32 bit reverse shorts. */ |
| /* N.B.this reverses the order of the bytes in each half word. */ |
| static void |
| revh32 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint32_t result = 0; |
| int i; |
| |
| for (i = 0; i < 2; i++) |
| { |
| result <<= 8; |
| result |= (value & 0x00ff00ff); |
| value >>= 8; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| } |
| |
| /* 64 bit reverse shorts. */ |
| /* N.B.this reverses the order of the bytes in each half word. */ |
| static void |
| revh64 (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| uint64_t result = 0; |
| int i; |
| |
| for (i = 0; i < 2; i++) |
| { |
| result <<= 8; |
| result |= (value & 0x00ff00ff00ff00ffULL); |
| value >>= 8; |
| } |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, result); |
| } |
| |
| static void |
| dexDataProc1Source (sim_cpu *cpu) |
| { |
| /* instr[30] = 1 |
| instr[28,21] = 111010110 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[29] = S : 0 ==> ok, 1 ==> UNALLOC |
| instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC |
| instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16, |
| 000010 ==> REV, 000011 ==> UNALLOC |
| 000100 ==> CLZ, 000101 ==> CLS |
| ow ==> UNALLOC |
| instr[9,5] = rn : may not be SP |
| instr[4,0] = rd : may not be SP. */ |
| |
| uint32_t S = INSTR (29, 29); |
| uint32_t opcode2 = INSTR (20, 16); |
| uint32_t opcode = INSTR (15, 10); |
| uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode); |
| |
| if (S == 1) |
| HALT_UNALLOC; |
| |
| if (opcode2 != 0) |
| HALT_UNALLOC; |
| |
| if (opcode & 0x38) |
| HALT_UNALLOC; |
| |
| switch (dispatch) |
| { |
| case 0: rbit32 (cpu); return; |
| case 1: revh32 (cpu); return; |
| case 2: rev32 (cpu); return; |
| case 4: clz32 (cpu); return; |
| case 5: cls32 (cpu); return; |
| case 8: rbit64 (cpu); return; |
| case 9: revh64 (cpu); return; |
| case 10:rev32 (cpu); return; |
| case 11:rev64 (cpu); return; |
| case 12:clz64 (cpu); return; |
| case 13:cls64 (cpu); return; |
| default: HALT_UNALLOC; |
| } |
| } |
| |
| /* Variable shift. |
| Shifts by count supplied in register. |
| N.B register args may not be SP. |
| These all use the shifted auxiliary function for |
| simplicity and clarity. Writing the actual shift |
| inline would avoid a branch and so be faster but |
| would also necessitate getting signs right. */ |
| |
| /* 32 bit arithmetic shift right. */ |
| static void |
| asrv32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR, |
| (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); |
| } |
| |
| /* 64 bit arithmetic shift right. */ |
| static void |
| asrv64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR, |
| (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); |
| } |
| |
| /* 32 bit logical shift left. */ |
| static void |
| lslv32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL, |
| (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); |
| } |
| |
| /* 64 bit arithmetic shift left. */ |
| static void |
| lslv64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL, |
| (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); |
| } |
| |
| /* 32 bit logical shift right. */ |
| static void |
| lsrv32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR, |
| (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); |
| } |
| |
| /* 64 bit logical shift right. */ |
| static void |
| lsrv64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR, |
| (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); |
| } |
| |
| /* 32 bit rotate right. */ |
| static void |
| rorv32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR, |
| (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f))); |
| } |
| |
| /* 64 bit rotate right. */ |
| static void |
| rorv64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR, |
| (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f))); |
| } |
| |
| |
| /* divide. */ |
| |
| /* 32 bit signed divide. */ |
| static void |
| cpuiv32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| /* N.B. the pseudo-code does the divide using 64 bit data. */ |
| /* TODO : check that this rounds towards zero as required. */ |
| int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP); |
| int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP); |
| |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, |
| divisor ? ((int32_t) (dividend / divisor)) : 0); |
| } |
| |
| /* 64 bit signed divide. */ |
| static void |
| cpuiv64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* TODO : check that this rounds towards zero as required. */ |
| int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP); |
| |
| aarch64_set_reg_s64 |
| (cpu, rd, NO_SP, |
| divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0); |
| } |
| |
| /* 32 bit unsigned divide. */ |
| static void |
| udiv32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* N.B. the pseudo-code does the divide using 64 bit data. */ |
| uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP); |
| uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP); |
| |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| divisor ? (uint32_t) (dividend / divisor) : 0); |
| } |
| |
| /* 64 bit unsigned divide. */ |
| static void |
| udiv64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* TODO : check that this rounds towards zero as required. */ |
| uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP); |
| |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0); |
| } |
| |
| static void |
| dexDataProc2Source (sim_cpu *cpu) |
| { |
| /* assert instr[30] == 0 |
| instr[28,21] == 11010110 |
| instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit |
| instr[29] = S : 0 ==> ok, 1 ==> UNALLOC |
| instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> CPUIV, |
| 001000 ==> LSLV, 001001 ==> LSRV |
| 001010 ==> ASRV, 001011 ==> RORV |
| ow ==> UNALLOC. */ |
| |
| uint32_t dispatch; |
| uint32_t S = INSTR (29, 29); |
| uint32_t opcode = INSTR (15, 10); |
| |
| if (S == 1) |
| HALT_UNALLOC; |
| |
| if (opcode & 0x34) |
| HALT_UNALLOC; |
| |
| dispatch = ( (INSTR (31, 31) << 3) |
| | (uimm (opcode, 3, 3) << 2) |
| | uimm (opcode, 1, 0)); |
| switch (dispatch) |
| { |
| case 2: udiv32 (cpu); return; |
| case 3: cpuiv32 (cpu); return; |
| case 4: lslv32 (cpu); return; |
| case 5: lsrv32 (cpu); return; |
| case 6: asrv32 (cpu); return; |
| case 7: rorv32 (cpu); return; |
| case 10: udiv64 (cpu); return; |
| case 11: cpuiv64 (cpu); return; |
| case 12: lslv64 (cpu); return; |
| case 13: lsrv64 (cpu); return; |
| case 14: asrv64 (cpu); return; |
| case 15: rorv64 (cpu); return; |
| default: HALT_UNALLOC; |
| } |
| } |
| |
| |
| /* Multiply. */ |
| |
| /* 32 bit multiply and add. */ |
| static void |
| madd32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u32 (cpu, ra, NO_SP) |
| + aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| * aarch64_get_reg_u32 (cpu, rm, NO_SP)); |
| } |
| |
| /* 64 bit multiply and add. */ |
| static void |
| madd64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, ra, NO_SP) |
| + (aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| * aarch64_get_reg_u64 (cpu, rm, NO_SP))); |
| } |
| |
| /* 32 bit multiply and sub. */ |
| static void |
| msub32 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u32 (cpu, ra, NO_SP) |
| - aarch64_get_reg_u32 (cpu, rn, NO_SP) |
| * aarch64_get_reg_u32 (cpu, rm, NO_SP)); |
| } |
| |
| /* 64 bit multiply and sub. */ |
| static void |
| msub64 (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, ra, NO_SP) |
| - aarch64_get_reg_u64 (cpu, rn, NO_SP) |
| * aarch64_get_reg_u64 (cpu, rm, NO_SP)); |
| } |
| |
| /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */ |
| static void |
| smaddl (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* N.B. we need to multiply the signed 32 bit values in rn, rm to |
| obtain a 64 bit product. */ |
| aarch64_set_reg_s64 |
| (cpu, rd, NO_SP, |
| aarch64_get_reg_s64 (cpu, ra, NO_SP) |
| + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP)) |
| * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP))); |
| } |
| |
| /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */ |
| static void |
| smsubl (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| /* N.B. we need to multiply the signed 32 bit values in rn, rm to |
| obtain a 64 bit product. */ |
| aarch64_set_reg_s64 |
| (cpu, rd, NO_SP, |
| aarch64_get_reg_s64 (cpu, ra, NO_SP) |
| - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP)) |
| * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP))); |
| } |
| |
| /* Integer Multiply/Divide. */ |
| |
| /* First some macros and a helper function. */ |
| /* Macros to test or access elements of 64 bit words. */ |
| |
/* Mask used to access lo 32 bits of 64 bit unsigned int.  */
#define LOW_WORD_MASK ((1ULL << 32) - 1)
/* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
/* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int.  */
#define highWordToU64(_value_u64) ((_value_u64) >> 32)

/* Offset of sign bit in 64 bit signed integer.  */
#define SIGN_SHIFT_U64 63
/* The sign bit itself -- also identifies the minimum negative int value.
   1ULL keeps the shift well defined on hosts where long is 32 bits.  */
#define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
/* Return true if a 64 bit signed int presented as an unsigned int is the
   most negative value.  */
#define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
/* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set.  */
#define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
/* Return -1L if a 64 bit signed int presented as an unsigned int has its
   sign bit set, and 1L otherwise.  */
#define signOfU64(_value_u64) \
  ((((_value_u64) >> SIGN_SHIFT_U64) & 1) ? -1L : 1L)
/* Clear the sign bit of a 64 bit signed int presented as an unsigned int.
   N.B. this modifies its argument, which must be an lvalue.  */
#define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
| |
/* Multiply two unsigned 64 bit ints and return the hi 64 bits of the
   128 bit product.

   Each operand is split into 32 bit halves and the four partial
   products are combined by hand, tracking the carries that fall out
   of the middle column.  */

static uint64_t
mul64hi (uint64_t value1, uint64_t value2)
{
  uint64_t a_lo = lowWordToU64 (value1);
  uint64_t a_hi = highWordToU64 (value1);
  uint64_t b_lo = lowWordToU64 (value2);
  uint64_t b_hi = highWordToU64 (value2);

  /* The four 32 x 32 -> 64 bit partial products.  */
  uint64_t lo_lo = a_lo * b_lo;
  uint64_t lo_hi = a_lo * b_hi;
  uint64_t hi_lo = a_hi * b_lo;
  uint64_t hi_hi = a_hi * b_hi;

  /* Number of times the middle column wrapped past 64 bits.  */
  uint64_t carries = 0;
  uint64_t middle;

  /* Only the upper half of the lowest product reaches the middle
     column.  */
  middle = (lo_lo >> 32) + lo_hi;
  if (middle < lo_hi)
    carries++;

  middle += hi_lo;
  if (middle < hi_lo)
    carries++;

  /* The upper half of the middle column lands in the low half of the
     result; each wrap is worth 2^32 there.  */
  return (middle >> 32) + hi_hi + (carries << 32);
}
| |
| /* Signed multiply high, source, source2 : |
| 64 bit, dest <-- high 64-bit of result. */ |
| static void |
| smulh (sim_cpu *cpu) |
| { |
| uint64_t uresult; |
| int64_t result; |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| GReg ra = INSTR (14, 10); |
| int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP); |
| int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP); |
| uint64_t uvalue1; |
| uint64_t uvalue2; |
| int negate = 0; |
| |
| if (ra != R31) |
| HALT_UNALLOC; |
| |
| /* Convert to unsigned and use the unsigned mul64hi routine |
| the fix the sign up afterwards. */ |
| if (value1 < 0) |
| { |
| negate = !negate; |
| uvalue1 = -value1; |
| } |
| else |
| { |
| uvalue1 = value1; |
| } |
| |
| if (value2 < 0) |
| { |
| negate = !negate; |
| uvalue2 = -value2; |
| } |
| else |
| { |
| uvalue2 = value2; |
| } |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| |
| uresult = mul64hi (uvalue1, uvalue2); |
| result = uresult; |
| |
| if (negate) |
| { |
| /* Multiply 128-bit result by -1, which means highpart gets inverted, |
| and has carry in added only if low part is 0. */ |
| result = ~result; |
| if ((uvalue1 * uvalue2) == 0) |
| result += 1; |
| } |
| |
| aarch64_set_reg_s64 (cpu, rd, NO_SP, result); |
| } |
| |
| /* Unsigned multiply add long -- source, source2 : |
| 32 bit, source3 : 64 bit. */ |
| static void |
| umaddl (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* N.B. we need to multiply the signed 32 bit values in rn, rm to |
| obtain a 64 bit product. */ |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, ra, NO_SP) |
| + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP)) |
| * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP))); |
| } |
| |
| /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */ |
| static void |
| umsubl (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned ra = INSTR (14, 10); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* N.B. we need to multiply the signed 32 bit values in rn, rm to |
| obtain a 64 bit product. */ |
| aarch64_set_reg_u64 |
| (cpu, rd, NO_SP, |
| aarch64_get_reg_u64 (cpu, ra, NO_SP) |
| - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP)) |
| * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP))); |
| } |
| |
| /* Unsigned multiply high, source, source2 : |
| 64 bit, dest <-- high 64-bit of result. */ |
| static void |
| umulh (sim_cpu *cpu) |
| { |
| unsigned rm = INSTR (20, 16); |
| unsigned rn = INSTR (9, 5); |
| unsigned rd = INSTR (4, 0); |
| GReg ra = INSTR (14, 10); |
| |
| if (ra != R31) |
| HALT_UNALLOC; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rd, NO_SP, |
| mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP), |
| aarch64_get_reg_u64 (cpu, rm, NO_SP))); |
| } |
| |
| static void |
| dexDataProc3Source (sim_cpu *cpu) |
| { |
| /* assert instr[28,24] == 11011. */ |
| /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least) |
| instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC |
| instr[23,21] = op31 : 111 ==> UNALLOC, o2 ==> ok |
| instr[15] = o0 : 0/1 ==> ok |
| instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit) |
| 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only) |
| 0100 ==> SMULH, (64 bit only) |
| 1010 ==> UMADDL, 1011 ==> UNSUBL, (64 bit only) |
| 1100 ==> UMULH (64 bit only) |
| ow ==> UNALLOC. */ |
| |
| uint32_t dispatch; |
| uint32_t size = INSTR (31, 31); |
| uint32_t op54 = INSTR (30, 29); |
| uint32_t op31 = INSTR (23, 21); |
| uint32_t o0 = INSTR (15, 15); |
| |
| if (op54 != 0) |
| HALT_UNALLOC; |
| |
| if (size == 0) |
| { |
| if (op31 != 0) |
| HALT_UNALLOC; |
| |
| if (o0 == 0) |
| madd32 (cpu); |
| else |
| msub32 (cpu); |
| return; |
| } |
| |
| dispatch = (op31 << 1) | o0; |
| |
| switch (dispatch) |
| { |
| case 0: madd64 (cpu); return; |
| case 1: msub64 (cpu); return; |
| case 2: smaddl (cpu); return; |
| case 3: smsubl (cpu); return; |
| case 4: smulh (cpu); return; |
| case 10: umaddl (cpu); return; |
| case 11: umsubl (cpu); return; |
| case 12: umulh (cpu); return; |
| default: HALT_UNALLOC; |
| } |
| } |
| |
| static void |
| dexDPReg (sim_cpu *cpu) |
| { |
| /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); |
| assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101 |
| bits [28:24:21] of a DPReg are the secondary dispatch vector. */ |
| uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu)); |
| |
| switch (group2) |
| { |
| case DPREG_LOG_000: |
| case DPREG_LOG_001: |
| dexLogicalShiftedRegister (cpu); return; |
| |
| case DPREG_ADDSHF_010: |
| dexAddSubtractShiftedRegister (cpu); return; |
| |
| case DPREG_ADDEXT_011: |
| dexAddSubtractExtendedRegister (cpu); return; |
| |
| case DPREG_ADDCOND_100: |
| { |
| /* This set bundles a variety of different operations. */ |
| /* Check for. */ |
| /* 1) add/sub w carry. */ |
| uint32_t mask1 = 0x1FE00000U; |
| uint32_t val1 = 0x1A000000U; |
| /* 2) cond compare register/immediate. */ |
| uint32_t mask2 = 0x1FE00000U; |
| uint32_t val2 = 0x1A400000U; |
| /* 3) cond select. */ |
| uint32_t mask3 = 0x1FE00000U; |
| uint32_t val3 = 0x1A800000U; |
| /* 4) data proc 1/2 source. */ |
| uint32_t mask4 = 0x1FE00000U; |
| uint32_t val4 = 0x1AC00000U; |
| |
| if ((aarch64_get_instr (cpu) & mask1) == val1) |
| dexAddSubtractWithCarry (cpu); |
| |
| else if ((aarch64_get_instr (cpu) & mask2) == val2) |
| CondCompare (cpu); |
| |
| else if ((aarch64_get_instr (cpu) & mask3) == val3) |
| dexCondSelect (cpu); |
| |
| else if ((aarch64_get_instr (cpu) & mask4) == val4) |
| { |
| /* Bit 30 is clear for data proc 2 source |
| and set for data proc 1 source. */ |
| if (aarch64_get_instr (cpu) & (1U << 30)) |
| dexDataProc1Source (cpu); |
| else |
| dexDataProc2Source (cpu); |
| } |
| |
| else |
| /* Should not reach here. */ |
| HALT_NYI; |
| |
| return; |
| } |
| |
| case DPREG_3SRC_110: |
| dexDataProc3Source (cpu); return; |
| |
| case DPREG_UNALLOC_101: |
| HALT_UNALLOC; |
| |
| case DPREG_3SRC_111: |
| dexDataProc3Source (cpu); return; |
| |
| default: |
| /* Should never reach here. */ |
| HALT_NYI; |
| } |
| } |
| |
| /* Unconditional Branch immediate. |
| Offset is a PC-relative byte offset in the range +/- 128MiB. |
| The offset is assumed to be raw from the decode i.e. the |
| simulator is expected to scale them from word offsets to byte. */ |
| |
| /* Unconditional branch. */ |
| static void |
| buc (sim_cpu *cpu, int32_t offset) |
| { |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| static unsigned stack_depth = 0; |
| |
| /* Unconditional branch and link -- writes return PC to LR. */ |
| static void |
| bl (sim_cpu *cpu, int32_t offset) |
| { |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_save_LR (cpu); |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| |
| if (TRACE_BRANCH_P (cpu)) |
| { |
| ++ stack_depth; |
| TRACE_BRANCH (cpu, |
| " %*scall %" PRIx64 " [%s]" |
| " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]", |
| stack_depth, " ", aarch64_get_next_PC (cpu), |
| aarch64_get_func (CPU_STATE (cpu), |
| aarch64_get_next_PC (cpu)), |
| aarch64_get_reg_u64 (cpu, 0, NO_SP), |
| aarch64_get_reg_u64 (cpu, 1, NO_SP), |
| aarch64_get_reg_u64 (cpu, 2, NO_SP) |
| ); |
| } |
| } |
| |
| /* Unconditional Branch register. |
| Branch/return address is in source register. */ |
| |
| /* Unconditional branch. */ |
| static void |
| br (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP)); |
| } |
| |
| /* Unconditional branch and link -- writes return PC to LR. */ |
| static void |
| blr (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| /* The pseudo code in the spec says we update LR before fetching. |
| the value from the rn. */ |
| aarch64_save_LR (cpu); |
| aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP)); |
| |
| if (TRACE_BRANCH_P (cpu)) |
| { |
| ++ stack_depth; |
| TRACE_BRANCH (cpu, |
| " %*scall %" PRIx64 " [%s]" |
| " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]", |
| stack_depth, " ", aarch64_get_next_PC (cpu), |
| aarch64_get_func (CPU_STATE (cpu), |
| aarch64_get_next_PC (cpu)), |
| aarch64_get_reg_u64 (cpu, 0, NO_SP), |
| aarch64_get_reg_u64 (cpu, 1, NO_SP), |
| aarch64_get_reg_u64 (cpu, 2, NO_SP) |
| ); |
| } |
| } |
| |
| /* Return -- assembler will default source to LR this is functionally |
| equivalent to br but, presumably, unlike br it side effects the |
| branch predictor. */ |
| static void |
| ret (sim_cpu *cpu) |
| { |
| unsigned rn = INSTR (9, 5); |
| aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP)); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (TRACE_BRANCH_P (cpu)) |
| { |
| TRACE_BRANCH (cpu, |
| " %*sreturn [result: %" PRIx64 "]", |
| stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP)); |
| -- stack_depth; |
| } |
| } |
| |
| /* NOP -- we implement this and call it from the decode in case we |
| want to intercept it later. */ |
| |
| static void |
| nop (sim_cpu *cpu) |
| { |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| } |
| |
| /* Data synchronization barrier. */ |
| |
| static void |
| dsb (sim_cpu *cpu) |
| { |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| } |
| |
| /* Data memory barrier. */ |
| |
| static void |
| dmb (sim_cpu *cpu) |
| { |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| } |
| |
| /* Instruction synchronization barrier. */ |
| |
| static void |
| isb (sim_cpu *cpu) |
| { |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| } |
| |
| static void |
| dexBranchImmediate (sim_cpu *cpu) |
| { |
| /* assert instr[30,26] == 00101 |
| instr[31] ==> 0 == B, 1 == BL |
| instr[25,0] == imm26 branch offset counted in words. */ |
| |
| uint32_t top = INSTR (31, 31); |
| /* We have a 26 byte signed word offset which we need to pass to the |
| execute routine as a signed byte offset. */ |
| int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2; |
| |
| if (top) |
| bl (cpu, offset); |
| else |
| buc (cpu, offset); |
| } |
| |
| /* Control Flow. */ |
| |
| /* Conditional branch |
| |
| Offset is a PC-relative byte offset in the range +/- 1MiB pos is |
| a bit position in the range 0 .. 63 |
| |
| cc is a CondCode enum value as pulled out of the decode |
| |
| N.B. any offset register (source) can only be Xn or Wn. */ |
| |
| static void |
| bcc (sim_cpu *cpu, int32_t offset, CondCode cc) |
| { |
| /* The test returns TRUE if CC is met. */ |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (testConditionCode (cpu, cc)) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| /* 32 bit branch on register non-zero. */ |
| static void |
| cbnz32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| /* 64 bit branch on register zero. */ |
| static void |
| cbnz (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| /* 32 bit branch on register non-zero. */ |
| static void |
| cbz32 (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| /* 64 bit branch on register zero. */ |
| static void |
| cbz (sim_cpu *cpu, int32_t offset) |
| { |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| /* Branch on register bit test non-zero -- one size fits all. */ |
| static void |
| tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset) |
| { |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| /* Branch on register bit test zero -- one size fits all. */ |
| static void |
| tbz (sim_cpu *cpu, uint32_t pos, int32_t offset) |
| { |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))) |
| aarch64_set_next_PC_by_offset (cpu, offset); |
| } |
| |
| static void |
| dexCompareBranchImmediate (sim_cpu *cpu) |
| { |
| /* instr[30,25] = 01 1010 |
| instr[31] = size : 0 ==> 32, 1 ==> 64 |
| instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ |
| instr[23,5] = simm19 branch offset counted in words |
| instr[4,0] = rt */ |
| |
| uint32_t size = INSTR (31, 31); |
| uint32_t op = INSTR (24, 24); |
| int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2; |
| |
| if (size == 0) |
| { |
| if (op == 0) |
| cbz32 (cpu, offset); |
| else |
| cbnz32 (cpu, offset); |
| } |
| else |
| { |
| if (op == 0) |
| cbz (cpu, offset); |
| else |
| cbnz (cpu, offset); |
| } |
| } |
| |
| static void |
| dexTestBranchImmediate (sim_cpu *cpu) |
| { |
| /* instr[31] = b5 : bit 5 of test bit idx |
| instr[30,25] = 01 1011 |
| instr[24] = op : 0 ==> TBZ, 1 == TBNZ |
| instr[23,19] = b40 : bits 4 to 0 of test bit idx |
| instr[18,5] = simm14 : signed offset counted in words |
| instr[4,0] = uimm5 */ |
| |
| uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19)); |
| int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2; |
| |
| NYI_assert (30, 25, 0x1b); |
| |
| if (INSTR (24, 24) == 0) |
| tbz (cpu, pos, offset); |
| else |
| tbnz (cpu, pos, offset); |
| } |
| |
| static void |
| dexCondBranchImmediate (sim_cpu *cpu) |
| { |
| /* instr[31,25] = 010 1010 |
| instr[24] = op1; op => 00 ==> B.cond |
| instr[23,5] = simm19 : signed offset counted in words |
| instr[4] = op0 |
| instr[3,0] = cond */ |
| |
| int32_t offset; |
| uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4)); |
| |
| NYI_assert (31, 25, 0x2a); |
| |
| if (op != 0) |
| HALT_UNALLOC; |
| |
| offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2; |
| |
| bcc (cpu, offset, INSTR (3, 0)); |
| } |
| |
| static void |
| dexBranchRegister (sim_cpu *cpu) |
| { |
| /* instr[31,25] = 110 1011 |
| instr[24,21] = op : 0 ==> BR, 1 => BLR, 2 => RET, 3 => ERET, 4 => DRPS |
| instr[20,16] = op2 : must be 11111 |
| instr[15,10] = op3 : must be 000000 |
| instr[4,0] = op2 : must be 11111. */ |
| |
| uint32_t op = INSTR (24, 21); |
| uint32_t op2 = INSTR (20, 16); |
| uint32_t op3 = INSTR (15, 10); |
| uint32_t op4 = INSTR (4, 0); |
| |
| NYI_assert (31, 25, 0x6b); |
| |
| if (op2 != 0x1F || op3 != 0 || op4 != 0) |
| HALT_UNALLOC; |
| |
| if (op == 0) |
| br (cpu); |
| |
| else if (op == 1) |
| blr (cpu); |
| |
| else if (op == 2) |
| ret (cpu); |
| |
| else |
| { |
| /* ERET and DRPS accept 0b11111 for rn = instr [4,0]. */ |
| /* anything else is unallocated. */ |
| uint32_t rn = INSTR (4, 0); |
| |
| if (rn != 0x1f) |
| HALT_UNALLOC; |
| |
| if (op == 4 || op == 5) |
| HALT_NYI; |
| |
| HALT_UNALLOC; |
| } |
| } |
| |
/* Angel SVC reason codes.

   FIXME: These really belong to ../../libgloss/aarch64/svc.h, but
   that header may not be available when the simulator is built, so
   the values that are needed are repeated here.  */

/* File handling.  */
#define AngelSVC_Reason_Open            0x01
#define AngelSVC_Reason_Close           0x02
#define AngelSVC_Reason_Write           0x05
#define AngelSVC_Reason_Read            0x06
#define AngelSVC_Reason_IsTTY           0x09
#define AngelSVC_Reason_Seek            0x0A
#define AngelSVC_Reason_FLen            0x0C
#define AngelSVC_Reason_Remove          0x0E
#define AngelSVC_Reason_Rename          0x0F

/* Time keeping.  */
#define AngelSVC_Reason_Clock           0x10
#define AngelSVC_Reason_Time            0x11
#define AngelSVC_Reason_Elapsed         0x30

/* Environment and program control.  */
#define AngelSVC_Reason_System          0x12
#define AngelSVC_Reason_Errno           0x13
#define AngelSVC_Reason_GetCmdLine      0x15
#define AngelSVC_Reason_HeapInfo        0x16
#define AngelSVC_Reason_ReportException 0x18
| |
| |
| static void |
| handle_halt (sim_cpu *cpu, uint32_t val) |
| { |
| uint64_t result = 0; |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| if (val != 0xf000) |
| { |
| TRACE_SYSCALL (cpu, " HLT [0x%x]", val); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGTRAP); |
| } |
| |
| /* We have encountered an Angel SVC call. See if we can process it. */ |
| switch (aarch64_get_reg_u32 (cpu, 0, NO_SP)) |
| { |
| case AngelSVC_Reason_HeapInfo: |
| { |
| /* Get the values. */ |
| uint64_t stack_top = aarch64_get_stack_start (cpu); |
| uint64_t heap_base = aarch64_get_heap_start (cpu); |
| |
| /* Get the pointer */ |
| uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); |
| ptr = aarch64_get_mem_u64 (cpu, ptr); |
| |
| /* Fill in the memory block. */ |
| /* Start addr of heap. */ |
| aarch64_set_mem_u64 (cpu, ptr + 0, heap_base); |
| /* End addr of heap. */ |
| aarch64_set_mem_u64 (cpu, ptr + 8, stack_top); |
| /* Lowest stack addr. */ |
| aarch64_set_mem_u64 (cpu, ptr + 16, heap_base); |
| /* Initial stack addr. */ |
| aarch64_set_mem_u64 (cpu, ptr + 24, stack_top); |
| |
| TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info"); |
| } |
| break; |
| |
| case AngelSVC_Reason_Open: |
| { |
| /* Get the pointer */ |
| /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);. */ |
| /* FIXME: For now we just assume that we will only be asked |
| to open the standard file descriptors. */ |
| static int fd = 0; |
| result = fd ++; |
| |
| TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1); |
| } |
| break; |
| |
| case AngelSVC_Reason_Close: |
| { |
| uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK); |
| TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh); |
| result = 0; |
| } |
| break; |
| |
| case AngelSVC_Reason_Errno: |
| result = 0; |
| TRACE_SYSCALL (cpu, " AngelSVC: Get Errno"); |
| break; |
| |
| case AngelSVC_Reason_Clock: |
| result = |
| #ifdef CLOCKS_PER_SEC |
| (CLOCKS_PER_SEC >= 100) |
| ? (clock () / (CLOCKS_PER_SEC / 100)) |
| : ((clock () * 100) / CLOCKS_PER_SEC) |
| #else |
| /* Presume unix... clock() returns microseconds. */ |
| (clock () / 10000) |
| #endif |
| ; |
| TRACE_SYSCALL (cpu, " AngelSVC: Get Clock"); |
| break; |
| |
| case AngelSVC_Reason_GetCmdLine: |
| { |
| /* Get the pointer */ |
| uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); |
| ptr = aarch64_get_mem_u64 (cpu, ptr); |
| |
| /* FIXME: No command line for now. */ |
| aarch64_set_mem_u64 (cpu, ptr, 0); |
| TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line"); |
| } |
| break; |
| |
| case AngelSVC_Reason_IsTTY: |
| result = 1; |
| TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?"); |
| break; |
| |
| case AngelSVC_Reason_Write: |
| { |
| /* Get the pointer */ |
| uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); |
| /* Get the write control block. */ |
| uint64_t fd = aarch64_get_mem_u64 (cpu, ptr); |
| uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8); |
| uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16); |
| |
| TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %" |
| PRIx64 " on descriptor %" PRIx64, |
| len, buf, fd); |
| |
| if (len > 1280) |
| { |
| TRACE_SYSCALL (cpu, |
| " AngelSVC: Write: Suspiciously long write: %ld", |
| (long) len); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGBUS); |
| } |
| else if (fd == 1) |
| { |
| printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf)); |
| } |
| else if (fd == 2) |
| { |
| TRACE (cpu, 0, "\n"); |
| sim_io_eprintf (CPU_STATE (cpu), "%.*s", |
| (int) len, aarch64_get_mem_ptr (cpu, buf)); |
| TRACE (cpu, 0, "\n"); |
| } |
| else |
| { |
| TRACE_SYSCALL (cpu, |
| " AngelSVC: Write: Unexpected file handle: %d", |
| (int) fd); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGABRT); |
| } |
| } |
| break; |
| |
| case AngelSVC_Reason_ReportException: |
| { |
| /* Get the pointer */ |
| uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); |
| /*ptr = aarch64_get_mem_u64 (cpu, ptr);. */ |
| uint64_t type = aarch64_get_mem_u64 (cpu, ptr); |
| uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8); |
| |
| TRACE_SYSCALL (cpu, |
| "Angel Exception: type 0x%" PRIx64 " state %" PRIx64, |
| type, state); |
| |
| if (type == 0x20026) |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_exited, state); |
| else |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGINT); |
| } |
| break; |
| |
| case AngelSVC_Reason_Read: |
| case AngelSVC_Reason_FLen: |
| case AngelSVC_Reason_Seek: |
| case AngelSVC_Reason_Remove: |
| case AngelSVC_Reason_Time: |
| case AngelSVC_Reason_System: |
| case AngelSVC_Reason_Rename: |
| case AngelSVC_Reason_Elapsed: |
| default: |
| TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]", |
| aarch64_get_reg_u32 (cpu, 0, NO_SP)); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_stopped, SIM_SIGTRAP); |
| } |
| |
| aarch64_set_reg_u64 (cpu, 0, NO_SP, result); |
| } |
| |
| static void |
| dexExcpnGen (sim_cpu *cpu) |
| { |
| /* instr[31:24] = 11010100 |
| instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK |
| 010 ==> HLT, 101 ==> DBG GEN EXCPN |
| instr[20,5] = imm16 |
| instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC |
| instr[1,0] = LL : discriminates opc */ |
| |
| uint32_t opc = INSTR (23, 21); |
| uint32_t imm16 = INSTR (20, 5); |
| uint32_t opc2 = INSTR (4, 2); |
| uint32_t LL; |
| |
| NYI_assert (31, 24, 0xd4); |
| |
| if (opc2 != 0) |
| HALT_UNALLOC; |
| |
| LL = INSTR (1, 0); |
| |
| /* We only implement HLT and BRK for now. */ |
| if (opc == 1 && LL == 0) |
| { |
| TRACE_EVENTS (cpu, " BRK [0x%x]", imm16); |
| sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), |
| sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK)); |
| } |
| |
| if (opc == 2 && LL == 0) |
| handle_halt (cpu, imm16); |
| |
| else if (opc == 0 || opc == 5) |
| HALT_NYI; |
| |
| else |
| HALT_UNALLOC; |
| } |
| |
| /* Stub for accessing system registers. */ |
| |
| static uint64_t |
| system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn, |
| unsigned crm, unsigned op2) |
| { |
| if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7) |
| /* DCZID_EL0 - the Data Cache Zero ID register. |
| We do not support DC ZVA at the moment, so |
| we return a value with the disable bit set. |
| We implement support for the DCZID register since |
| it is used by the C library's memset function. */ |
| return ((uint64_t) 1) << 4; |
| |
| if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1) |
| /* Cache Type Register. */ |
| return 0x80008000UL; |
| |
| if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2) |
| /* TPIDR_EL0 - thread pointer id. */ |
| return aarch64_get_thread_id (cpu); |
| |
| if (op1 == 3 && crm == 4 && op2 == 0) |
| return aarch64_get_FPCR (cpu); |
| |
| if (op1 == 3 && crm == 4 && op2 == 1) |
| return aarch64_get_FPSR (cpu); |
| |
| else if (op1 == 3 && crm == 2 && op2 == 0) |
| return aarch64_get_CPSR (cpu); |
| |
| HALT_NYI; |
| } |
| |
| static void |
| system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn, |
| unsigned crm, unsigned op2, uint64_t val) |
| { |
| if (op1 == 3 && crm == 4 && op2 == 0) |
| aarch64_set_FPCR (cpu, val); |
| |
| else if (op1 == 3 && crm == 4 && op2 == 1) |
| aarch64_set_FPSR (cpu, val); |
| |
| else if (op1 == 3 && crm == 2 && op2 == 0) |
| aarch64_set_CPSR (cpu, val); |
| |
| else |
| HALT_NYI; |
| } |
| |
| static void |
| do_mrs (sim_cpu *cpu) |
| { |
| /* instr[31:20] = 1101 0101 0001 1 |
| instr[19] = op0 |
| instr[18,16] = op1 |
| instr[15,12] = CRn |
| instr[11,8] = CRm |
| instr[7,5] = op2 |
| instr[4,0] = Rt */ |
| unsigned sys_op0 = INSTR (19, 19) + 2; |
| unsigned sys_op1 = INSTR (18, 16); |
| unsigned sys_crn = INSTR (15, 12); |
| unsigned sys_crm = INSTR (11, 8); |
| unsigned sys_op2 = INSTR (7, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| aarch64_set_reg_u64 (cpu, rt, NO_SP, |
| system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2)); |
| } |
| |
| static void |
| do_MSR_immediate (sim_cpu *cpu) |
| { |
| /* instr[31:19] = 1101 0101 0000 0 |
| instr[18,16] = op1 |
| instr[15,12] = 0100 |
| instr[11,8] = CRm |
| instr[7,5] = op2 |
| instr[4,0] = 1 1111 */ |
| |
| unsigned op1 = INSTR (18, 16); |
| /*unsigned crm = INSTR (11, 8);*/ |
| unsigned op2 = INSTR (7, 5); |
| |
| NYI_assert (31, 19, 0x1AA0); |
| NYI_assert (15, 12, 0x4); |
| NYI_assert (4, 0, 0x1F); |
| |
| if (op1 == 0) |
| { |
| if (op2 == 5) |
| HALT_NYI; /* set SPSel. */ |
| else |
| HALT_UNALLOC; |
| } |
| else if (op1 == 3) |
| { |
| if (op2 == 6) |
| HALT_NYI; /* set DAIFset. */ |
| else if (op2 == 7) |
| HALT_NYI; /* set DAIFclr. */ |
| else |
| HALT_UNALLOC; |
| } |
| else |
| HALT_UNALLOC; |
| } |
| |
| static void |
| do_MSR_reg (sim_cpu *cpu) |
| { |
| /* instr[31:20] = 1101 0101 0001 |
| instr[19] = op0 |
| instr[18,16] = op1 |
| instr[15,12] = CRn |
| instr[11,8] = CRm |
| instr[7,5] = op2 |
| instr[4,0] = Rt */ |
| |
| unsigned sys_op0 = INSTR (19, 19) + 2; |
| unsigned sys_op1 = INSTR (18, 16); |
| unsigned sys_crn = INSTR (15, 12); |
| unsigned sys_crm = INSTR (11, 8); |
| unsigned sys_op2 = INSTR (7, 5); |
| unsigned rt = INSTR (4, 0); |
| |
| NYI_assert (31, 20, 0xD51); |
| |
| TRACE_DECODE (cpu, "emulated at line %d", __LINE__); |
| system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2, |
| aarch64_get_reg_u64 (cpu, rt, NO_SP)); |
| } |
| |
| static void |
| do_SYS (sim_cpu *cpu) |
| { |
| /* instr[31,19] = 1101 0101 0000 1 |
| instr[18,16] = op1 |
| instr[15,12] = CRn |
| instr[11,8] = CRm |
| instr[7,5] = op2 |
| instr[4,0] = Rt */ |
| NYI_assert (31, 19, 0x1AA1); |
| |
| /* FIXME: For now we just silently accept system ops. */ |
| } |
| |
| static void |
| dexSystem (sim_cpu *cpu) |
| { |
| /* instr[31:22] = 1101 01010 0 |
| instr[21] = L |
| instr[20,19] = op0 |
| instr[18,16] = op1 |
| instr[15,12] = CRn |
| instr[11,8] = CRm |
| instr[7,5] = op2 |
| instr[4,0] = uimm5 */ |
| |
| /* We are interested in HINT, DSB, DMB and ISB |
| |
| Hint #0 encodes NOOP (this is the only hint we care about) |
| L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111, |
| CRm op2 != 0000 000 OR CRm op2 == 0000 000 || CRm op > 0000 101 |
| |
| DSB, DMB, ISB are data store barrier, data memory barrier and |
| instruction store barrier, respectively, where |
| |
| L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111, |
| op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110 |
| CRm<3:2> ==> domain, CRm<1:0> ==> types, |
| domain : 00 ==> OuterShareable, 01 ==> Nonshareable, |
| 10 ==> InerShareable, 11 ==> FullSystem |
| types : 01 ==> Reads, 10 ==> Writes, |
| 11 ==> All, 00 ==> All (domain == FullSystem). */ |
| |
| unsigned rt = INSTR (4, 0); |
| |
| NYI_assert (31, 22, 0x354); |
| |
| switch (INSTR (21, 12)) |
| { |
| case 0x032: |
| if (rt == 0x1F) |
| { |
| /* NOP has CRm != 0000 OR. */ |
| /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */ |
| uint32_t crm = INSTR (11, 8); |
| uint32_t op2 = INSTR (7, 5); |
| |
| if (crm != 0 || (op2 == 0 || op2 > 5)) |
| { |
| /* Actually call nop method so we can reimplement it later. */ |
| nop (cpu); |
| return; |
| } |
| } |
| HALT_NYI; |
| |
| case 0x033: |
| { |
| uint32_t op2 = INSTR (7, 5); |
| |
| switch (op2) |
| { |
| case 2: HALT_NYI; |
| case 4: dsb (cpu); return; |
| case 5: dmb (cpu); return; |
| case 6: isb (cpu); return; |
| default: HALT_UNALLOC; |
| } |
| } |
| |
| case 0x3B0: |
| case 0x3B4: |
| case 0x3BD: |
| do_mrs (cpu); |
| return; |
| |
| case 0x0B7: |
| do_SYS (cpu); /* DC is an alias of SYS. */ |
| return; |
| |
| default: |
| if (INSTR (21, 20) == 0x1) |
| do_MSR_reg (cpu); |
| else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4) |
| do_MSR_immediate (cpu); |
| else |
| HALT_NYI; |
| return; |
| } |
| } |
| |
| static void |
| dexBr (sim_cpu *cpu) |
| { |
| /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu)); |
| assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011 |
| bits [31,29] of a BrExSys are the secondary dispatch vector. */ |
| uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu)); |
| |
| switch (group2) |
| { |
| case BR_IMM_000: |
| return dexBranchImmediate (cpu); |
| |
| case BR_IMMCMP_001: |
| /* Compare has bit 25 clear while test has it set. */ |
| if (!INSTR (25, 25)) |
| dexCompareBranchImmediate (cpu); |
| else |
| dexTestBranchImmediate (cpu); |
| return; |
| |
| case BR_IMMCOND_010: |
| /* This is a conditional branch if bit 25 is clear otherwise |
| unallocated. */ |
| if (!INSTR (25, 25)) |
| dexCondBranchImmediate (cpu); |
| else |
| HALT_UNALLOC; |
| return; |
| |
| case BR_UNALLOC_011: |
| HALT_UNALLOC; |
| |
| case BR_IMM_100: |
| dexBranchImmediate (cpu); |
| return; |
| |
| case BR_IMMCMP_101: |
| /* Compare has bit 25 clear while test has it set. */ |
| if (!INSTR (25, 25)) |
| dexCompareBranchImmediate (cpu); |
| else |
| dexTestBranchImmediate (cpu); |
| return; |
| |
| case BR_REG_110: |
| /* Unconditional branch reg has bit 25 set. */ |
| if (INSTR (25, 25)) |
| dexBranchRegister (cpu); |
| |
| /* This includes both Excpn Gen, System and unalloc operations. |
| We need to decode the Excpn Gen operation BRK so we can plant |
| debugger entry points. |
| Excpn Gen operations have instr [24] = 0. |
| we need to decode at least one of the System operations NOP |
| which is an alias for HINT #0. |
| System operations have instr [24,22] = 100. */ |
| else if (INSTR (24, 24) == 0) |
| dexExcpnGen (cpu); |
| |
| else if (INSTR (24, 22) == 4) |
| dexSystem (cpu); |
| |
| else |
| HALT_UNALLOC; |
| |
| return; |
| |
| case BR_UNALLOC_111: |
| HALT_UNALLOC; |
| |
| default: |
| /* Should never reach here. */ |
| HALT_NYI; |
| } |
| } |
| |
| static void |
| aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc) |
| { |
| /* We need to check if gdb wants an in here. */ |
| /* checkBreak (cpu);. */ |
| |
| uint64_t group = dispatchGroup (aarch64_get_instr (cpu)); |
| |
| switch (group) |
| { |
| case GROUP_PSEUDO_0000: dexPseudo (cpu); break; |
| case GROUP_LDST_0100: dexLdSt (cpu); break; |
| case GROUP_DPREG_0101: dexDPReg (cpu); break; |
| case GROUP_LDST_0110: dexLdSt (cpu); break; |
| case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break; |
| case GROUP_DPIMM_1000: dexDPImm (cpu); break; |
| case GROUP_DPIMM_1001: dexDPImm (cpu); break; |
| case GROUP_BREXSYS_1010: dexBr (cpu); break; |
| case GROUP_BREXSYS_1011: dexBr (cpu); break; |
| case GROUP_LDST_1100: dexLdSt (cpu); break; |
| case GROUP_DPREG_1101: dexDPReg (cpu); break; |
| case GROUP_LDST_1110: dexLdSt (cpu); break; |
| case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break; |
| |
| case GROUP_UNALLOC_0001: |
| case GROUP_UNALLOC_0010: |
| case GROUP_UNALLOC_0011: |
| HALT_UNALLOC; |
| |
| default: |
| /* Should never reach here. */ |
| HALT_NYI; |
| } |
| } |
| |
| static bfd_boolean |
| aarch64_step (sim_cpu *cpu) |
| { |
| uint64_t pc = aarch64_get_PC (cpu); |
| |
| if (pc == TOP_LEVEL_RETURN_PC) |
| return FALSE; |
| |
| aarch64_set_next_PC (cpu, pc + 4); |
| |
| /* Code is always little-endian. */ |
| sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map, |
| & aarch64_get_instr (cpu), pc, 4); |
| aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu)); |
| |
| TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc, |
| aarch64_get_instr (cpu)); |
| TRACE_DISASM (cpu, pc); |
| |
| aarch64_decode_and_execute (cpu, pc); |
| |
| return TRUE; |
| } |
| |
| void |
| aarch64_run (SIM_DESC sd) |
| { |
| sim_cpu *cpu = STATE_CPU (sd, 0); |
| |
| while (aarch64_step (cpu)) |
| { |
| aarch64_update_PC (cpu); |
| |
| if (sim_events_tick (sd)) |
| sim_events_process (sd); |
| } |
| |
| sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu), |
| sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP)); |
| } |
| |
| void |
| aarch64_init (sim_cpu *cpu, uint64_t pc) |
| { |
| uint64_t sp = aarch64_get_stack_start (cpu); |
| |
| /* Install SP, FP and PC and set LR to -20 |
| so we can detect a top-level return. */ |
| aarch64_set_reg_u64 (cpu, SP, SP_OK, sp); |
| aarch64_set_reg_u64 (cpu, FP, SP_OK, sp); |
| aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC); |
| aarch64_set_next_PC (cpu, pc); |
| aarch64_update_PC (cpu); |
| aarch64_init_LIT_table (); |
| } |