// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#ifndef ZIRCON_KERNEL_ARCH_ARM64_INCLUDE_ARCH_ARM64_MP_H_
#define ZIRCON_KERNEL_ARCH_ARM64_INCLUDE_ARCH_ARM64_MP_H_

#define PERCPU_IN_RESTRICTED_MODE 32

#ifndef __ASSEMBLER__

#include <zircon/compiler.h>

#include <arch/arm64.h>
#include <arch/arm64/feature.h>
#include <kernel/align.h>
#include <kernel/cpu.h>
#include <ktl/bit.h>
#include <ktl/type_traits.h>

// bits for mpidr register
#define MPIDR_AFF0_MASK 0xFFULL
#define MPIDR_AFF0_SHIFT 0
#define MPIDR_AFF1_MASK (0xFFULL << 8)
#define MPIDR_AFF1_SHIFT 8
#define MPIDR_AFF2_MASK (0xFFULL << 16)
#define MPIDR_AFF2_SHIFT 16
#define MPIDR_AFF3_MASK (0xFFULL << 32)
#define MPIDR_AFF3_SHIFT 32

// construct an ARM MPID from cluster (AFF1) and cpu number (AFF0)
#define ARM64_MPID(cluster, cpu)                          \
  ((((cluster) << MPIDR_AFF1_SHIFT) & MPIDR_AFF1_MASK) |  \
   (((cpu) << MPIDR_AFF0_SHIFT) & MPIDR_AFF0_MASK))

#define ARM64_MPIDR_MASK (MPIDR_AFF3_MASK | MPIDR_AFF2_MASK | MPIDR_AFF1_MASK | MPIDR_AFF0_MASK)
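
// A worked example of the macro above (illustrative only, not used by the
// kernel): a cpu in cluster 1 with per-cluster cpu number 2 packs AFF1=1 and
// AFF0=2, so
//
//   ARM64_MPID(1, 2) == (1u << MPIDR_AFF1_SHIFT) | (2u << MPIDR_AFF0_SHIFT)
//                    == 0x102
//
// Each affinity field is masked to 8 bits by the corresponding *_MASK value.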

// TODO: add support for AFF2 and AFF3

struct percpu;

// Per-cpu structure, pointed to by a fixed register while in kernel mode.
// Aligned on the maximum architectural cache line to avoid cache
// line sharing between cpus.
struct arm64_percpu {
  // cpu number
  cpu_num_t cpu_num;

  // Whether blocking is disallowed. See arch_blocking_disallowed().
  uint32_t blocking_disallowed;

  // Number of spinlocks currently held.
  uint32_t num_spinlocks;

  // Microarchitecture of this cpu (e.g. Cortex-A53)
  arm64_microarch microarch;

  // True if the branch predictor should be invalidated during context switch
  // to mitigate Spectre V2 attacks.
  bool should_invalidate_bp_on_context_switch;

  // True if the branch predictor should be invalidated on suspicious entries
  // to EL1 from EL0 to mitigate Spectre V2 attacks.
  bool should_invalidate_bp_on_el0_exception;

  // A pointer providing fast access to the high-level arch-agnostic per-cpu struct.
  percpu* high_level_percpu;

  // Flag to track that we're in restricted mode.
  uint32_t in_restricted_mode;
} __CPU_ALIGN;
static_assert(offsetof(struct arm64_percpu, in_restricted_mode) == PERCPU_IN_RESTRICTED_MODE,
              "in_restricted_mode is at the wrong offset");
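
// Illustrative note (an inference from the layout above, not a new
// interface): PERCPU_IN_RESTRICTED_MODE is a bare numeric offset defined
// outside the !__ASSEMBLER__ guard so that assembly code can reach the flag
// straight off the percpu register, along the lines of
//
//   ldr w9, [x20, #PERCPU_IN_RESTRICTED_MODE]  // hypothetical asm snippet
//
// The static_assert above keeps the hand-maintained constant in sync with
// the actual struct layout.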

void arch_init_cpu_map(uint cluster_count, const uint* cluster_cpus);
void arch_register_mpid(uint cpu_id, uint64_t mpid);
void arm64_init_percpu_early();

extern uint arm_num_cpus;
extern uint arm64_cpu_cluster_ids[SMP_MAX_CPUS];
extern uint arm64_cpu_cpu_ids[SMP_MAX_CPUS];

// Use the x20 register to always point at the local cpu structure for fast access.
// x20 is the first available callee-saved register that clang will allow to be marked
// as fixed (via the -ffixed-x20 command-line flag). Since it is callee-saved, the
// register is naturally saved and restored across firmware calls to PSCI or SMCCC.
inline void arm64_write_percpu_ptr(struct arm64_percpu* percpu) {
  __asm__ volatile("mov x20, %0" ::"r"(percpu));
}

inline struct arm64_percpu* arm64_read_percpu_ptr() {
  struct arm64_percpu* p;
  __asm__ volatile("mov %0, x20" : "=r"(p));
  return p;
}
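
// Usage sketch (illustrative only; the array name and boot path are
// hypothetical, not part of this interface): during early bring-up each cpu
// would install its own percpu entry before any of the accessors below are
// used, e.g.
//
//   static arm64_percpu percpus[SMP_MAX_CPUS];
//   arm64_write_percpu_ptr(&percpus[cpu_num]);
//   DEBUG_ASSERT(arm64_read_percpu_ptr() == &percpus[cpu_num]);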

// The asm below is marked volatile to force a read of the field each time it
// is asked for, so the compiler always emits a read and never caches a copy
// between accesses. For the same reason, this can't be done through a pointer
// obtained from arm64_read_percpu_ptr(): the compiler could copy x20 into
// another register and keep using the stale copy after a reschedule.
template <typename T, size_t Offset>
[[gnu::always_inline]] inline T arm64_read_percpu_field() {
  static_assert((Offset & (alignof(T) - 1)) == 0, "Bad offset alignment");

  // The `ldr` instruction is 64 bits or 32 bits depending on the register name
  // used. All the load instructions for sizes smaller than 64 bits use the
  // 32-bit register form, but sizes smaller than 32 bits have their own load
  // instructions: `ldrh` for 16 bits, `ldrb` for 8 bits. These still write a
  // full 32-bit register, so the output operand is uint32_t. Narrow the
  // result to the actual value type.
  constexpr auto narrow = [](uint32_t value) -> T {
    if constexpr (sizeof(T) > sizeof(uint32_t)) {
      PANIC("unreachable");
      return {};
    } else if constexpr (ktl::is_signed_v<T>) {
      using U = ktl::make_unsigned_t<T>;
      return ktl::bit_cast<T>(static_cast<U>(value));
    } else {
      return static_cast<T>(value);
    }
  };

  if constexpr (sizeof(T) == sizeof(uint32_t)) {
    T value;
    __asm__ volatile("ldr %w[val], [x20, %[offset]]" : [val] "=r"(value) : [offset] "Ir"(Offset));
    return value;
  } else if constexpr (sizeof(T) == sizeof(uint16_t)) {
    uint32_t value;
    __asm__ volatile("ldrh %w[val], [x20, %[offset]]" : [val] "=r"(value) : [offset] "Ir"(Offset));
    return narrow(value);
  } else if constexpr (sizeof(T) == sizeof(uint8_t)) {
    uint32_t value;
    __asm__ volatile("ldrb %w[val], [x20, %[offset]]" : [val] "=r"(value) : [offset] "Ir"(Offset));
    return narrow(value);
  } else {
    static_assert(sizeof(T) == sizeof(uint64_t));
    T value;
    __asm__ volatile("ldr %[val], [x20, %[offset]]" : [val] "=r"(value) : [offset] "Ir"(Offset));
    return value;
  }
}
#define READ_PERCPU_FIELD(field) \
  (arm64_read_percpu_field<decltype(arm64_percpu::field), offsetof(arm64_percpu, field)>())
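
// Example use of the macro (illustrative only): the field type and offset are
// derived from the struct definition, so
//
//   uint32_t held = READ_PERCPU_FIELD(num_spinlocks);
//
// instantiates arm64_read_percpu_field<uint32_t,
// offsetof(arm64_percpu, num_spinlocks)>() and compiles down to a single
// 32-bit ldr from [x20, #offset].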

template <typename T, size_t Offset>
[[gnu::always_inline]] inline void arm64_write_percpu_field(T value) {
  static_assert((Offset & (alignof(T) - 1)) == 0, "Bad offset alignment");

  // The `str` instruction is 64 bits or 32 bits depending on the register name
  // used. All the store instructions for sizes smaller than 64 bits use the
  // 32-bit register form, but sizes smaller than 32 bits have their own store
  // instructions: `strh` for 16 bits, `strb` for 8 bits. In all cases the
  // "unused" bits of the register are ignored. But just in case, zero-extend
  // the value from its actual bit width up to uint32_t since the asm operand
  // uses uint32_t to reflect that the 32-bit operand register appears in asm.
  constexpr auto widen = [](T value) -> uint32_t {
    if constexpr (sizeof(T) > sizeof(uint32_t)) {
      PANIC("unreachable");
      return 0;
    } else if constexpr (ktl::is_signed_v<T>) {
      using U = ktl::make_unsigned_t<T>;
      return ktl::bit_cast<U>(value);
    } else {
      return value;
    }
  };

  if constexpr (sizeof(T) == sizeof(uint32_t)) {
    __asm__ volatile("str %w[val], [x20, %[offset]]"
                     :
                     : [val] "r"(value), [offset] "Ir"(Offset)
                     : "memory");
  } else if constexpr (sizeof(T) == sizeof(uint16_t)) {
    __asm__ volatile("strh %w[val], [x20, %[offset]]"
                     :
                     : [val] "r"(widen(value)), [offset] "Ir"(Offset)
                     : "memory");
  } else if constexpr (sizeof(T) == sizeof(uint8_t)) {
    __asm__ volatile("strb %w[val], [x20, %[offset]]"
                     :
                     : [val] "r"(widen(value)), [offset] "Ir"(Offset)
                     : "memory");
  } else {
    static_assert(sizeof(T) == sizeof(uint64_t));
    __asm__ volatile("str %[val], [x20, %[offset]]"
                     :
                     : [val] "r"(value), [offset] "Ir"(Offset)
                     : "memory");
  }
}
#define WRITE_PERCPU_FIELD(field, value) \
  (arm64_write_percpu_field<decltype(arm64_percpu::field), offsetof(arm64_percpu, field)>(value))
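
// Example use of the macro (illustrative only): the matching store side,
//
//   WRITE_PERCPU_FIELD(blocking_disallowed, 1u);
//
// instantiates arm64_write_percpu_field<uint32_t,
// offsetof(arm64_percpu, blocking_disallowed)>(1u) and emits a single 32-bit
// str to [x20, #offset], with a "memory" clobber so the compiler does not
// reorder it against surrounding memory accesses.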

// Return a pointer to the high-level percpu struct for the calling CPU.
inline struct percpu* arch_get_curr_percpu() { return READ_PERCPU_FIELD(high_level_percpu); }

inline cpu_num_t arch_curr_cpu_num() { return READ_PERCPU_FIELD(cpu_num); }

// TODO(https://fxbug.dev/42108040) get num_cpus from topology.
// This needs to be set very early (before arch_init).
inline void arch_set_num_cpus(uint cpu_count) { arm_num_cpus = cpu_count; }

inline uint arch_max_num_cpus() { return arm_num_cpus; }

// translate a cpu number back to the cluster ID (AFF1)
inline uint arch_cpu_num_to_cluster_id(cpu_num_t cpu) {
  DEBUG_ASSERT(cpu < SMP_MAX_CPUS);

  return arm64_cpu_cluster_ids[cpu];
}

// translate a cpu number back to the MP cpu number within a cluster (AFF0)
inline uint arch_cpu_num_to_cpu_id(cpu_num_t cpu) {
  DEBUG_ASSERT(cpu < SMP_MAX_CPUS);

  return arm64_cpu_cpu_ids[cpu];
}

// Translate a CPU number back to the MPIDR of the CPU.
uint64_t arch_cpu_num_to_mpidr(cpu_num_t cpu_num);

// translate mpidr to cpu number
cpu_num_t arm64_mpidr_to_cpu_num(uint64_t mpidr);
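
// Sketch of how these translations relate (illustrative only, and assuming
// only AFF0/AFF1 are populated, per the TODO above): for a registered cpu
// number, the cluster/cpu pair should recompose into an MPID that maps back
// to the same cpu number, e.g.
//
//   uint64_t mpid = ARM64_MPID(arch_cpu_num_to_cluster_id(cpu),
//                              arch_cpu_num_to_cpu_id(cpu));
//   DEBUG_ASSERT(arm64_mpidr_to_cpu_num(mpid) == cpu);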

// Setup the high-level percpu struct pointer for |cpu_num|.
void arch_setup_percpu(cpu_num_t cpu_num, struct percpu* percpu);

inline void arch_set_restricted_flag(bool restricted) {
  WRITE_PERCPU_FIELD(in_restricted_mode, restricted ? 1 : 0);
}

inline bool arch_get_restricted_flag() { return READ_PERCPU_FIELD(in_restricted_mode); }

#endif  // !__ASSEMBLER__

#endif  // ZIRCON_KERNEL_ARCH_ARM64_INCLUDE_ARCH_ARM64_MP_H_