// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <bits.h>
#include <lib/fit/defer.h>
#include <lib/ktrace.h>
#include <zircon/errors.h>
#include <zircon/syscalls/hypervisor.h>

#include <arch/hypervisor.h>
#include <arch/ops.h>
#include <dev/interrupt/arm_gic_common.h>
#include <dev/interrupt/arm_gic_hw_interface.h>
#include <hypervisor/aspace.h>
#include <hypervisor/cpu.h>
#include <hypervisor/ktrace.h>
#include <kernel/event.h>
#include <kernel/percpu.h>
#include <kernel/stats.h>
#include <platform/timer.h>
#include <vm/physmap.h>
#include <vm/pmm.h>

#include "el2_cpu_state_priv.h"
#include "vmexit_priv.h"
static constexpr uint32_t kGichHcrEn = 1u << 0;
static constexpr uint32_t kGichHcrUie = 1u << 1;
static constexpr uint32_t kGichMisrU = 1u << 1;
static constexpr uint32_t kSpsrDaif = 0b1111 << 6;
static constexpr uint32_t kSpsrEl1h = 0b0101;
static constexpr uint32_t kSpsrNzcv = 0b1111 << 28;

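// Builds the VMPIDR_EL2 value for a VCPU: VPIDs are allocated starting at 1, so the low affinity
// bits are |vpid - 1|, and bit 31 of the register is RES1.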
static uint64_t vmpidr_of(uint16_t vpid) {
  constexpr uint64_t res1 = 1ul << 31;
  return (vpid - 1) | res1;
}

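// Invokes |function| on each occupied list register (those not marked empty in ELRSR), in order.
// Iteration stops early if |function| returns an error; returns true if every occupied LR was
// visited.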
template <typename F>
static bool for_each_lr(IchState* ich_state, F function) {
  for (uint8_t i = 0; i < ich_state->num_lrs; i++) {
    if (BIT(ich_state->elrsr, i)) {
      continue;
    }
    InterruptState state;
    uint32_t vector = gic_get_vector_from_lr(ich_state->lr[i], &state);
    auto result = function(i, state, vector);
    if (result.is_error()) {
      return false;
    }
  }
  return true;
}

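// Moves interrupts that are pending in |gich_state| into free list registers, so that they are
// injected into the guest on the next entry.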
static void gich_maybe_interrupt(GichState* gich_state, IchState* ich_state) {
  // From ARM GIC v3/v4, Section 4.8: If, on a particular CPU interface,
  // multiple pending interrupts have the same priority, and have sufficient
  // priority for the interface to signal them to the PE, it is IMPLEMENTATION
  // DEFINED how the interface selects which interrupt to signal.
  //
  // If interrupts are of the same priority, we can choose whatever ordering
  // we prefer when populating the LRs.
  for (uint64_t elrsr = ich_state->elrsr; elrsr != 0;) {
    uint32_t vector = 0;
    bool pending = gich_state->Pop(&vector);
    if (!pending) {
      // There are no more pending interrupts.
      break;
    }
    uint32_t lr_index = __builtin_ctzl(elrsr);
    // From ARM GIC v3/v4, Section 4.8: If the GIC implements fewer than 256
    // priority levels, the low-order bits of the priority fields are
    // RAZ/WI.
    // ...
    // In the GIC prioritization scheme, lower numbers have higher priority.
    //
    // We may have as few as 16 priority levels, so step by 16 to the next
    // lowest priority in order to prioritise SGIs and PPIs over SPIs.
    uint8_t prio = vector < GIC_BASE_SPI ? 0 : 0x10;
    InterruptState state = InterruptState::PENDING;

    if (gich_state->InListRegister(vector)) {
      bool skip =
          for_each_lr(ich_state, [&](uint8_t i, InterruptState s, uint32_t v) -> zx::result<> {
            if (v != vector || s != InterruptState::ACTIVE) {
              return zx::ok();
            }
            // If the interrupt is active, change its state to pending and active.
            state = InterruptState::PENDING_AND_ACTIVE;
            lr_index = i;
            return zx::error(ZX_ERR_STOP);
          });
      if (skip) {
        // Skip an interrupt if it is in an LR, and its state is not changing.
        continue;
      }
    }

    ich_state->lr[lr_index] = gic_get_lr_from_vector(prio, state, vector);
    elrsr &= ~(1u << lr_index);
  }
}

GichState::GichState() {
  zx_status_t status = lr_tracker_.Reset(kNumInterrupts);
  // `lr_tracker_` uses static storage, so `Reset` cannot fail.
  DEBUG_ASSERT(status == ZX_OK);
}

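// Records which interrupt vectors currently occupy a list register, so that a later injection of
// the same vector can reuse its existing LR or be skipped.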
void GichState::TrackAllListRegisters(IchState* ich_state) {
  lr_tracker_.ClearAll();
  for_each_lr(ich_state, [this](uint8_t i, InterruptState s, uint32_t v) -> zx::result<> {
    lr_tracker_.SetOne(v);
    return zx::ok();
  });
}

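// Selects the ktrace VM-exit reason for a physical-interrupt exit: an underflow maintenance
// interrupt if GICH_MISR.U is set, otherwise a plain physical interrupt.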
static VcpuExit vmexit_interrupt_ktrace_meta(uint32_t misr) {
  if (misr & kGichMisrU) {
    return VCPU_UNDERFLOW_MAINTENANCE_INTERRUPT;
  }
  return VCPU_PHYSICAL_INTERRUPT;
}

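// While in scope, loads the virtual interface control state into hardware and keeps interrupts
// disabled; the destructor reads the state back out before re-enabling interrupts.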
AutoGich::AutoGich(IchState* ich_state, bool pending) : ich_state_(ich_state) {
  // From ARM GIC v3/v4, Section 8.4.5: Underflow Interrupt Enable. Enables
  // the signaling of a maintenance interrupt when the List registers are
  // empty, or hold only one valid entry.
  //
  // We use it when there are not enough free LRs to inject all pending
  // interrupts, so that when the guest has finished processing most of them, a
  // maintenance interrupt causes a VM exit and gives us a chance to inject the
  // remaining interrupts. The point of this is to reduce the latency of
  // interrupt processing.
  uint32_t gich_hcr = kGichHcrEn;
  if (pending && ich_state_->num_lrs > 1) {
    gich_hcr |= kGichHcrUie;
  }

  DEBUG_ASSERT(!arch_ints_disabled());
  int_state_ = arch_interrupt_save();
  arch_set_blocking_disallowed(true);
  gic_write_gich_state(ich_state_, gich_hcr);
}

AutoGich::~AutoGich() {
  DEBUG_ASSERT(arch_ints_disabled());
  gic_read_gich_state(ich_state_);
  arch_set_blocking_disallowed(false);
  arch_interrupt_restore(int_state_);
}

// Returns the number of active priorities registers, based on the number of
// preemption bits.
//
// From ARM GIC v2, Section 5.3.2: In GICv2, the only valid value is 5 bits.
//
// From ARM GIC v3/v4, Section 8.4.2: If 5 bits of preemption are implemented
// (bits [7:3] of priority), then there are 32 preemption levels... If 6 bits of
// preemption are implemented (bits [7:2] of priority), then there are 64
// preemption levels... If 7 bits of preemption are implemented (bits [7:1] of
// priority), then there are 128 preemption levels...
static uint8_t num_aprs(uint8_t num_pres) { return static_cast<uint8_t>(1u << (num_pres - 5u)); }

// static
zx::result<ktl::unique_ptr<Vcpu>> Vcpu::Create(Guest& guest, zx_vaddr_t entry) {
  hypervisor::GuestPhysicalAspace& gpa = guest.AddressSpace();
  if (entry >= gpa.size()) {
    return zx::error(ZX_ERR_INVALID_ARGS);
  }

  Thread* thread = Thread::Current::Get();
  if (thread->vcpu()) {
    return zx::error(ZX_ERR_BAD_STATE);
  }

  auto vpid = guest.AllocVpid();
  if (vpid.is_error()) {
    return vpid.take_error();
  }

  fbl::AllocChecker ac;
  ktl::unique_ptr<Vcpu> vcpu(new (&ac) Vcpu(guest, *vpid, thread));
  if (!ac.check()) {
    auto result = guest.FreeVpid(*vpid);
    ASSERT(result.is_ok());
    return zx::error(ZX_ERR_NO_MEMORY);
  }

  if (auto result = vcpu->el2_state_.Alloc(); result.is_error()) {
    return result.take_error();
  }

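  // Start the guest at |entry| in EL1h with all DAIF exceptions masked, and make EL1 reads of
  // MPIDR_EL1 reflect the VPID.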
  vcpu->el2_state_->guest_state.system_state.elr_el2 = entry;
  vcpu->el2_state_->guest_state.system_state.spsr_el2 = kSpsrDaif | kSpsrEl1h;
  vcpu->el2_state_->guest_state.vmpidr_el2 = vmpidr_of(*vpid);
  const uint8_t num_lrs = gic_get_num_lrs();
  vcpu->el2_state_->ich_state.num_aprs = num_aprs(gic_get_num_pres());
  vcpu->el2_state_->ich_state.num_lrs = num_lrs;
  vcpu->el2_state_->ich_state.vmcr = gic_default_gich_vmcr();
  vcpu->el2_state_->ich_state.elrsr = (1ul << num_lrs) - 1;
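  // Enable stage-2 translation, route physical IRQs, FIQs, and SErrors to EL2, and trap WFI/WFE,
  // SMC, set/way cache maintenance, and virtual memory control register writes; the guest's EL1
  // runs in AArch64 with default-cacheable memory.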
  vcpu->hcr_ = HCR_EL2_VM | HCR_EL2_PTW | HCR_EL2_FMO | HCR_EL2_IMO | HCR_EL2_AMO | HCR_EL2_TWI |
               HCR_EL2_TWE | HCR_EL2_TSC | HCR_EL2_TSW | HCR_EL2_TVM | HCR_EL2_RW | HCR_EL2_DC;

  return zx::ok(ktl::move(vcpu));
}

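// Marks |thread| as a VCPU thread and installs a migration callback so the VCPU can track the
// CPU its thread last ran on.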
Vcpu::Vcpu(Guest& guest, uint16_t vpid, Thread* thread)
    : guest_(guest), vpid_(vpid), last_cpu_(thread->LastCpu()), thread_(thread) {
  thread->set_vcpu(true);
  thread->SetMigrateFn([this](Thread* thread, auto stage) {
    ChainLockTransaction::AssertActive();
    thread->get_lock().AssertHeld();
    Migrate(thread, stage);
  });
}

Vcpu::~Vcpu() {
  {
    // Taking the Thread list lock guarantees that our thread cannot be in the
    // Exiting stage of our migration function as Thread::Exit holds the list
    // lock during the migration callback. `thread_` is only ever mutated
    // during this callback as the thread exits, so we are guaranteed to see
    // either our thread, or nullptr if the thread has already exited.
    Guard<SpinLock, IrqSave> guard{&Thread::get_list_lock()};
    if (thread_ != nullptr) {
      SingleChainLockGuard thread_guard{NoIrqSaveOption, thread_->get_lock(),
                                        CLT_TAG("Vcpu::~Vcpu")};
      thread_->set_vcpu(false);
      // Clear the migration function, so that |thread_| does not reference
      // |this| after destruction of the VCPU.
      thread_->SetMigrateFnLocked(nullptr);
    }
  }

  auto result = guest_.FreeVpid(vpid_);
  ZX_ASSERT(result.is_ok());
}

void Vcpu::Migrate(Thread* thread, Thread::MigrateStage stage) {
  // The thread being passed to us must be _our_ thread, and its lock must
  // currently be held (a requirement for calling a Migration function). Assert
  // this to make the lock analysis happy.
  DEBUG_ASSERT(ThreadIsOurThread(thread));
  [this]() TA_NO_THREAD_SAFETY_ANALYSIS TA_RET_CAP(
      thread_->get_lock()) -> auto& { return thread_->get_lock(); }().AssertHeld();

  switch (stage) {
    case Thread::MigrateStage::Save:
      last_cpu_ = INVALID_CPU;
      break;
    case Thread::MigrateStage::Restore:
      // After thread migration, update the |last_cpu_| for Vcpu::Interrupt().
      DEBUG_ASSERT(last_cpu_ == INVALID_CPU);
      last_cpu_ = thread->LastCpuLocked();
      break;
    case Thread::MigrateStage::Exiting:
      // The |thread_| is exiting and so we must clear our reference to it.
      Thread::get_list_lock().lock().AssertHeld();
      ktl::atomic_ref{thread_}.store(nullptr, ktl::memory_order_relaxed);
      break;
  }
}

void Vcpu::InterruptCpu() {
  // Enter the global thread list lock, allowing us to see if our thread still exists.
  Guard<SpinLock, IrqSave> guard{&Thread::get_list_lock()};
  if (thread_ != nullptr) {
    // If our thread is still around, grab its lock to prevent our last_cpu_
    // bookkeeping from changing.
    //
    // TODO(johngro): Do we need any of this? Can we just use the Thread's
    // last_cpu member instead?
    SingleChainLockGuard thread_guard{NoIrqSaveOption, thread_->get_lock(),
                                      CLT_TAG("Vcpu::InterruptCpu")};

    // If the VCPU thread is still running, and we have a valid last_cpu_, send
    // the thread's CPU an IPI.
    if (thread_->state() == THREAD_RUNNING && last_cpu_ != INVALID_CPU) {
      mp_interrupt(mp_ipi_target::MASK, cpu_num_to_mask(last_cpu_));
    }
  }
}

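// Runs the guest on the calling thread until a packet needs to be delivered to user-space, the
// VCPU is kicked, or an error occurs.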
zx::result<> Vcpu::Enter(zx_port_packet_t& packet) {
  Thread* current_thread = Thread::Current::Get();
  if (!ThreadIsOurThread(current_thread)) {
    return zx::error(ZX_ERR_BAD_STATE);
  }

  const ArchVmAspace& arch_aspace = guest_.AddressSpace().arch_aspace();
  uint64_t vttbr = arm64_vttbr(arch_aspace.arch_asid(), arch_aspace.arch_table_phys());
  GuestState* guest_state = &el2_state_->guest_state;
  IchState* ich_state = &el2_state_->ich_state;
  zx_status_t status;
  do {
    // If the thread was killed or suspended, then we should exit with an error.
    status = current_thread->CheckKillOrSuspendSignal();
    if (status != ZX_OK) {
      return zx::error(status);
    }
    timer_maybe_interrupt(guest_state, &gich_state_);
    gich_maybe_interrupt(&gich_state_, ich_state);
    {
      AutoGich auto_gich(ich_state, gich_state_.Pending());

      // We check whether a kick was requested before entering the guest so that:
      // 1. When we enter the syscall, we can return immediately without
      //    entering the guest.
      // 2. If we have already exited the guest to handle a packet, we can
      //    return and give user-space a chance to handle that packet, without
      //    the kick request interfering with the packet in flight.
      //
      // We also do this after interrupts have been disabled: if an interrupt
      // fired before we disabled interrupts, we still get to observe the kick
      // request even though the interrupt itself was lost. If an interrupt
      // fires after we have disabled interrupts, entering the guest will cause
      // an exit due to that interrupt, and we will run this check again.
      if (kicked_.exchange(false)) {
        return zx::error(ZX_ERR_CANCELED);
      }

      KTRACE_DURATION_BEGIN("kernel:arch", "vcpu");

      GUEST_STATS_INC(vm_entries);
      status = arm64_el2_enter(vttbr, el2_state_.PhysicalAddress(), hcr_);
      GUEST_STATS_INC(vm_exits);
    }
    gich_state_.TrackAllListRegisters(ich_state);
    if (status == ZX_ERR_NEXT) {
      // We received a physical interrupt. Continue execution of the guest.
      ktrace_vcpu_exit(vmexit_interrupt_ktrace_meta(ich_state->misr),
                       guest_state->system_state.elr_el2);
      GUEST_STATS_INC(interrupts);
      status = ZX_OK;
    } else if (status == ZX_OK) {
      status = vmexit_handler(&hcr_, guest_state, &gich_state_, &guest_.AddressSpace(),
                              &guest_.Traps(), &packet)
                   .status_value();
    } else {
      ktrace_vcpu_exit(VCPU_FAILURE, guest_state->system_state.elr_el2);
      dprintf(INFO, "hypervisor: VCPU enter failed: %d\n", status);
    }
  } while (status == ZX_OK);
  return zx::make_result(status == ZX_ERR_NEXT ? ZX_OK : status);
}

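// Forces the VCPU out of the guest (or out of a wait-for-interrupt) so that Vcpu::Enter returns
// ZX_ERR_CANCELED.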
void Vcpu::Kick() {
  kicked_.store(true);
  // Cancel any pending or upcoming wait-for-interrupts.
  gich_state_.Cancel();
  // Send an IPI to our thread's CPU, if it is currently running.
  InterruptCpu();
}

void Vcpu::Interrupt(uint32_t vector) {
  gich_state_.Interrupt(vector);
  // Send an IPI to our thread's CPU, if it is currently running.
  InterruptCpu();
}

zx::result<> Vcpu::ReadState(zx_vcpu_state_t& state) const {
  if (!ThreadIsOurThread(Thread::Current::Get())) {
    return zx::error(ZX_ERR_BAD_STATE);
  }

  ASSERT(sizeof(state.x) >= sizeof(el2_state_->guest_state.x));
  memcpy(state.x, el2_state_->guest_state.x, sizeof(el2_state_->guest_state.x));
  state.sp = el2_state_->guest_state.system_state.sp_el1;
  state.cpsr = el2_state_->guest_state.system_state.spsr_el2 & kSpsrNzcv;
  return zx::ok();
}

zx::result<> Vcpu::WriteState(const zx_vcpu_state_t& state) {
  if (!ThreadIsOurThread(Thread::Current::Get())) {
    return zx::error(ZX_ERR_BAD_STATE);
  }

  ASSERT(sizeof(el2_state_->guest_state.x) >= sizeof(state.x));
  memcpy(el2_state_->guest_state.x, state.x, sizeof(state.x));
  el2_state_->guest_state.system_state.sp_el1 = state.sp;
  el2_state_->guest_state.system_state.spsr_el2 |= state.cpsr & kSpsrNzcv;
  return zx::ok();
}

zx_info_vcpu_t Vcpu::GetInfo() const {
  if (kicked_.load()) {
    return {.flags = ZX_INFO_VCPU_FLAG_KICKED};
  }
  return {};
}