// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/virtualization/bin/vmm/guest.h"
#include <lib/syslog/cpp/macros.h>
#include <lib/zircon-internal/align.h>
#include <zircon/assert.h>
#include <zircon/status.h>
#include <zircon/syscalls/hypervisor.h>
#include <zircon/threads.h>
#include "src/lib/fxl/strings/string_printf.h"
#include "src/virtualization/bin/vmm/pci.h"
#include "src/virtualization/bin/vmm/sysinfo.h"
namespace {
#if __aarch64__
constexpr uint8_t kSpiBase = 32;
#endif
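// Builds a restriction covering everything from |start| to the end of the guest physical address
// space.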
constexpr GuestMemoryRegion RestrictUntilEnd(zx_gpaddr_t start) {
return {start, kGuestMemoryAllRemainingRange};
}
#if __x86_64__
constexpr uint64_t kOneKibibyte = 1ul << 10;
constexpr uint64_t kOneMebibyte = 1ul << 20;
constexpr uint64_t kOneGibibyte = 1ul << 30;
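// Builds a restriction covering the half-open range [start, end).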
constexpr GuestMemoryRegion RestrictRegion(zx_gpaddr_t start, zx_gpaddr_t end) {
return {start, end - start};
}
#endif
// Ranges to avoid allocating guest memory in. These regions must not overlap and must be
// sorted by increasing base address. These requirements are enforced by static_asserts
// below.
constexpr std::array kRestrictedRegions = {
#if __aarch64__
    // On ARM, PCI devices are mapped in at a relatively high address, so it's reasonable to just
    // block off the rest of guest memory.
RestrictUntilEnd(std::min(kDevicePhysBase, kFirstDynamicDeviceAddr)),
#elif __x86_64__
// Reserve regions in the first MiB for use by the BIOS.
RestrictRegion(0x0, 32 * kOneKibibyte),
RestrictRegion(512 * kOneKibibyte, kOneMebibyte),
    // On x86, PCI devices are mapped in somewhere below 4 GiB, and the device range extends up to
    // 4 GiB.
RestrictRegion(kDevicePhysBase, 4 * kOneGibibyte),
// Dynamic devices are mapped in at a very high address, so everything beyond that point
// can be blocked off.
RestrictUntilEnd(kFirstDynamicDeviceAddr),
#endif
};
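// Returns true iff no two entries in kRestrictedRegions overlap; evaluated at compile time by the
// static_assert below.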
constexpr bool CheckForOverlappingRestrictedRegions() {
auto overlaps = [](const GuestMemoryRegion& first, const GuestMemoryRegion& second) -> bool {
const auto& begin = std::min(first, second, GuestMemoryRegion::CompareMinByBase);
const auto& end = std::max(first, second, GuestMemoryRegion::CompareMinByBase);
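    // Note that '>=' also treats abutting regions as overlapping: adjacent restrictions are
    // expected to be merged into a single larger region.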
return begin.base + begin.size >= end.base;
};
for (auto curr = kRestrictedRegions.begin(); curr != kRestrictedRegions.end(); curr++) {
for (auto next = std::next(curr); next != kRestrictedRegions.end(); next++) {
if (overlaps(*curr, *next)) {
return false;
}
}
}
return true;
}
// Compile time check that no regions overlap in kRestrictedRegions. If adding a region that
// overlaps with another, just merge them into one larger region.
static_assert(CheckForOverlappingRestrictedRegions());
constexpr bool CheckRestrictedRegionsAreSorted() {
for (auto curr = kRestrictedRegions.begin(); curr != kRestrictedRegions.end(); curr++) {
if (std::next(curr) == kRestrictedRegions.end()) {
break;
}
if (!GuestMemoryRegion::CompareMinByBase(*curr, *std::next(curr))) {
return false;
}
}
return true;
}
// Compile time check that regions in kRestrictedRegions are sorted by increasing base address.
static_assert(CheckRestrictedRegionsAreSorted());
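// Translates a vmm TrapType into the corresponding ZX_GUEST_TRAP_* kind.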
constexpr uint32_t trap_kind(TrapType type) {
switch (type) {
case TrapType::MMIO_SYNC:
return ZX_GUEST_TRAP_MEM;
case TrapType::MMIO_BELL:
return ZX_GUEST_TRAP_BELL;
case TrapType::PIO_SYNC:
return ZX_GUEST_TRAP_IO;
default:
ZX_PANIC("Unhandled TrapType %d", static_cast<int>(type));
return 0;
}
}
} // namespace
// Static.
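// Returns the compile-time list of restricted ranges for the current architecture.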
cpp20::span<const GuestMemoryRegion> Guest::GetDefaultRestrictionsForArchitecture() {
return kRestrictedRegions;
}
// Static.
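// Rounds |guest_memory| up to the next multiple of the system page size.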
uint64_t Guest::GetPageAlignedGuestMemory(uint64_t guest_memory) {
const uint32_t page_size = zx_system_get_page_size();
uint32_t page_alignment = guest_memory % page_size;
if (page_alignment != 0) {
uint32_t padding = page_size - page_alignment;
FX_LOGS(INFO) << "The requested guest memory (" << guest_memory
<< " bytes) is not a multiple of system page size (" << page_size
<< " bytes), so increasing guest memory by " << padding << " bytes.";
guest_memory += padding;
}
return guest_memory;
}
// Static.
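// Shrinks |region| to its largest page-aligned subregion, returning false if less than one full
// page remains.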
bool Guest::PageAlignGuestMemoryRegion(GuestMemoryRegion& region) {
const uint32_t page_size = zx_system_get_page_size();
  // This guest region is bounded by restricted regions, so its size cannot be increased. If the
  // region is smaller than a page it must simply be discarded.
if (region.size < page_size) {
return false;
}
zx_gpaddr_t start = region.base;
zx_gpaddr_t end = region.base + region.size;
// Round the starting address up to the nearest page, and the ending address down to the nearest
// page.
if (start % page_size != 0) {
start += page_size - (start % page_size);
}
if (end % page_size != 0) {
end -= end % page_size;
}
  // Require a valid region to be at least a single page in size after the adjustments; since both
  // start and end are now page aligned, start < end guarantees this.
if (start >= end) {
return false;
}
region.base = start;
region.size = end - start;
return true;
}
// Static.
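// Distributes |guest_memory| bytes of RAM across the unrestricted gaps between |restrictions|,
// appending the chosen regions to |regions|. Returns false if the gaps cannot hold the requested
// amount of memory.
//
// Illustrative example (simplified to just the two x86 BIOS restrictions): with restrictions
// [0, 32 KiB) and [512 KiB, 1 MiB), a request for 1 MiB produces the regions
// {base = 32 KiB, size = 480 KiB} and {base = 1 MiB, size = 544 KiB}.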
bool Guest::GenerateGuestMemoryRegions(uint64_t guest_memory,
cpp20::span<const GuestMemoryRegion> restrictions,
std::vector<GuestMemoryRegion>* regions) {
  // Special case where there are no restrictions. This currently never happens on any production
  // architecture, as dynamic device addresses must always be assigned.
if (restrictions.empty()) {
regions->push_back({.base = 0x0, .size = guest_memory});
return true;
}
bool first_region = true;
GuestMemoryRegion current_region;
auto restriction = restrictions.begin();
fit::function<bool()> next_range = [&]() -> bool {
if (first_region) {
first_region = false;
if (restriction->base != 0) {
current_region = {0x0, restriction->base};
} else {
return next_range();
}
} else {
if (restriction->size == kGuestMemoryAllRemainingRange) {
return false; // No remaining valid guest memory regions.
}
      // The current unrestricted region extends from the end of the current restriction to the
      // start of the next restriction, or, if this is the last restriction, to the end of the
      // usable guest physical address range.
zx_gpaddr_t unrestricted_base_address = restriction->base + restriction->size;
uint64_t unrestricted_size = std::next(restriction) == restrictions.end()
? kGuestMemoryAllRemainingRange - unrestricted_base_address
: std::next(restriction)->base - unrestricted_base_address;
current_region = {unrestricted_base_address, unrestricted_size};
restriction++;
}
if (!Guest::PageAlignGuestMemoryRegion(current_region)) {
return next_range();
}
return true;
};
uint64_t mem_required = guest_memory;
while (mem_required > 0) {
if (!next_range()) {
FX_LOGS(ERROR) << "Unable to allocate enough guest memory due to guest memory restrictions. "
"Managed to allocate "
<< guest_memory - mem_required << " of " << guest_memory << " bytes";
return false;
}
uint64_t mem_used = std::min(current_region.size, mem_required);
regions->push_back({current_region.base, mem_used});
mem_required -= mem_used;
}
return true;
}
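// Finds the lowest |alignment|-aligned address at or above |base| where |size| bytes fit without
// overlapping any restriction. A single pass suffices because |restrictions| is sorted by
// increasing base address. Returns false if the region collides with the unbounded final
// restriction.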
bool Guest::FitPluggableRegionBase(cpp20::span<const GuestMemoryRegion> restrictions, uint64_t base,
uint64_t size, uint64_t alignment, uint64_t* result_base) {
base = ZX_ALIGN(base, alignment);
for (auto& restriction : restrictions) {
if (restriction.HasOverlap(GuestMemoryRegion{base, size})) {
if (restriction.size == kGuestMemoryAllRemainingRange) {
return false;
}
base = ZX_ALIGN(restriction.base + restriction.size, alignment);
}
}
*result_base = base;
return true;
}
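// Creates the guest and lays out its physical memory: RAM is placed around the architecture's
// restricted ranges, an optional pluggable (virtio-mem) region is appended, and a single
// executable VMO backing all regions is mapped into the guest VMAR.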
zx_status_t Guest::Init(uint64_t guest_memory, uint64_t pluggable_region_size,
uint64_t pluggable_region_alignment) {
zx::resource hypervisor_resource;
zx_status_t status = get_hypervisor_resource(&hypervisor_resource);
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to get hypervisor resource";
return status;
}
status = zx::guest::create(hypervisor_resource, 0, &guest_, &vmar_);
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to create guest";
return status;
}
// If unaligned, round up to the nearest page.
guest_memory = Guest::GetPageAlignedGuestMemory(guest_memory);
// Generate guest memory regions, avoiding device memory.
  if (!Guest::GenerateGuestMemoryRegions(
          guest_memory, Guest::GetDefaultRestrictionsForArchitecture(), &memory_regions_)) {
    FX_PLOGS(ERROR, ZX_ERR_INVALID_ARGS) << "Failed to place guest memory avoiding device memory "
                                            "ranges. Try requesting less memory.";
    return ZX_ERR_INVALID_ARGS;
  }
if (pluggable_region_size > 0) {
uint64_t pluggable_region_base = memory_regions_.back().base + memory_regions_.back().size;
    // Calculate the position of the pluggable memory region.
    if (!Guest::FitPluggableRegionBase(Guest::GetDefaultRestrictionsForArchitecture(),
                                       pluggable_region_base, pluggable_region_size,
                                       pluggable_region_alignment, &mem_pluggable_region_addr_)) {
      status = ZX_ERR_INVALID_ARGS;
      FX_PLOGS(ERROR, status) << "Failed to place pluggable memory region avoiding device memory "
                                 "ranges. Try requesting a smaller pluggable region size.";
      return status;
    }
    memory_regions_.push_back({mem_pluggable_region_addr_, pluggable_region_size});
}
uint64_t vmo_size = memory_regions_.back().base + memory_regions_.back().size;
zx::vmo vmo;
status = zx::vmo::create(vmo_size, 0, &vmo);
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to create VMO of size " << vmo_size;
return status;
}
zx::resource vmex_resource;
status = get_vmex_resource(&vmex_resource);
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to get VMEX resource";
return status;
}
status = vmo.replace_as_executable(vmex_resource, &vmo);
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to make VMO executable";
return status;
}
std::vector<GuestMemoryRegion> vmar_regions = memory_regions_;
if (pluggable_region_size > 0) {
    // We do want a VMAR mapping for the pluggable memory region, but we don't
    // want the pluggable memory region to be part of the e820 memory map.
    // So, leave the pluggable memory region in vmar_regions and remove it
    // from memory_regions_, which is later used to set up the e820 map.
    //
    // TODO(https://fxbug.dev/42051237): Get virtio-mem to take the guest and host VMARs,
    // keep all pluggable memory unmapped, and only map plugged regions. This
    // would require adding EXECUTE and perhaps other flags to the VMO which is
    // passed to virtio-mem. Mapping only plugged regions can make existing
    // virtio-mem tests flaky, because a test is not guaranteed to use the
    // plugged memory to allocate something which requires EXECUTE. A stress
    // test for virtio-mem is needed before making this change.
FX_CHECK(!memory_regions_.empty());
FX_CHECK(memory_regions_.back().base == mem_pluggable_region_addr_);
memory_regions_.pop_back();
}
#if __x86_64__
// x86 has reserved memory from 0 to 32KiB, and 512KiB to 1MiB. While we will not allocate guest
// memory in those regions, we still want to map these regions into the guest VMAR as they are
// not devices and we do not wish to trap on them.
vmar_regions.push_back({0, 32 * kOneKibibyte});
vmar_regions.push_back({512 * kOneKibibyte, 512 * kOneKibibyte});
#endif
for (const GuestMemoryRegion& region : vmar_regions) {
zx_gpaddr_t addr;
status = vmar_.map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | ZX_VM_PERM_EXECUTE | ZX_VM_SPECIFIC |
ZX_VM_REQUIRE_NON_RESIZABLE,
region.base, vmo, region.base, region.size, &addr);
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to map guest physical memory region " << region.base
<< " - " << region.base + region.size;
return status;
}
}
status = phys_mem_.Init(vmar_regions, std::move(vmo));
if (status != ZX_OK) {
FX_PLOGS(ERROR, status) << "Failed to initialize guest physical memory";
return status;
}
return ZX_OK;
}
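// Installs a trap of the given |type| over [addr, addr + size) and routes guest accesses to
// |handler|; the mapping is discarded if the trap cannot be set.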
zx_status_t Guest::CreateMapping(TrapType type, uint64_t addr, size_t size, uint64_t offset,
IoHandler* handler, async_dispatcher_t* dispatcher) {
uint32_t kind = trap_kind(type);
mappings_.emplace_front(kind, addr, size, offset, handler);
zx_status_t status = mappings_.front().SetTrap(this, dispatcher);
if (status != ZX_OK) {
mappings_.pop_front();
return status;
}
return ZX_OK;
}
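// Allocates a child VMAR at the fixed guest-physical address |addr|.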
zx_status_t Guest::CreateSubVmar(uint64_t addr, size_t size, zx::vmar* vmar) {
uintptr_t guest_addr;
return vmar_.allocate(ZX_VM_CAN_MAP_READ | ZX_VM_CAN_MAP_WRITE | ZX_VM_SPECIFIC, addr, size, vmar,
&guest_addr);
}
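// Creates and starts VCPU |id|. VCPU-0 must be started before any other VCPU, and repeated
// requests for an already-started VCPU succeed without effect.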
zx_status_t Guest::StartVcpu(uint64_t id, zx_gpaddr_t entry, zx_gpaddr_t boot_ptr) {
if (id >= kMaxVcpus) {
FX_PLOGS(ERROR, ZX_ERR_OUT_OF_RANGE)
<< "Failed to start VCPU-" << id << ", up to " << kMaxVcpus << " VCPUs are supported";
return ZX_ERR_OUT_OF_RANGE;
}
std::lock_guard<std::shared_mutex> lock(mutex_);
if (!vcpus_[0].has_value() && id != 0) {
FX_PLOGS(ERROR, ZX_ERR_BAD_STATE) << "VCPU-0 must be started before other VCPUs";
return ZX_ERR_BAD_STATE;
}
if (vcpus_[id].has_value()) {
    // The guest might make multiple requests to start a particular VCPU. On
    // x86, the guest should send two STARTUP IPIs, but we initialize the VCPU
    // on the first, so we ignore subsequent requests.
return ZX_OK;
}
vcpus_[id].emplace(id, this, entry, boot_ptr);
return vcpus_[id]->Start();
}
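// Delivers |vector| to every VCPU whose bit is set in |mask|. On ARM, an SPI only needs to be
// injected once, so delivery stops after the first targeted VCPU.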
zx_status_t Guest::Interrupt(uint64_t mask, uint32_t vector) {
std::shared_lock<std::shared_mutex> lock(mutex_);
for (size_t id = 0; id != kMaxVcpus; ++id) {
if (!(mask & (1ul << id)) || !vcpus_[id]) {
continue;
}
zx_status_t status = vcpus_[id]->Interrupt(vector);
if (status != ZX_OK) {
return status;
}
#if __aarch64__
if (vector >= kSpiBase) {
break;
}
#endif
}
return ZX_OK;
}
void Guest::set_stop_callback(
fit::function<void(fit::result<::fuchsia::virtualization::GuestError>)> stop_callback) {
stop_callback_ = std::move(stop_callback);
}
void Guest::Stop(fit::result<::fuchsia::virtualization::GuestError> result) {
FX_CHECK(stop_callback_);
stop_callback_(result);
}