| // Copyright 2016 The Fuchsia Authors |
| // Copyright (c) 2015 Travis Geiselbrecht |
| // |
| // Use of this source code is governed by a MIT-style |
| // license that can be found in the LICENSE file or at |
| // https://opensource.org/licenses/MIT |
| |
| #include <arch.h> |
| #include <debug.h> |
| #include <lib/arch/intrin.h> |
| #include <lib/boot-options/boot-options.h> |
| #include <lib/console.h> |
| #include <lib/crashlog.h> |
| #include <lib/debuglog.h> |
| #include <lib/instrumentation/asan.h> |
| #include <lib/jtrace/jtrace.h> |
| #include <lib/memalloc/range.h> |
| #include <lib/persistent-debuglog.h> |
| #include <lib/system-topology.h> |
| #include <lib/zbi-format/kernel.h> |
| #include <mexec.h> |
| #include <platform.h> |
| #include <reg.h> |
| #include <string-file.h> |
| #include <trace.h> |
| |
| #include <arch/arch_ops.h> |
| #include <arch/arm64.h> |
| #include <arch/arm64/mmu.h> |
| #include <arch/arm64/mp.h> |
| #include <arch/arm64/periphmap.h> |
| #include <arch/mp.h> |
| #include <dev/hw_rng.h> |
| #include <dev/interrupt.h> |
| #include <dev/power.h> |
| #include <dev/psci.h> |
| #include <explicit-memory/bytes.h> |
| #include <fbl/ref_ptr.h> |
| #include <kernel/cpu.h> |
| #include <kernel/cpu_distance_map.h> |
| #include <kernel/dpc.h> |
| #include <kernel/persistent_ram.h> |
| #include <kernel/spinlock.h> |
| #include <kernel/topology.h> |
| #include <ktl/algorithm.h> |
| #include <ktl/atomic.h> |
| #include <ktl/byte.h> |
| #include <ktl/span.h> |
| #include <ktl/variant.h> |
| #include <lk/init.h> |
| #include <object/resource_dispatcher.h> |
| #include <phys/handoff.h> |
| #include <platform/crashlog.h> |
| #include <platform/debug.h> |
| #include <vm/kstack.h> |
| #include <vm/physmap.h> |
| #include <vm/vm.h> |
| #include <vm/vm_aspace.h> |
| |
| #include <ktl/enforce.h> |
| |
| #if WITH_PANIC_BACKTRACE |
| #include <kernel/thread.h> |
| #endif |
| |
| #include <lib/zbi-format/zbi.h> |
| #include <zircon/errors.h> |
| #include <zircon/rights.h> |
| #include <zircon/syscalls/smc.h> |
| #include <zircon/types.h> |
| |
| #include <platform/ram_mappable_crashlog.h> |
| |
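| // One-shot flags: the first CPU to enter the panic path or the halt-other-CPUs path sets |
| // the corresponding flag so that work happens only once. |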
| static ktl::atomic<int> panic_started; |
| static ktl::atomic<int> halted; |
| |
| namespace { |
| |
| lazy_init::LazyInit<RamMappableCrashlog, lazy_init::CheckType::None, |
| lazy_init::Destructor::Disabled> |
| ram_mappable_crashlog; |
| |
| } // namespace |
| |
| static void halt_other_cpus(void) { |
| if (halted.exchange(1) == 0) { |
| // stop the other cpus |
| printf("stopping other cpus\n"); |
| arch_mp_send_ipi(MP_IPI_TARGET_ALL_BUT_LOCAL, 0, MP_IPI_HALT); |
| |
| // spin for a while |
| // TODO: find a better way to spin at this low level |
| for (int i = 0; i < 100000000; i = i + 1) { |
| arch::Yield(); |
| } |
| } |
| } |
| |
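| // ToSmtMpid() and ToMpid() pack a topology node's affinity fields into an MPIDR-style |
| // value: Aff3 in bits [39:32], Aff2 in [23:16], Aff1 in [15:8], Aff0 (cpu_id) in [7:0]. |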
| // On SMT systems the AFF0 (cpu_id) level is implicit rather than stored in the topology |
| // info, so the caller supplies it. |
| static uint64_t ToSmtMpid(const zbi_topology_processor_t& processor, uint8_t cpu_id) { |
| DEBUG_ASSERT(processor.architecture_info.discriminant == ZBI_TOPOLOGY_ARCHITECTURE_INFO_ARM64); |
| const auto& info = processor.architecture_info.arm64; |
| return (uint64_t)info.cluster_3_id << 32 | info.cluster_2_id << 16 | info.cluster_1_id << 8 | |
| cpu_id; |
| } |
| |
| static uint64_t ToMpid(const zbi_topology_processor_t& processor) { |
| DEBUG_ASSERT(processor.architecture_info.discriminant == ZBI_TOPOLOGY_ARCHITECTURE_INFO_ARM64); |
| const auto& info = processor.architecture_info.arm64; |
| return (uint64_t)info.cluster_3_id << 32 | info.cluster_2_id << 16 | info.cluster_1_id << 8 | |
| info.cpu_id; |
| } |
| |
| // TODO(https://fxbug.dev/42180675): Refactor platform_panic_start. |
| void platform_panic_start(PanicStartHaltOtherCpus option) { |
| arch_disable_ints(); |
| dlog_panic_start(); |
| |
| if (option == PanicStartHaltOtherCpus::Yes) { |
| halt_other_cpus(); |
| } |
| |
| if (panic_started.exchange(1) == 0) { |
| dlog_bluescreen_init(); |
| // Attempt to dump the current debug trace buffer, if we have one. |
| jtrace_dump(jtrace::TraceBufferType::Current); |
| } |
| } |
| |
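| // Take the calling CPU offline via the platform power driver. On PSCI-based systems this |
| // ends up as a PSCI CPU_OFF call, which does not return on success. |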
| void platform_halt_cpu(void) { |
| uint32_t result = power_cpu_off(); |
| // should never have returned |
| panic("power_cpu_off returned %u\n", result); |
| } |
| |
| zx_status_t platform_start_cpu(cpu_num_t cpu_id, uint64_t mpid) { |
| auto clean_data_object = [](auto& mem) { |
| ZX_ASSERT(sizeof(mem) < arm64_dcache_size); |
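| // "dc cvac" cleans the data cache line containing the object, by virtual address, to the |
| // Point of Coherency. |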
| __asm__ volatile("dc cvac, %0" ::"r"(&mem), "m"(mem)); |
| }; |
| |
| // We must ensure that the memory the secondary CPUs will touch before |
| // enabling their caches (i.e., at .Lmmu_enable in start.S) is coherent with |
| // the boot CPU. Beyond the instruction memory itself, this consists of the |
| // variables modified by the boot CPU and read by the secondaries in |
| // this stage, which are those holding the root bootstrap and kernel page |
| // tables. We explicitly clean that memory to the point of coherency and |
| // issue a memory barrier to commit the writes. |
| // |
| // TODO(https://fxbug.dev/42164859): Explicitly clean the pre-caches-on |
| // instruction memory as well. |
| clean_data_object(root_lower_page_table_phys); |
| clean_data_object(root_kernel_page_table_phys); |
| arch::ThreadMemoryBarrier(); |
| |
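| // Compute the physical address of the secondary entry point: the physical load address |
| // plus the entry symbol's offset from the start of the kernel image. |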
| uintptr_t kernel_secondary_entry_paddr = |
| KernelPhysicalLoadAddress() + (reinterpret_cast<uintptr_t>(&arm64_secondary_start) - |
| reinterpret_cast<uintptr_t>(__executable_start)); |
| |
| uint32_t ret = power_cpu_on(mpid, kernel_secondary_entry_paddr, 0); |
| dprintf(INFO, "Trying to start cpu %u, mpid %#" PRIx64 " returned: %d\n", cpu_id, mpid, (int)ret); |
| if (ret != 0) { |
| return ZX_ERR_INTERNAL; |
| } |
| return ZX_OK; |
| } |
| |
| zx::result<power_cpu_state> platform_get_cpu_state(cpu_num_t cpu_id) { |
| DEBUG_ASSERT(cpu_id < SMP_MAX_CPUS); |
| return power_get_cpu_state(arch_cpu_num_to_mpidr(cpu_id)); |
| } |
| |
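| // Bring up the secondary CPUs described by the system topology: register each logical |
| // CPU's MPID, allocate it a boot stack, then ask the platform to start it. |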
| static void topology_cpu_init(void) { |
| for (auto* node : system_topology::GetSystemTopology().processors()) { |
| if (node->entity.discriminant != ZBI_TOPOLOGY_ENTITY_PROCESSOR || |
| node->entity.processor.architecture_info.discriminant != |
| ZBI_TOPOLOGY_ARCHITECTURE_INFO_ARM64) { |
| panic("Invalid processor node."); |
| } |
| |
| zx_status_t status; |
| const auto& processor = node->entity.processor; |
| for (uint8_t i = 0; i < processor.logical_id_count; i++) { |
| const uint64_t mpid = |
| (processor.logical_id_count > 1) ? ToSmtMpid(processor, i) : ToMpid(processor); |
| arch_register_mpid(processor.logical_ids[i], mpid); |
| |
| // Skip processor 0; we are only starting secondary processors. |
| if (processor.logical_ids[i] == 0) { |
| continue; |
| } |
| |
| status = arm64_create_secondary_stack(processor.logical_ids[i], mpid); |
| DEBUG_ASSERT(status == ZX_OK); |
| |
| // start the cpu |
| status = platform_start_cpu(processor.logical_ids[i], mpid); |
| |
| if (status != ZX_OK) { |
| // TODO(maniscalco): Is continuing really the right thing to do here? |
| |
| // start failed, free the stack |
| status = arm64_free_secondary_stack(processor.logical_ids[i]); |
| DEBUG_ASSERT(status == ZX_OK); |
| continue; |
| } |
| } |
| } |
| } |
| |
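| // Minimal single-processor topology used as a fallback when the handed-off topology |
| // cannot be initialized. |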
| static constexpr zbi_topology_node_t fallback_topology = { |
| .entity = {.discriminant = ZBI_TOPOLOGY_ENTITY_PROCESSOR, |
| .processor = |
| { |
| .architecture_info = |
| { |
| .discriminant = ZBI_TOPOLOGY_ARCHITECTURE_INFO_ARM64, |
| .arm64 = |
| { |
| .cluster_1_id = 0, |
| .cluster_2_id = 0, |
| .cluster_3_id = 0, |
| .cpu_id = 0, |
| .gic_id = 0, |
| }, |
| }, |
| .flags = 0, |
| .logical_ids = {0}, |
| .logical_id_count = 1, |
| |
| }}, |
| .parent_index = ZBI_TOPOLOGY_NO_PARENT, |
| }; |
| |
| static void init_topology(uint level) { |
| ktl::span handoff = gPhysHandoff->cpu_topology.get(); |
| |
| auto result = system_topology::Graph::InitializeSystemTopology(handoff.data(), handoff.size()); |
| if (result != ZX_OK) { |
| printf("Failed to initialize system topology! error: %d\n", result); |
| |
| // Try to fall back to a topology of just this processor. |
| result = system_topology::Graph::InitializeSystemTopology(&fallback_topology, 1); |
| ASSERT(result == ZX_OK); |
| } |
| |
| arch_set_num_cpus(static_cast<uint>(system_topology::GetSystemTopology().processor_count())); |
| |
| if (DPRINTF_ENABLED_FOR_LEVEL(INFO)) { |
| for (auto* proc : system_topology::GetSystemTopology().processors()) { |
| auto& info = proc->entity.processor.architecture_info.arm64; |
| dprintf(INFO, "System topology: CPU %u:%u:%u:%u\n", info.cluster_3_id, info.cluster_2_id, |
| info.cluster_1_id, info.cpu_id); |
| } |
| } |
| } |
| |
| LK_INIT_HOOK(init_topology, init_topology, LK_INIT_LEVEL_VM) |
| |
| static void allocate_persistent_ram(paddr_t pa, size_t length) { |
| // Figure out how to divide up our persistent RAM. Right now there are |
| // three potential users: |
| // |
| // 1) The crashlog. |
| // 2) Persistent debug logging. |
| // 3) Persistent debug tracing. |
| // |
| // Persistent debug logging and tracing have target amounts of RAM they would |
| // _like_ to have, and crash-logging has a minimum amount it is guaranteed to |
| // get. Additionally, all allocations are made in chunks of the minimum |
| // persistent RAM allocation granularity. |
| // |
| // Make sure that the crashlog gets as much of its minimum allocation as is |
| // possible. Then attempt to satisfy the target for persistent debug logging, |
| // followed by persistent debug tracing. Finally, give any leftovers to |
| // the crashlog. |
| size_t crashlog_size = 0; |
| size_t pdlog_size = 0; |
| size_t jtrace_size = 0; |
| { |
| // Start by figuring out how many chunks of RAM we have available in total. |
| size_t persistent_chunks_available = length / kPersistentRamAllocationGranularity; |
| |
| // If we have not already configured a non-trivial crashlog implementation |
| // for the platform, make sure that the crashlog gets its minimum allocation, or |
| // all of the RAM if it cannot meet even its minimum allocation. |
| size_t crashlog_chunks = !PlatformCrashlog::HasNonTrivialImpl() |
| ? ktl::min(persistent_chunks_available, |
| kMinCrashlogSize / kPersistentRamAllocationGranularity) |
| : 0; |
| persistent_chunks_available -= crashlog_chunks; |
| |
| // Next in line is persistent debug logging. |
| size_t pdlog_chunks = |
| ktl::min(persistent_chunks_available, |
| kTargetPersistentDebugLogSize / kPersistentRamAllocationGranularity); |
| persistent_chunks_available -= pdlog_chunks; |
| |
| // Next up is persistent debug tracing. |
| size_t jtrace_chunks = |
| ktl::min(persistent_chunks_available, |
| kJTraceTargetPersistentBufferSize / kPersistentRamAllocationGranularity); |
| persistent_chunks_available -= jtrace_chunks; |
| |
| // Finally, anything left over can go to the crashlog. |
| crashlog_chunks += persistent_chunks_available; |
| |
| crashlog_size = crashlog_chunks * kPersistentRamAllocationGranularity; |
| pdlog_size = pdlog_chunks * kPersistentRamAllocationGranularity; |
| jtrace_size = jtrace_chunks * kPersistentRamAllocationGranularity; |
| } |
| |
| // Configure the crashlog RAM |
| if (crashlog_size > 0) { |
| dprintf(INFO, "Crashlog configured with %" PRIu64 " bytes\n", crashlog_size); |
| ram_mappable_crashlog.Initialize(pa, crashlog_size); |
| PlatformCrashlog::Bind(ram_mappable_crashlog.Get()); |
| } |
| size_t offset = crashlog_size; |
| |
| // Configure the persistent debuglog RAM (if we have any) |
| if (pdlog_size > 0) { |
| dprintf(INFO, "Persistent debug logging enabled and configured with %" PRIu64 " bytes\n", |
| pdlog_size); |
| persistent_dlog_set_location(paddr_to_physmap(pa + offset), pdlog_size); |
| offset += pdlog_size; |
| } |
| |
| // Do _not_ attempt to set the location of the debug trace buffer if this is |
| // not a persistent debug trace buffer. The location of a non-persistent |
| // trace buffer would have been already set during (very) early init. |
| if constexpr (kJTraceIsPersistent == jtrace::IsPersistent::Yes) { |
| jtrace_set_location(paddr_to_physmap(pa + offset), jtrace_size); |
| offset += jtrace_size; |
| } |
| } |
| |
| void platform_early_init(void) { |
| if (gPhysHandoff->nvram) { |
| const zbi_nvram_t& nvram = gPhysHandoff->nvram.value(); |
| dprintf(INFO, "NVRAM range: phys base %#" PRIx64 " length %#" PRIx64 "\n", nvram.base, |
| nvram.length); |
| allocate_persistent_ram(nvram.base, nvram.length); |
| } |
| |
| // Is the cmdline option to bypass dlog set? |
| dlog_bypass_init(); |
| |
| // Initialize the PmmChecker now that the cmdline has been parsed. |
| pmm_checker_init_from_cmdline(); |
| |
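| // The argument is the kernel's virtual address minus its physical load address, which the |
| // early boot mapping code can use to translate between the two. |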
| arm64_boot_map_init(reinterpret_cast<uintptr_t>(__executable_start) - |
| reinterpret_cast<uintptr_t>(KernelPhysicalLoadAddress())); |
| for (const memalloc::Range& range : gPhysHandoff->memory.get()) { |
| if (range.type == memalloc::Type::kPeripheral) { |
| dprintf(INFO, "ZBI: peripheral range [%#" PRIx64 ", %#" PRIx64 ")\n", range.addr, |
| range.end()); |
| auto status = add_periph_range(range.addr, range.size); |
| ASSERT(status == ZX_OK); |
| } |
| } |
| |
| ASSERT(pmm_init(gPhysHandoff->memory.get()) == ZX_OK); |
| |
| // give the mmu code a chance to do some bookkeeping |
| arm64_mmu_early_init(); |
| } |
| |
| void platform_prevm_init() {} |
| |
| void platform_init(void) { topology_cpu_init(); } |
| |
| // After the fact, create a region to reserve the peripheral map(s). |
| static void platform_init_postvm(uint level) { reserve_periph_ranges(); } |
| |
| LK_INIT_HOOK(platform_postvm, platform_init_postvm, LK_INIT_LEVEL_VM) |
| |
| zx_status_t platform_mp_prep_cpu_unplug(cpu_num_t cpu_id) { |
| return arch_mp_prep_cpu_unplug(cpu_id); |
| } |
| |
| zx_status_t platform_mp_cpu_unplug(cpu_num_t cpu_id) { return arch_mp_cpu_unplug(cpu_id); } |
| |
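| // Carry out the requested halt action; if it fails or does not apply, fall through to |
| // printing a backtrace (on panic), optionally starting the panic shell, and finally |
| // spinning forever with interrupts disabled. |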
| void platform_specific_halt(platform_halt_action suggested_action, zircon_crash_reason_t reason, |
| bool halt_on_panic) { |
| if (suggested_action == HALT_ACTION_REBOOT) { |
| power_reboot(power_reboot_flags::REBOOT_NORMAL); |
| printf("reboot failed\n"); |
| } else if (suggested_action == HALT_ACTION_REBOOT_BOOTLOADER) { |
| power_reboot(power_reboot_flags::REBOOT_BOOTLOADER); |
| printf("reboot-bootloader failed\n"); |
| } else if (suggested_action == HALT_ACTION_REBOOT_RECOVERY) { |
| power_reboot(power_reboot_flags::REBOOT_RECOVERY); |
| printf("reboot-recovery failed\n"); |
| } else if (suggested_action == HALT_ACTION_SHUTDOWN) { |
| power_shutdown(); |
| } |
| |
| if (reason == ZirconCrashReason::Panic) { |
| Backtrace bt; |
| Thread::Current::GetBacktrace(bt); |
| bt.Print(); |
| if (!halt_on_panic) { |
| power_reboot(power_reboot_flags::REBOOT_NORMAL); |
| printf("reboot failed\n"); |
| } |
| #if ENABLE_PANIC_SHELL |
| dprintf(ALWAYS, "CRASH: starting debug shell... (reason = %d)\n", static_cast<int>(reason)); |
| arch_disable_ints(); |
| panic_shell_start(); |
| #endif // ENABLE_PANIC_SHELL |
| } |
| |
| dprintf(ALWAYS, "HALT: spinning forever... (reason = %d)\n", static_cast<int>(reason)); |
| |
| // catch all fallthrough cases |
| arch_disable_ints(); |
| |
| for (;;) { |
| __wfi(); |
| } |
| } |
| |
| void platform_mexec_prep(uintptr_t new_bootimage_addr, size_t new_bootimage_len) { |
| DEBUG_ASSERT(!arch_ints_disabled()); |
| DEBUG_ASSERT(mp_get_online_mask() == cpu_num_to_mask(BOOT_CPU_ID)); |
| } |
| |
| // This function requires NO_ASAN because it accesses ops, which is memory |
| // that lives outside of the kernel address space (comes from IdAllocator). |
| NO_ASAN void platform_mexec(mexec_asm_func mexec_assembly, memmov_ops_t* ops, |
| uintptr_t new_bootimage_addr, size_t new_bootimage_len, |
| uintptr_t new_kernel_entry) { |
| DEBUG_ASSERT(arch_ints_disabled()); |
| DEBUG_ASSERT(mp_get_online_mask() == cpu_num_to_mask(BOOT_CPU_ID)); |
| |
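| // Hand control to the mexec trampoline, which performs the copies described by the ops |
| // list and then jumps to the new kernel entry point, passing it the new boot image address. |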
| mexec_assembly((uintptr_t)new_bootimage_addr, 0, 0, arm64_get_boot_el(), ops, new_kernel_entry); |
| } |
| |
| // Initialize Resource system after the heap is initialized. |
| static void arm_resource_dispatcher_init_hook(unsigned int rl) { |
| // 64-bit address space for MMIO on ARM64 |
| zx_status_t status = ResourceDispatcher::InitializeAllocator(ZX_RSRC_KIND_MMIO, 0, UINT64_MAX); |
| if (status != ZX_OK) { |
| printf("Resources: Failed to initialize MMIO allocator: %d\n", status); |
| } |
| // Set up IRQs based on values from the GIC |
| status = ResourceDispatcher::InitializeAllocator(ZX_RSRC_KIND_IRQ, interrupt_get_base_vector(), |
| interrupt_get_max_vector()); |
| if (status != ZX_OK) { |
| printf("Resources: Failed to initialize IRQ allocator: %d\n", status); |
| } |
| // Set up SMC valid service call range |
| status = ResourceDispatcher::InitializeAllocator(ZX_RSRC_KIND_SMC, 0, |
| ARM_SMC_SERVICE_CALL_NUM_MAX + 1); |
| if (status != ZX_OK) { |
| printf("Resources: Failed to initialize SMC allocator: %d\n", status); |
| } |
| // Set up range of valid system resources. |
| status = ResourceDispatcher::InitializeAllocator(ZX_RSRC_KIND_SYSTEM, 0, ZX_RSRC_SYSTEM_COUNT); |
| if (status != ZX_OK) { |
| printf("Resources: Failed to initialize system allocator: %d\n", status); |
| } |
| } |
| LK_INIT_HOOK(arm_resource_init, arm_resource_dispatcher_init_hook, LK_INIT_LEVEL_HEAP) |
| |
| void topology_init() { |
| // Check MPIDR_EL1.MT to determine how to interpret AFF0 (i.e. cpu_id). For |
| // now, assume that MT is set consistently across all PEs in the system. When |
| // MT is set, use the next affinity level for the first cache depth element. |
| // This approach should be adjusted if we find examples of systems that do not |
| // set MT uniformly, and may require delaying cache-aware load balancing until |
| // all PEs are initialized. |
| const bool cpu_id_is_thread_id = __arm_rsr64("mpidr_el1") & (1 << 24); |
| printf("topology_init: MPIDR_EL1.MT=%d\n", cpu_id_is_thread_id); |
| |
| // This platform initializes the topology earlier than this standard hook. |
| // Set up the CPU distance map with the already-initialized topology. |
| const auto processor_count = |
| static_cast<uint>(system_topology::GetSystemTopology().processor_count()); |
| CpuDistanceMap::Initialize(processor_count, [cpu_id_is_thread_id](cpu_num_t from_id, |
| cpu_num_t to_id) { |
| using system_topology::Node; |
| using system_topology::Graph; |
| |
| const Graph& topology = system_topology::GetSystemTopology(); |
| |
| Node* from_node = nullptr; |
| if (topology.ProcessorByLogicalId(from_id, &from_node) != ZX_OK) { |
| printf("Failed to get processor node for CPU %u\n", from_id); |
| return -1; |
| } |
| DEBUG_ASSERT(from_node != nullptr); |
| |
| Node* to_node = nullptr; |
| if (topology.ProcessorByLogicalId(to_id, &to_node) != ZX_OK) { |
| printf("Failed to get processor node for CPU %u\n", to_id); |
| return -1; |
| } |
| DEBUG_ASSERT(to_node != nullptr); |
| |
| const zbi_topology_arm64_info_t& from_info = |
| from_node->entity.processor.architecture_info.arm64; |
| const zbi_topology_arm64_info_t& to_info = to_node->entity.processor.architecture_info.arm64; |
| |
| // Return the maximum cache depth not shared when multithreaded. |
| if (cpu_id_is_thread_id) { |
| return ktl::max({1 * int{from_info.cluster_1_id != to_info.cluster_1_id}, |
| 2 * int{from_info.cluster_2_id != to_info.cluster_2_id}, |
| 3 * int{from_info.cluster_3_id != to_info.cluster_3_id}}); |
| } |
| |
| // Return the maximum cache depth not shared when single threaded. |
| return ktl::max({1 * int{from_info.cpu_id != to_info.cpu_id}, |
| 2 * int{from_info.cluster_1_id != to_info.cluster_1_id}, |
| 3 * int{from_info.cluster_2_id != to_info.cluster_2_id}, |
| 4 * int{from_info.cluster_3_id != to_info.cluster_3_id}}); |
| }); |
| |
| // TODO(eieio): Determine automatically or provide a way to specify in the |
| // ZBI. The current value matches the depth of the first significant cache |
| // above. |
| const CpuDistanceMap::Distance kDistanceThreshold = 2u; |
| CpuDistanceMap::Get().set_distance_threshold(kDistanceThreshold); |
| |
| CpuDistanceMap::Get().Dump(); |
| } |