// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef SRC_DEVICES_SYSMEM_DRIVERS_SYSMEM_CONTIGUOUS_POOLED_MEMORY_ALLOCATOR_H_
#define SRC_DEVICES_SYSMEM_DRIVERS_SYSMEM_CONTIGUOUS_POOLED_MEMORY_ALLOCATOR_H_

#include <lib/async/cpp/task.h>
#include <lib/async/cpp/wait.h>
#include <lib/async/wait.h>
#include <lib/inspect/cpp/inspect.h>
#include <lib/zx/bti.h>
#include <lib/zx/event.h>
#include <lib/zx/pmt.h>
#include <lib/zx/time.h>
#include <lib/zx/vmo.h>
#include <zircon/errors.h>
#include <zircon/limits.h>
#include <zircon/syscalls.h>

#include <map>
#include <optional>
#include <string>
#include <vector>

#include <fbl/algorithm.h>
#include <fbl/vector.h>
#include <region-alloc/region-alloc.h>

#include "allocator.h"
#include "protected_ranges.h"

namespace sysmem_driver {

class ContiguousPooledMemoryAllocator : public MemoryAllocator {
 public:
  ContiguousPooledMemoryAllocator(Owner* parent_device, const char* allocation_name,
                                  inspect::Node* parent_node, uint64_t pool_id, uint64_t size,
                                  bool is_always_cpu_accessible, bool is_ever_cpu_accessible,
                                  bool is_ready, bool can_be_torn_down,
                                  async_dispatcher_t* dispatcher);

  ~ContiguousPooledMemoryAllocator();

  // Alignment gets rounded up to system page alignment, so any low number will default to system
  // page alignment.
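  //
  // For example, assuming 4KiB system pages: Init() and Init(12) both result in 4KiB alignment,
  // while Init(16) results in 64KiB alignment.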
  zx_status_t Init(uint32_t alignment_log2 = 0);

  // Initializes the guard regions. Must be called after Init. If internal_guard_regions is not
  // set, there will only be guard regions at the beginning and end of the buffer.
  void InitGuardRegion(size_t guard_region_size, bool unused_pages_guarded,
                       zx::duration unused_page_check_cycle_period, bool internal_guard_regions,
                       bool crash_on_guard_failure, async_dispatcher_t* dispatcher);
  void FillUnusedRangeWithGuard(uint64_t start_offset, uint64_t size);

  // If is_always_cpu_accessible_, called after InitGuardRegion() (if any), but during the same
  // dispatcher call-out, before returning to the dispatcher, because is_ready_ is already true.
  //
  // If !is_always_cpu_accessible_, called during set_ready().
  void SetupUnusedPages();

  // This uses a physical VMO as the parent VMO. This is used for VDEC, since we learn the
  // physical range of VDEC from the TEE.
  zx_status_t InitPhysical(zx_paddr_t paddr);

  zx_status_t Allocate(uint64_t size, std::optional<std::string> name,
                       zx::vmo* parent_vmo) override;
  zx_status_t SetupChildVmo(const zx::vmo& parent_vmo, const zx::vmo& child_vmo,
                            fuchsia_sysmem2::wire::SingleBufferSettings buffer_settings) override;
  void Delete(zx::vmo parent_vmo) override;
  bool is_empty() override {
    // If the contiguous VMO has been marked as secure there's no way to unmark it as secure, so
    // unbinding would never be safe.
    return regions_.empty() && (can_be_torn_down_ || !is_ready_);
  }

  zx_status_t GetPhysicalMemoryInfo(uint64_t* base, uint64_t* size) override {
    *base = phys_start_;
    *size = size_;
    return ZX_OK;
  }

  void set_ready() override;
  bool is_ready() override;

  const zx::vmo& GetPoolVmoForTest() { return contiguous_vmo_; }
  // Gets the offset of a VMO from the beginning of the pool.
  uint64_t GetVmoRegionOffsetForTest(const zx::vmo& vmo);

  uint64_t failed_guard_region_checks() const { return failed_guard_region_checks_; }

  bool is_already_cleared_on_allocate() override;

  // Set only from unit tests. When set, we skip any operation that's only allowed on contiguous
  // VMOs, since we don't have a real contiguous VMO (a fake BTI can't be used to create one).
  // This ends up limiting the fidelity of the unit tests somewhat; in the long run we should
  // probably plumb a real BTI to the unit tests somehow.
  void SetBtiFakeForUnitTests() {
    ZX_ASSERT(!is_ready());
    is_bti_fake_ = true;
  }
  bool is_bti_fake() { return is_bti_fake_; }

  fuchsia_sysmem2::wire::HeapType heap_type() {
    return static_cast<fuchsia_sysmem2::wire::HeapType>(pool_id_);
  }

  // loanable pages / un-used pages
  //
  // We count pages we choose to pattern instead of loan as loanable, despite not actually loaning
  // those pages. In other words, we don't count patterned pages against efficiency.
  double GetLoanableEfficiency();

  // loanable pages / total pages
  //
  // We count pages we choose to pattern instead of loan as loanable, despite not actually loaning
  // those pages. In other words, we don't count patterned pages against the loaned ratio.
  double GetLoanableRatio();

  // loanable bytes
  //
  // We count pages we choose to pattern instead of loan as loanable, despite not actually loaning
  // those pages. In other words, we don't count patterned pages against loanable bytes.
  uint64_t GetLoanableBytes();
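
  // A worked example for the three metrics above (hypothetical numbers, assuming 4KiB pages):
  // with a 1024-page pool where 768 pages are allocated and 256 are unused, of which 194 are
  // loaned or patterned and 62 can't currently be loaned (e.g. still covered by a protected
  // range):
  //
  //   GetLoanableEfficiency() = 194 / 256  ~= 0.76
  //   GetLoanableRatio()      = 194 / 1024 ~= 0.19
  //   GetLoanableBytes()      = 194 * 4096 == 794624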

  static constexpr zx::duration kDefaultUnusedPageCheckCyclePeriod = zx::sec(600);

  static constexpr zx::duration kUnusedRecentlyPageCheckPeriod = zx::sec(2);
  static constexpr zx::duration kUnusedRecentlyAgeThreshold = zx::sec(5);

  static constexpr zx::duration kStepTowardOptimalProtectedRangesPeriod = zx::msec(1000);

  // Keep < 1% of pages aside to hold the unused page guard pattern (1 page per 128-page period is
  // 1/128 ~= 0.78%). The rest get loaned back to Zircon.
  static constexpr uint64_t kUnusedGuardPatternPeriodPages = 128;

 private:
  struct RegionData {
    std::string name;
    zx_koid_t koid;
    inspect::Node node;
    inspect::UintProperty size_property;
    inspect::UintProperty koid_property;
    RegionAllocator::Region::UPtr ptr;
  };

  struct DeletedRegion {
    ralloc_region_t region;
    zx::time when_freed;
    std::string name;
  };

  class RangesControl : public protected_ranges::ProtectedRangesControl {
   public:
    RangesControl(ContiguousPooledMemoryAllocator* parent) : parent_(parent) {}

    // protected_ranges::ProtectedRangesControl implementation
    bool IsDynamic() override;
    uint64_t MaxRangeCount() override;
    uint64_t GetRangeGranularity() override;
    bool HasModProtectedRange() override;
    void AddProtectedRange(const protected_ranges::Range& range) override;
    void DelProtectedRange(const protected_ranges::Range& range) override;
    void ModProtectedRange(const protected_ranges::Range& old_range,
                           const protected_ranges::Range& new_range) override;
    void ZeroProtectedSubRange(bool is_covering_range_explicit,
                               const protected_ranges::Range& range) override;
    uint64_t GetBase() override;
    uint64_t GetSize() override;
    bool UseRange(const protected_ranges::Range& range) override;
    void UnUseRange(const protected_ranges::Range& range) override;

   private:
    ContiguousPooledMemoryAllocator* parent_{};
  };

  zx_status_t InitCommon(zx::vmo local_contiguous_vmo);
  void TraceObserverCallback(async_dispatcher_t* dispatcher, async::WaitBase* wait,
                             zx_status_t status, const zx_packet_signal_t* signal);

  void CheckGuardPageCallback(async_dispatcher_t* dispatcher, async::TaskBase* task,
                              zx_status_t status);
  void CheckUnusedPagesCallback(async_dispatcher_t* dispatcher, async::TaskBase* task,
                                zx_status_t status);
  void CheckUnusedRecentlyPagesCallback(async_dispatcher_t* dispatcher, async::TaskBase* task,
                                        zx_status_t status);
  void CheckGuardRegion(const char* region_name, size_t region_size, bool pre,
                        uint64_t start_offset);
  void IncrementGuardRegionFailureInspectData();
  void CheckGuardRegionData(const RegionData& region);
  void CheckExternalGuardRegions();
  void CheckAnyUnusedPages(uint64_t start_offset, uint64_t end_offset);
  void CheckUnusedRange(uint64_t offset, uint64_t size, bool and_also_zero);
  void DumpPoolStats();
  void DumpPoolHighWaterMark();
  void TracePoolSize(bool initial_trace);
  uint64_t CalculateLargeContiguousRegionSize();
  void UpdateLoanableMetrics();

  // This method iterates over all the sub-regions of an unused region. The sub-regions are
  // regions we need to pattern and keep, loan to Zircon, or zero. Any given page that's unused
  // will always (in any given boot) be patterned, loaned, or zeroed, regardless of the alignment
  // of the unused region. This way we'll know which pages are supposed to be patterned, loaned,
  // or zeroed despite unused regions getting merged/split.
  //
  // Depending on settings, some sub-region types won't exist, so their corresponding callable
  // won't be called.
  //
  // The pattern_func, loan_func, and zero_func take different actions depending on the calling
  // context, but generally each func is expected to handle the pages that are supposed to be
  // patterned, loaned, or zeroed respectively. For example, write the pattern or check the
  // pattern, loan the page or un-loan the page, zero the page or nop.
  //
  // If a page is protected it'll be skipped, and not processed by any of the passed-in funcs.
  //
  // All the funcs take const ralloc_region_t&.
  template <typename F1, typename F2, typename F3>
  void ForUnusedGuardPatternRanges(const ralloc_region_t& region, F1 pattern_func, F2 loan_func,
                                   F3 zero_func);
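
  // A sketch of a call site (hypothetical lambdas, for illustration only):
  //
  //   ForUnusedGuardPatternRanges(
  //       unused_region,
  //       /*pattern_func=*/[this](const ralloc_region_t& r) { /* write or verify the pattern */ },
  //       /*loan_func=*/[this](const ralloc_region_t& r) { /* decommit to loan pages */ },
  //       /*zero_func=*/[this](const ralloc_region_t& r) { /* zero or nop */ });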

  // This handles the unprotected portions of the region passed to ForUnusedGuardPatternRanges().
  template <typename F1, typename F2, typename F3>
  void ForUnusedGuardPatternRangesInternal(const ralloc_region_t& region, F1 pattern_func,
                                           F2 loan_func, F3 zero_func);

  void StashDeletedRegion(const RegionData& region_data);
  DeletedRegion* FindMostRecentDeletedRegion(uint64_t offset);
  // Log DeletedRegion info and fairly detailed diff info for a range that's detected to differ
  // from the pattern that was previously written.
  //
  // TODO(dustingreen): With some refactoring we could have common code for diff reporting, for
  // all of per-reserved-range guard pages, per-allocation guard pages, and unused page guard
  // pages.
  void ReportPatternCheckFailedRange(const ralloc_region_t& failed_range, const char* which_type);

  void OnRegionUnused(const ralloc_region_t& region);
  zx_status_t CommitRegion(const ralloc_region_t& region);

  void EnsureSteppingTowardOptimalProtectedRanges();
  void StepTowardOptimalProtectedRanges(async_dispatcher_t* dispatcher, async::TaskBase* task,
                                        zx_status_t status);

  void DumpRanges() const;

  Owner* const parent_device_{};
  async_dispatcher_t* dispatcher_{};
  const char* const allocation_name_{};
  const uint64_t pool_id_{};
  char child_name_[ZX_MAX_NAME_LEN] = {};

  uint64_t guard_region_size_ = 0;
  // Holds the default data to be placed into the guard region.
  std::vector<uint8_t> guard_region_data_;
  // Holds a copy of the guard region data that's compared with the real value.
  std::vector<uint8_t> guard_region_copy_;

  bool crash_on_guard_failure_ = false;
  // Internal guard regions are around every allocation, not just at the beginning and end of the
  // contiguous VMO.
  bool has_internal_guard_regions_ = false;

  zx::vmo contiguous_vmo_;
  zx::pmt pool_pmt_;
  RegionAllocator region_allocator_;
  uint64_t allocated_bytes_ = 0;

  // We run protected_ranges_ in the same [0, size_) space as region_allocator_, and convert to
  // physical ranges in protected_ranges_control_ (adding phys_start_).
  std::optional<protected_ranges::ProtectedRanges> protected_ranges_;
  // When allocating/deallocating a buffer, we immediately make the necessary/possible changes via
  // protected_ranges_ to make that buffer space usable/best-effort-reclaimable, but to really
  // optimize the protected ranges we need to spread out the changes in time to avoid churning all
  // the loaned pages at once. This timer does that.
  async::TaskMethod<ContiguousPooledMemoryAllocator,
                    &ContiguousPooledMemoryAllocator::StepTowardOptimalProtectedRanges>
      step_toward_optimal_protected_ranges_{this};
  // We effectively reset the timer any time there's new allocate/deallocate activity, since that
  // activity is also churn in some sense, so we avoid compounding that churn with optimizing
  // steps until more time has passed, even if the timer had previously been set to go off soon.
  zx::time step_toward_optimal_protected_ranges_min_time_ = zx::time::infinite_past();
  // The bottom edge of protected_ranges_ uses protected_ranges_control_ to effect actual changes.
  // This delegates to ContiguousPooledMemoryAllocator or Device (and then SecureMem) to do the
  // changes.
  std::optional<RangesControl> protected_ranges_control_;

  // Map from parent_vmo handle to the RegionData tracking that allocation.
  std::map<zx_handle_t, RegionData> regions_;
  zx_paddr_t phys_start_{};
  uint64_t size_{};
  // True if the CPU can always touch these pages. False if these pages are under a HW protected
  // range at least sometimes.
  bool is_always_cpu_accessible_{};
  // True if the CPU can sometimes touch these pages. False if these pages are under a HW
  // protected range 100% of the time.
  bool is_ever_cpu_accessible_{};
  // True if the VMO is a normal contiguous VMO. False if the VMO is a physical VMO, which doesn't
  // support decommit (and we don't need it to, since a physical VMO is only used when
  // !is_ever_cpu_accessible_).
  bool can_decommit_{};
  bool is_ready_{};
  // True if the allocator can be deleted after it's marked ready.
  bool can_be_torn_down_{};
  bool is_setup_unused_pages_called_{};

  uint64_t failed_guard_region_checks_{};

  uint64_t high_water_mark_used_size_{};
  uint64_t max_free_size_at_high_water_mark_{};

  inspect::Node node_;
  inspect::ValueList properties_;
  inspect::UintProperty size_property_;
  inspect::UintProperty high_water_mark_property_;
  inspect::UintProperty used_size_property_;
  inspect::UintProperty allocations_failed_property_;
  inspect::UintProperty last_allocation_failed_timestamp_ns_property_;
  inspect::UintProperty commits_failed_property_;
  inspect::UintProperty last_commit_failed_timestamp_ns_property_;
  // Tracks how many allocations would have succeeded if not for fragmentation (enough total free
  // bytes, but no large-enough contiguous region).
  inspect::UintProperty allocations_failed_fragmentation_property_;
  // This is the size of the largest free contiguous region when high_water_mark_property_ was
  // last modified. It can be used to determine how much space was wasted due to fragmentation.
  inspect::UintProperty max_free_at_high_water_property_;
  // size - high_water_mark. This is used for Cobalt reporting.
  inspect::UintProperty free_at_high_water_mark_property_;
  inspect::BoolProperty is_ready_property_;
  inspect::UintProperty failed_guard_region_checks_property_;
  inspect::UintProperty last_failed_guard_region_check_timestamp_ns_property_;
  // This tracks the sum of the sizes of the 10 largest free regions.
  inspect::UintProperty large_contiguous_region_sum_property_;

  // CMM / PCMM properties regarding loaning of pages to Zircon.
  //
  // The minimum efficiency since this class was created.
  double min_efficiency_ = 1.0;
  inspect::DoubleProperty loanable_efficiency_property_;
  inspect::DoubleProperty loanable_ratio_property_;
  inspect::UintProperty loanable_bytes_property_;
  inspect::UintProperty loanable_mebibytes_property_;

  zx::event trace_observer_event_;
  async::WaitMethod<ContiguousPooledMemoryAllocator,
                    &ContiguousPooledMemoryAllocator::TraceObserverCallback>
      wait_{this};

  async::TaskMethod<ContiguousPooledMemoryAllocator,
                    &ContiguousPooledMemoryAllocator::CheckGuardPageCallback>
      guard_checker_{this};

  // Split up the unused page check into relatively small pieces to avoid spiking the CPU or
  // causing latency spikes for normal sysmem requests.
  static constexpr uint32_t kUnusedCheckPartialCount = 64;
  // We do this one page at a time to hopefully stay within L1 on all devices, since in the
  // allocate path we're checking this amount of buffer space with memcmp(), then also zeroing the
  // same space with memset(). If we did so in chunks larger than L1, we'd be spilling cache lines
  // to L2 or RAM during memcmp(), then pulling them back in during memset(). Cache sizes and
  // tiers can vary, of course. This also determines the granularity at which we report pattern
  // mismatch failures, so 1 page is best here for that also.
  const uint64_t unused_guard_data_size_ = zx_system_get_page_size();
  bool unused_pages_guarded_ = false;
  zx::duration unused_page_check_cycle_period_ = kDefaultUnusedPageCheckCyclePeriod;
  uint64_t unused_check_phase_ = 0;
  async::TaskMethod<ContiguousPooledMemoryAllocator,
                    &ContiguousPooledMemoryAllocator::CheckUnusedPagesCallback>
      unused_checker_{this};
  async::TaskMethod<ContiguousPooledMemoryAllocator,
                    &ContiguousPooledMemoryAllocator::CheckUnusedRecentlyPagesCallback>
      unused_recently_checker_{this};
  SysmemMetrics& metrics_;

  // Regardless of is_ever_cpu_accessible_, we create a mapping of the whole VMO. When
  // is_always_cpu_accessible_ we can use the mapping to zero new buffers. When
  // is_ever_cpu_accessible_ we can use the mapping to write and check patterns in unused pages.
  uint8_t* mapping_ = nullptr;

  // While we'll typically pattern only 1 page per pattern period and adjust the pattern period to
  // get the % we want, being able to vary this might help catch a suspected problem faster; in
  // any case it's simple enough to allow this to be adjusted.
  static constexpr uint64_t kUnusedToPatternPages = 1;
  const uint64_t unused_guard_pattern_period_bytes_ =
      kUnusedGuardPatternPeriodPages * zx_system_get_page_size();
  const uint64_t unused_to_pattern_bytes_ = kUnusedToPatternPages * zx_system_get_page_size();
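
  // For example, assuming 4KiB system pages: unused_guard_pattern_period_bytes_ is
  // 128 * 4KiB = 512KiB and unused_to_pattern_bytes_ is 4KiB, so one page in each 128-page period
  // holds the pattern: 1/128 ~= 0.78%, consistent with the < 1% budget noted at
  // kUnusedGuardPatternPeriodPages.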

  bool is_bti_fake_ = false;

  // We cap the number of DeletedRegions we're willing to track; otherwise the overhead could get
  // a bit excessive in pathological cases, for example if we were to allow tracking a
  // DeletedRegion per page. This is optimized for update, not (at all) for lookup, since we only
  // do lookups if a page just failed a pattern check, which should never happen. If it does
  // happen, we want to know the paddr_t range and name of the most-recently-deleted region, and
  // possibly the 2nd most recently deleted region also, if it comes to that.
  static constexpr int32_t kNumDeletedRegions = 512;
  int32_t deleted_regions_count_ = 0;
  int32_t deleted_regions_next_ = 0;
  // Only allocated if we'll be checking unused pages.
  std::vector<DeletedRegion> deleted_regions_;
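
  // A sketch of the intended ring-buffer update on deletion (assumed from the counters above, not
  // quoted from the implementation):
  //
  //   deleted_regions_[deleted_regions_next_] = std::move(deleted_region);
  //   deleted_regions_next_ = (deleted_regions_next_ + 1) % kNumDeletedRegions;
  //   deleted_regions_count_ = std::min(deleted_regions_count_ + 1, kNumDeletedRegions);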

  // This is Zircon's zero page mapped a few times, read-only.
  uint64_t zero_page_vmo_size_ = fbl::round_up(64ull * 1024, zx_system_get_page_size());
  zx::vmo zero_page_vmo_;
  uint8_t* zero_page_vmo_base_ = nullptr;
};

}  // namespace sysmem_driver

#endif  // SRC_DEVICES_SYSMEM_DRIVERS_SYSMEM_CONTIGUOUS_POOLED_MEMORY_ALLOCATOR_H_