// Copyright 2020 The Fuchsia Authors
//
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#ifndef ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_CACHE_H_
#define ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_CACHE_H_

#include <lib/arch/x86/cpuid.h>
#include <zircon/assert.h>
#include <zircon/compiler.h>

#include <cstddef>
#include <cstdint>
#include <optional>
#include <type_traits>

namespace arch {

// Represents a single cache.
struct CpuCacheLevelInfo {
  size_t level;
  X86CacheType type;

  // The size, in KiB, of the cache available to each processor. In the case of
  // the last-level cache, however, this field might report the aggregate size
  // of all such caches on the package.
  size_t size_kb;

  // The number of sets in the cache available to each processor. In the case
  // of the last-level cache, however, this field might report the aggregate
  // number of sets across all such caches in the package.
  size_t number_of_sets;                  // Indeterminate if zero.
  size_t ways_of_associativity;           // Indeterminate if zero.
  std::optional<bool> fully_associative;  // Indeterminate if std::nullopt.
  // The number of bits to shift an APIC ID to get the associated "share ID":
  // processors with coinciding share IDs share this cache. If std::nullopt,
  // the shift is indeterminate.
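  //
  // For example (hypothetical values), with a share_id_shift of 2, logical
  // processors with APIC IDs 4 through 7 all have a share ID of 1
  // (4 >> 2 == 7 >> 2 == 1) and so would share this cache.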
  std::optional<size_t> share_id_shift;
};

// Gives information on the set of caches in a package.
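//
// A minimal usage sketch, given some CPUID I/O provider `io` (e.g.,
// arch::BootCpuidIo, where available):
//
//   arch::CpuCacheInfo caches(io);
//   for (const arch::CpuCacheLevelInfo& cache : caches) {
//     printf("L%zu cache: %zu KiB\n", cache.level, cache.size_kb);
//   }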
class CpuCacheInfo {
 public:
  using iterator = const CpuCacheLevelInfo*;
  using const_iterator = iterator;

  template <typename CpuidIoProvider,
            // So that this constructor does not take precedence over the copy
            // and move constructors.
            typename = std::enable_if_t<!std::is_same_v<CpuidIoProvider, CpuCacheInfo>>>
  explicit CpuCacheInfo(CpuidIoProvider&& io) {
    // We first try the Intel v2 leaves, and then the AMD v2 leaves.
    // Hypervisors on AMD hosts might lay out CPUID values in the Intel style,
    // and there is no harm in trying the Intel leaves first in general, as
    // AMD hardware tends to reserve them as zero.

    if (TryV2Topology<CpuidIntelCacheTopologyA, CpuidIntelCacheTopologyB, CpuidIntelCacheTopologyC,
                      CpuidMaximumLeaf>(io)) {
      return;
    }

    // [amd/vol3]: E.4.15 Function 8000_001Dh—Cache Topology Information.
    //
    // CpuidAmdCacheTopologyA's leaf (0x8000'001d) is reserved if the
    // topology extension feature is not implemented.
    if (io.template Read<CpuidAmdFeatureFlagsC>().topology_extensions() &&
        TryV2Topology<CpuidAmdCacheTopologyA, CpuidAmdCacheTopologyB,  //
                      CpuidAmdCacheTopologyC, CpuidMaximumExtendedLeaf>(io)) {
      return;
    }

    // The extended leaves explicitly enumerate information about the L1d,
    // L1i, L2, and L3 caches; this was the original means of determining
    // cache topology on AMD.
    if (io.template Read<CpuidMaximumExtendedLeaf>().leaf() >= CpuidL3CacheInformation::kLeaf) {
      const auto l1d = io.template Read<CpuidL1DataCacheInformation>();
      const auto l1i = io.template Read<CpuidL1InstructionCacheInformation>();
      const auto l2 = io.template Read<CpuidL2CacheInformation>();
      const auto l3 = io.template Read<CpuidL3CacheInformation>();

      caches_[0] = {
          .level = 1,
          .type = X86CacheType::kData,
          .size_kb = l1d.size_kb(),
          .ways_of_associativity = l1d.ways_of_associativity(),
          .fully_associative = l1d.fully_associative(),
      };
      caches_[1] = {
          .level = 1,
          .type = X86CacheType::kInstruction,
          .size_kb = l1i.size_kb(),
          .ways_of_associativity = l1i.ways_of_associativity(),
          .fully_associative = l1i.fully_associative(),
      };
      caches_[2] = {
          .level = 2,
          .type = X86CacheType::kUnified,
          .size_kb = l2.size_kb(),
          .ways_of_associativity = l2.ways_of_associativity(),
          .fully_associative = l2.fully_associative(),
      };
      size_ = 3;

      if (l3.size()) {
        caches_[3] = {
            .level = 3,
            .type = X86CacheType::kUnified,
            // [amd/vol3]: E.4.5 Function 8000_0006h—L2 Cache and TLB and L3 Cache Information.
            //
            // `l3.size()` actually provides bounds on the total size of the
            // L3 cache across the package, in terms of 512 KiB blocks:
            //   l3.size() * 512 ≤ total size in KiB < (l3.size() + 1) * 512
            // In practice, the total size is a multiple of 512 KiB, so this
            // reports the actual total size.
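            //
            // For example (hypothetical value), an `l3.size()` of 32 would
            // correspond to a 16 MiB package-wide L3.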
            .size_kb = 512 * l3.size(),
            .ways_of_associativity = l3.ways_of_associativity(),
            .fully_associative = l3.fully_associative(),
        };
        size_ = 4;
      }
    }
  }

  CpuCacheInfo() = delete;

  iterator begin() const { return caches_; }

  iterator end() const { return caches_ + size_; }

  size_t size() const { return size_; }

  bool empty() const { return size_ == 0; }

  // Returns information on the last-level cache.
  const CpuCacheLevelInfo& back() const {
    ZX_DEBUG_ASSERT(size_ > 0);
    return caches_[size_ - 1];
  }

 private:
  // A split L1 (data and instruction caches) plus unified L2, L3, and L4
  // caches makes five in total.
  static constexpr size_t kMaxNumCaches = 5;

  // We templatize this so that we can supply either the Intel or AMD v2 cache
  // topology leaves (0x4 and 0x8000'001d, respectively), which are
  // identically laid out.
  template <template <uint32_t> class CacheTopologyA,  //
            template <uint32_t> class CacheTopologyB,  //
            template <uint32_t> class CacheTopologyC,  //
            typename MaximumLeaf,                      //
            typename CpuidIoProvider>
  bool TryV2Topology(CpuidIoProvider&& io) {
    if (io.template Read<MaximumLeaf>().leaf() < CacheTopologyA<0>::kLeaf) {
      return false;
    }

    // Clear any state left over from a previous, unsuccessful enumeration
    // attempt, so that `size_` counts only the caches recorded here.
    size_ = 0;
    for (size_t i = 0; i < kMaxNumCaches; ++i, ++size_) {
      const auto eax = Read<CacheTopologyA>(io, i);
      const auto ebx = Read<CacheTopologyB>(io, i);
      const auto ecx = Read<CacheTopologyC>(io, i);

      if (eax.cache_type() == X86CacheType::kNull) {
        break;
      }
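      // Each of the following register fields encodes its quantity minus one;
      // the cache size in bytes is the product of the decoded values.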
      const size_t size_bytes = (ebx.ways() + 1) * (ebx.physical_line_partitions() + 1) *
                                (ebx.system_coherency_line_size() + 1) * (ecx.sets() + 1);
      caches_[i] = {
          .level = eax.cache_level(),
          .type = eax.cache_type(),
          .size_kb = size_bytes / 1024,
          .number_of_sets = ecx.sets() + 1,
          .ways_of_associativity = ebx.ways() + 1,
          .fully_associative = eax.fully_associative() != 0,
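          // The shift is the ceiling of log2 of the maximum number of logical
          // processors that might share the cache: for example, a cache shared
          // by at most 12 logical processors would get a shift of 4.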
          .share_id_shift = CeilLog2(eax.max_sharing_logical_processors() + 1),
      };
    }
    // We expect at least split L1 caches and an L2 cache. If, for whatever
    // reason, fewer than expected were encoded, fall back to other means of
    // populating `caches_`.
    return size_ >= 3;
  }

  // A shim to dynamically look up statically parametrized values.
  template <template <uint32_t> class CpuidValueType,  //
            typename CpuidIoProvider>
  static auto Read(CpuidIoProvider&& io, size_t n) {
    switch (n) {
      case 0:
        return io.template Read<CpuidValueType<0>>();
      case 1:
        return io.template Read<CpuidValueType<1>>();
      case 2:
        return io.template Read<CpuidValueType<2>>();
      case 3:
        return io.template Read<CpuidValueType<3>>();
      case 4:
        return io.template Read<CpuidValueType<4>>();
      default:
        static_assert(kMaxNumCaches == 5);
        ZX_DEBUG_ASSERT(n < kMaxNumCaches);
        __UNREACHABLE;
    }
  }

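  // Returns ceil(log2(n)); for example, CeilLog2(12) == 4, since
  // 2^3 < 12 <= 2^4.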
  // TODO(C++20): use `cpp20::bit_width(n - 1)` instead.
  static size_t CeilLog2(size_t n) {
    ZX_DEBUG_ASSERT(n > 0);
    size_t exp = 0;
    while ((size_t{1} << exp) < n) {
      exp++;
    }
    return exp;
  }

  CpuCacheLevelInfo caches_[kMaxNumCaches];

  // The number of `caches_` entries on which we have information.
  size_t size_ = 0;
};

}  // namespace arch

#endif  // ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_CACHE_H_