zircon/kernel/lib/arch/include/lib/arch/x86/apic-id.h - fuchsia - Git at Google

 // Copyright 2020 The Fuchsia Authors
 //
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file or at
 // https://opensource.org/licenses/MIT

 #ifndef ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_APIC_ID_H_
 #define ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_APIC_ID_H_

 #include <lib/arch/x86/cpuid.h>
 #include <lib/stdcompat/bit.h>

 namespace arch {

 // Returns the APIC ID - x2APIC if supported - associated with the logical
 // processor in turn associated with the provided CpuidIoProvider.
 template <typename CpuidIoProvider>
 inline uint32_t GetApicId(CpuidIoProvider&& io) {
   using LevelType = CpuidTopologyEnumerationC::TopologyLevelType;

   // [intel/vol3]: 8.9.2  Hierarchical Mapping of CPUID Extended Topology Leaf.
   //
   // For extended topology enumeration, if the first level does not encode the
   // "SMT" level (a spec'ed expecation), then we assume the associated leaves
   // to be invalid.
   if (CpuidSupports<CpuidV2TopologyEnumerationA<0>>(io) &&
       io.template Read<CpuidV2TopologyEnumerationC<0>>().level_type() == LevelType::kSmt) {
     return io.template Read<CpuidV2TopologyEnumerationD<0>>().x2apic_id();
   }
   if (CpuidSupports<CpuidV1TopologyEnumerationA<0>>(io) &&
       io.template Read<CpuidV1TopologyEnumerationC<0>>().level_type() == LevelType::kSmt) {
     return io.template Read<CpuidV1TopologyEnumerationD<0>>().x2apic_id();
   }

   if (CpuidSupports<CpuidExtendedApicId>(io)) {
     return io.template Read<CpuidExtendedApicId>().x2apic_id();
   }

   return io.template Read<CpuidProcessorInfo>().initial_apic_id();
 }

 // ApicIdDecoder is a utility for extracting particular topological level
 // IDs from an (x2)APIC ID.
 //
 // In full generality, an APIC ID might decompose as follows:
 //
 // [intel/vol3]: Figure 8-5.  Generalized Seven Level Interpretation of the APIC ID.
 // -----------------------------------------------------------------------------
 // | CLUSTER ID | PACKAGE ID | DIE ID | TILE ID | MODULE ID | CORE ID | SMT ID |
 // -----------------------------------------------------------------------------
 //
 // where the full ID width is 32-bit (if x2APIC) or 8-bit.
 //
 // This, however, is higher fidelity than we are able to make use of. Since
 // CLUSTER ID and PACKAGE_ID are not directly enumerable from CPUID, we elide
 // the two IDs into a single PACKAGE ID, defined as the rest of the ID above
 // DIE. Moreover, the system currently has no use for enumerating tiles and
 // modules directly (which is also a practice that AMD does not do): we elide
 // the TILE and MODULE IDs into DIE ID alone. Accordingly, ApicIdDecoder
 // partitions up the APIC address space as
 // ------------------------------------------
 // | PACKAGE ID | DIE ID | CORE ID | SMT ID |
 // ------------------------------------------
 class ApicIdDecoder {
  public:
   // Derives the SMT, core and die ID field widths from the CPUID data served
   // by `io`. If no usable topology information is found, all widths stay zero
   // and the whole APIC ID is treated as the package ID.
   //
   // The SFINAE guard keeps this forwarding constructor from taking precedence
   // over the copy and move constructors. The provider type is decayed before
   // the comparison: for an lvalue argument `CpuidIoProvider` is deduced as a
   // reference type, which would never compare equal to ApicIdDecoder itself.
   template <typename CpuidIoProvider,
             typename = std::enable_if_t<
                 !std::is_same_v<std::decay_t<CpuidIoProvider>, ApicIdDecoder>>>
   explicit ApicIdDecoder(CpuidIoProvider&& io) {
     // [intel/vol3]: Example 8-21.  Support Routines for Identifying Package,
     // Core and Logical Processors from 8-bit Initial APIC ID.
     // [amd/vol3]: E.5.1  Legacy Method.
     //
     // When HTT ("Hyper-Threading Technology") is not advertised, the package
     // contains a single logical processor. This is counter-intuitive, but
     // Intel cores that do not actually have SMT available may still present
     // HTT == 1; moreover, in the case of AMD, HTT means "either that there is
     // more than one thread per core or more than one core per compute unit".
     if (!io.template Read<CpuidFeatureFlagsD>().htt()) {
       return;
     }

     // First try the extended topology leaves, which may work with older AMD
     // models. The "V2" leaf 0x1f is preferred - if available - to the "V1"
     // leaf 0xb.
     if (TryExtendedTopology<CpuidV2TopologyEnumerationA, CpuidV2TopologyEnumerationC>(io) ||
         TryExtendedTopology<CpuidV1TopologyEnumerationA, CpuidV1TopologyEnumerationC>(io)) {
       // The DIE level might not have been explicitly enumerated. If it does
       // not seem so, redefine the cumulative die-and-below ID width to be the
       // rounded binary order of the maximum number of addressable logical
       // processors per package, which should always coincide in general. The
       // result is clamped from below by the core-and-below width so that
       // package_id() can never overlap bits already attributed to the core.
       if (die_id_cumulative_width_ == core_id_cumulative_width_) {
         die_id_cumulative_width_ =
             std::max(core_id_cumulative_width_, CeilLog2(MaxNumLogicalProcessors(io)));
       }
       return;
     }

     // Maximum per package, that is.
     size_t max_logical_processors = MaxNumLogicalProcessors(io);
     size_t max_cores = 1;
     size_t max_dies = 1;
     auto finalize = [&]() {
       // Leave every width at zero if the counts are not mutually consistent.
       if (!(max_logical_processors >= max_cores && max_cores >= max_dies && max_dies > 0)) {
         return;
       }

       smt_id_width_ = CeilLog2(max_logical_processors / max_cores);
       core_id_cumulative_width_ = CeilLog2(max_cores / max_dies) + smt_id_width_;
       // Each quotient above is rounded up separately, so their sum can exceed
       // the rounded order of the total; clamp so that the die-and-below width
       // is never narrower than the core-and-below width.
       die_id_cumulative_width_ =
           std::max(core_id_cumulative_width_, CeilLog2(max_logical_processors));
     };

     // [intel/vol3]: Example 8-21.  Support Routines for Identifying
     // Package, Core and Logical Processors from 8-bit Initial APIC ID.
     if (CpuidSupports<CpuidIntelCacheTopologyA<0>>(io)) {
       const auto zeroth_cache_topology = io.template Read<CpuidIntelCacheTopologyA<0>>();
       if (zeroth_cache_topology.cache_type() != X86CacheType::kNull) {
         // The field encodes one less than the real count.
         max_cores = zeroth_cache_topology.max_cores() + 1;
         finalize();
         return;
       }
     }

     // Unfortunately, the AMD spec does not give a general way of
     // determining the maximum number of addressable cores and dies per
     // package, respectively. If leaf 0x8000'001e is supported (which
     // requires the topology extension feature to be advertised), then we
     // can give best-effort guesses of these quantities based on the actual
     // counts of dies per package and logical processors per core.
     if (CpuidSupports<CpuidComputeUnitInfo>(io)) {
       // We translate "compute unit" and "node" here as core and die,
       // respectively.
       max_dies = io.template Read<CpuidNodeInfo>().nodes_per_package() + 1;
       const size_t threads_per_core =
           io.template Read<CpuidComputeUnitInfo>().threads_per_compute_unit() + 1;
       max_cores = max_logical_processors / threads_per_core;
     }
     finalize();
   }

   ApicIdDecoder() = delete;

   // The lowest `smt_id_width_` bits of the APIC ID.
   uint32_t smt_id(uint32_t apic_id) const { return apic_id & ToMask(smt_id_width_); }

   // The bits of the APIC ID above the SMT ID and below the DIE ID.
   uint32_t core_id(uint32_t apic_id) const {
     return (apic_id & ToMask(core_id_cumulative_width_)) >> smt_id_width_;
   }

   // The bits of the APIC ID above the CORE ID and below the PACKAGE ID.
   uint32_t die_id(uint32_t apic_id) const {
     return (apic_id & ToMask(die_id_cumulative_width_)) >> core_id_cumulative_width_;
   }

   // Everything in the APIC ID above the DIE ID.
   uint32_t package_id(uint32_t apic_id) const { return apic_id >> die_id_cumulative_width_; }

  private:
   using TopologyLevelType = CpuidTopologyEnumerationC::TopologyLevelType;

   // The highest level type that is decoded; also the highest sub-leaf index
   // that the Read() shim below can dispatch to.
   static constexpr size_t kMaxTopologyLevel = static_cast<size_t>(TopologyLevelType::kDie);

   // [intel/vol3]: Example 8-18.  Support Routines for Identifying Package,
   // Die, Core and Logical Processors from 32-bit x2APIC ID.
   //
   // Attempts to perform Intel's extended topology enumeration routine and
   // returns whether the attempt was successful. We templatize this so that we
   // can supply the CPUID value types of either the V1 or V2 leaves (0xb and
   // 0x1f, respectively), which are identically laid out.
   //
   // The member widths are only written on success; a failed attempt leaves
   // the decoder exactly as it was.
   template <template <uint32_t> class TopologyEnumerationA,
             template <uint32_t> class TopologyEnumerationC,  //
             typename CpuidIoProvider>
   bool TryExtendedTopology(CpuidIoProvider&& io) {
     if (!CpuidSupports<TopologyEnumerationA<0>>(io)) {
       return false;
     }

     // Accumulated locally and published only once the terminating level is
     // reached, so that a partially enumerated leaf cannot leak stale widths.
     size_t smt_width = 0;
     size_t core_width = 0;
     size_t die_width = 0;

     // The bound is inclusive: with all of SMT, core, module, tile and die
     // enumerated (sub-leaves 0 through 4), the terminating kInvalid level
     // sits at sub-leaf kMaxTopologyLevel.
     for (size_t i = 0; i <= kMaxTopologyLevel; ++i) {
       const auto eax = Read<TopologyEnumerationA>(io, i);
       const auto ecx = Read<TopologyEnumerationC>(io, i);

       // The above reference explains that SMT is expected to be the first
       // level.
       const auto level_type = ecx.level_type();
       if (i == 0 && level_type != TopologyLevelType::kSmt) {
         return false;
       }
       const auto shift = eax.next_level_apic_id_shift();
       switch (level_type) {
         case TopologyLevelType::kInvalid:
           // Signals the end of iteration: commit what was gathered.
           smt_id_width_ = smt_width;
           core_id_cumulative_width_ = core_width;
           die_id_cumulative_width_ = die_width;
           return true;
         case TopologyLevelType::kSmt:
           smt_width = shift;
           core_width = shift;
           die_width = shift;
           break;
         case TopologyLevelType::kCore:
           core_width = shift;
           die_width = shift;
           break;
         // See class documentation regarding the elision of MODULE and TILE.
         case TopologyLevelType::kModule:
         case TopologyLevelType::kTile:
         case TopologyLevelType::kDie:
           die_width = shift;
           break;
       }
     }

     // Something went wrong; iteration should have finished in hitting on a
     // kInvalid level.
     return false;
   }

   // A shim to dynamically look up statically parametrized values: reads the
   // `n`-th instantiation of `TopologyEnumeration`, for n in
   // [0, kMaxTopologyLevel].
   template <template <uint32_t> class TopologyEnumeration, typename CpuidIoProvider>
   static auto Read(CpuidIoProvider&& io, size_t n) {
     switch (n) {
       case 0:
         return io.template Read<TopologyEnumeration<0>>();
       case 1:
         return io.template Read<TopologyEnumeration<1>>();
       case 2:
         return io.template Read<TopologyEnumeration<2>>();
       case 3:
         return io.template Read<TopologyEnumeration<3>>();
       case 4:
         return io.template Read<TopologyEnumeration<4>>();
       case 5:
         return io.template Read<TopologyEnumeration<5>>();
       default:
         // The cases above must be extended if the maximum level ever grows.
         static_assert(kMaxTopologyLevel == 5);
         ZX_DEBUG_ASSERT(n <= kMaxTopologyLevel);
         __UNREACHABLE;
     }
   }

   // Returns the maximum addressable number of logical processors per package.
   // Both Intel and AMD spec ways to determine this quantity.
   template <typename CpuidIoProvider>
   size_t MaxNumLogicalProcessors(CpuidIoProvider&& io) {
     // The Intel max.
     size_t max = io.template Read<CpuidProcessorInfo>().max_logical_processors();

     // The AMD max. For AMD hardware, the quantity above gives the actual count
     // of logical processors instead of the maximum number of addressable ones.
     if (CpuidSupports<CpuidExtendedSizeInfo>(io)) {
       // [amd/vol3]: E.5.2  Extended Method.
       const auto size_ids = io.template Read<CpuidExtendedSizeInfo>();
       // A non-zero APIC ID size gives the order of the maximum directly;
       // otherwise fall back to the NC field, which is incremented by one here.
       const size_t amd_max =
           size_ids.apic_id_size() ? (size_t{1} << size_ids.apic_id_size()) : size_ids.nc() + 1;
       max = std::max(amd_max, max);
     }
     return max;
   }

   // Returns the base-2 logarithm of `n` rounded up to the next power of two.
   static size_t CeilLog2(size_t n) { return cpp20::countr_zero(cpp20::bit_ceil(n)); }

   // Returns a mask with the lowest `width` bits set.
   static uint32_t ToMask(size_t width) { return ~(uint32_t{0xffffffff} << width); }

   size_t smt_id_width_ = 0;
   // CORE ID width + SMT ID width.
   size_t core_id_cumulative_width_ = 0;
   // DIE ID width + CORE ID width + SMT ID width.
   size_t die_id_cumulative_width_ = 0;
 };

 }  // namespace arch

 #endif  // ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_APIC_ID_H_
	// Copyright 2020 The Fuchsia Authors
	//
	// Use of this source code is governed by a MIT-style
	// license that can be found in the LICENSE file or at
	// https://opensource.org/licenses/MIT

	#ifndef ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_APIC_ID_H_
	#define ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_APIC_ID_H_

	#include <lib/arch/x86/cpuid.h>
	#include <lib/stdcompat/bit.h>

	namespace arch {

	// Returns the APIC ID - x2APIC if supported - associated with the logical
	// processor in turn associated with the provided CpuidIoProvider.
	template <typename CpuidIoProvider>
	inline uint32_t GetApicId(CpuidIoProvider&& io) {
	using LevelType = CpuidTopologyEnumerationC::TopologyLevelType;

	// [intel/vol3]: 8.9.2 Hierarchical Mapping of CPUID Extended Topology Leaf.
	//
	// For extended topology enumeration, if the first level does not encode the
	// "SMT" level (a spec'ed expecation), then we assume the associated leaves
	// to be invalid.
	if (CpuidSupports<CpuidV2TopologyEnumerationA<0>>(io) &&
	io.template Read<CpuidV2TopologyEnumerationC<0>>().level_type() == LevelType::kSmt) {
	return io.template Read<CpuidV2TopologyEnumerationD<0>>().x2apic_id();
	}
	if (CpuidSupports<CpuidV1TopologyEnumerationA<0>>(io) &&
	io.template Read<CpuidV1TopologyEnumerationC<0>>().level_type() == LevelType::kSmt) {
	return io.template Read<CpuidV1TopologyEnumerationD<0>>().x2apic_id();
	}

	if (CpuidSupports<CpuidExtendedApicId>(io)) {
	return io.template Read<CpuidExtendedApicId>().x2apic_id();
	}

	return io.template Read<CpuidProcessorInfo>().initial_apic_id();
	}

	// ApicIdDecoder is a utility for extracting particular topological level
	// IDs from an (x2)APIC ID.
	//
	// In full generality, an APIC ID might decompose as follows:
	//
	// [intel/vol3]: Figure 8-5. Generalized Seven Level Interpretation of the APIC ID.
	// -----------------------------------------------------------------------------
	// | CLUSTER ID | PACKAGE ID | DIE ID | TILE ID | MODULE ID | CORE ID | SMT ID |
	// -----------------------------------------------------------------------------
	//
	// where the full ID width is 32-bit (if x2APIC) or 8-bit.
	//
	// This, however, is higher fidelity than we are able to make use of. Since
	// CLUSTER ID and PACKAGE_ID are not directly enumerable from CPUID, we elide
	// the two IDs into a single PACKAGE ID, defined as the rest of the ID above
	// DIE. Moreover, the system currently has no use for enumerating tiles and
	// modules directly (which is also a practice that AMD does not do): we elide
	// the TILE and MODULE IDs into DIE ID alone. Accordingly, ApicIdDecoder
	// partitions up the APIC address space as
	// ------------------------------------------
	// | PACKAGE ID | DIE ID | CORE ID | SMT ID |
	// ------------------------------------------
	class ApicIdDecoder {
	 public:
	  // Derives the SMT, core and die ID field widths from the CPUID data served
	  // by `io`. If no usable topology information is found, all widths stay zero
	  // and the whole APIC ID is treated as the package ID.
	  //
	  // The SFINAE guard keeps this forwarding constructor from taking precedence
	  // over the copy and move constructors. The provider type is decayed before
	  // the comparison: for an lvalue argument `CpuidIoProvider` is deduced as a
	  // reference type, which would never compare equal to ApicIdDecoder itself.
	  template <typename CpuidIoProvider,
	            typename = std::enable_if_t<
	                !std::is_same_v<std::decay_t<CpuidIoProvider>, ApicIdDecoder>>>
	  explicit ApicIdDecoder(CpuidIoProvider&& io) {
	    // [intel/vol3]: Example 8-21. Support Routines for Identifying Package,
	    // Core and Logical Processors from 8-bit Initial APIC ID.
	    // [amd/vol3]: E.5.1 Legacy Method.
	    //
	    // When HTT ("Hyper-Threading Technology") is not advertised, the package
	    // contains a single logical processor. This is counter-intuitive, but
	    // Intel cores that do not actually have SMT available may still present
	    // HTT == 1; moreover, in the case of AMD, HTT means "either that there is
	    // more than one thread per core or more than one core per compute unit".
	    if (!io.template Read<CpuidFeatureFlagsD>().htt()) {
	      return;
	    }

	    // First try the extended topology leaves, which may work with older AMD
	    // models. The "V2" leaf 0x1f is preferred - if available - to the "V1"
	    // leaf 0xb.
	    if (TryExtendedTopology<CpuidV2TopologyEnumerationA, CpuidV2TopologyEnumerationC>(io) ||
	        TryExtendedTopology<CpuidV1TopologyEnumerationA, CpuidV1TopologyEnumerationC>(io)) {
	      // The DIE level might not have been explicitly enumerated. If it does
	      // not seem so, redefine the cumulative die-and-below ID width to be the
	      // rounded binary order of the maximum number of addressable logical
	      // processors per package, which should always coincide in general. The
	      // result is clamped from below by the core-and-below width so that
	      // package_id() can never overlap bits already attributed to the core.
	      if (die_id_cumulative_width_ == core_id_cumulative_width_) {
	        die_id_cumulative_width_ =
	            std::max(core_id_cumulative_width_, CeilLog2(MaxNumLogicalProcessors(io)));
	      }
	      return;
	    }

	    // Maximum per package, that is.
	    size_t max_logical_processors = MaxNumLogicalProcessors(io);
	    size_t max_cores = 1;
	    size_t max_dies = 1;
	    auto finalize = [&]() {
	      // Leave every width at zero if the counts are not mutually consistent.
	      if (!(max_logical_processors >= max_cores && max_cores >= max_dies && max_dies > 0)) {
	        return;
	      }

	      smt_id_width_ = CeilLog2(max_logical_processors / max_cores);
	      core_id_cumulative_width_ = CeilLog2(max_cores / max_dies) + smt_id_width_;
	      // Each quotient above is rounded up separately, so their sum can exceed
	      // the rounded order of the total; clamp so that the die-and-below width
	      // is never narrower than the core-and-below width.
	      die_id_cumulative_width_ =
	          std::max(core_id_cumulative_width_, CeilLog2(max_logical_processors));
	    };

	    // [intel/vol3]: Example 8-21. Support Routines for Identifying
	    // Package, Core and Logical Processors from 8-bit Initial APIC ID.
	    if (CpuidSupports<CpuidIntelCacheTopologyA<0>>(io)) {
	      const auto zeroth_cache_topology = io.template Read<CpuidIntelCacheTopologyA<0>>();
	      if (zeroth_cache_topology.cache_type() != X86CacheType::kNull) {
	        // The field encodes one less than the real count.
	        max_cores = zeroth_cache_topology.max_cores() + 1;
	        finalize();
	        return;
	      }
	    }

	    // Unfortunately, the AMD spec does not give a general way of
	    // determining the maximum number of addressable cores and dies per
	    // package, respectively. If leaf 0x8000'001e is supported (which
	    // requires the topology extension feature to be advertised), then we
	    // can give best-effort guesses of these quantities based on the actual
	    // counts of dies per package and logical processors per core.
	    if (CpuidSupports<CpuidComputeUnitInfo>(io)) {
	      // We translate "compute unit" and "node" here as core and die,
	      // respectively.
	      max_dies = io.template Read<CpuidNodeInfo>().nodes_per_package() + 1;
	      const size_t threads_per_core =
	          io.template Read<CpuidComputeUnitInfo>().threads_per_compute_unit() + 1;
	      max_cores = max_logical_processors / threads_per_core;
	    }
	    finalize();
	  }

	  ApicIdDecoder() = delete;

	  // The lowest `smt_id_width_` bits of the APIC ID.
	  uint32_t smt_id(uint32_t apic_id) const { return apic_id & ToMask(smt_id_width_); }

	  // The bits of the APIC ID above the SMT ID and below the DIE ID.
	  uint32_t core_id(uint32_t apic_id) const {
	    return (apic_id & ToMask(core_id_cumulative_width_)) >> smt_id_width_;
	  }

	  // The bits of the APIC ID above the CORE ID and below the PACKAGE ID.
	  uint32_t die_id(uint32_t apic_id) const {
	    return (apic_id & ToMask(die_id_cumulative_width_)) >> core_id_cumulative_width_;
	  }

	  // Everything in the APIC ID above the DIE ID.
	  uint32_t package_id(uint32_t apic_id) const { return apic_id >> die_id_cumulative_width_; }

	 private:
	  using TopologyLevelType = CpuidTopologyEnumerationC::TopologyLevelType;

	  // The highest level type that is decoded; also the highest sub-leaf index
	  // that the Read() shim below can dispatch to.
	  static constexpr size_t kMaxTopologyLevel = static_cast<size_t>(TopologyLevelType::kDie);

	  // [intel/vol3]: Example 8-18. Support Routines for Identifying Package,
	  // Die, Core and Logical Processors from 32-bit x2APIC ID.
	  //
	  // Attempts to perform Intel's extended topology enumeration routine and
	  // returns whether the attempt was successful. We templatize this so that we
	  // can supply the CPUID value types of either the V1 or V2 leaves (0xb and
	  // 0x1f, respectively), which are identically laid out.
	  //
	  // The member widths are only written on success; a failed attempt leaves
	  // the decoder exactly as it was.
	  template <template <uint32_t> class TopologyEnumerationA,
	            template <uint32_t> class TopologyEnumerationC,  //
	            typename CpuidIoProvider>
	  bool TryExtendedTopology(CpuidIoProvider&& io) {
	    if (!CpuidSupports<TopologyEnumerationA<0>>(io)) {
	      return false;
	    }

	    // Accumulated locally and published only once the terminating level is
	    // reached, so that a partially enumerated leaf cannot leak stale widths.
	    size_t smt_width = 0;
	    size_t core_width = 0;
	    size_t die_width = 0;

	    // The bound is inclusive: with all of SMT, core, module, tile and die
	    // enumerated (sub-leaves 0 through 4), the terminating kInvalid level
	    // sits at sub-leaf kMaxTopologyLevel.
	    for (size_t i = 0; i <= kMaxTopologyLevel; ++i) {
	      const auto eax = Read<TopologyEnumerationA>(io, i);
	      const auto ecx = Read<TopologyEnumerationC>(io, i);

	      // The above reference explains that SMT is expected to be the first
	      // level.
	      const auto level_type = ecx.level_type();
	      if (i == 0 && level_type != TopologyLevelType::kSmt) {
	        return false;
	      }
	      const auto shift = eax.next_level_apic_id_shift();
	      switch (level_type) {
	        case TopologyLevelType::kInvalid:
	          // Signals the end of iteration: commit what was gathered.
	          smt_id_width_ = smt_width;
	          core_id_cumulative_width_ = core_width;
	          die_id_cumulative_width_ = die_width;
	          return true;
	        case TopologyLevelType::kSmt:
	          smt_width = shift;
	          core_width = shift;
	          die_width = shift;
	          break;
	        case TopologyLevelType::kCore:
	          core_width = shift;
	          die_width = shift;
	          break;
	        // See class documentation regarding the elision of MODULE and TILE.
	        case TopologyLevelType::kModule:
	        case TopologyLevelType::kTile:
	        case TopologyLevelType::kDie:
	          die_width = shift;
	          break;
	      }
	    }

	    // Something went wrong; iteration should have finished in hitting on a
	    // kInvalid level.
	    return false;
	  }

	  // A shim to dynamically look up statically parametrized values: reads the
	  // `n`-th instantiation of `TopologyEnumeration`, for n in
	  // [0, kMaxTopologyLevel].
	  template <template <uint32_t> class TopologyEnumeration, typename CpuidIoProvider>
	  static auto Read(CpuidIoProvider&& io, size_t n) {
	    switch (n) {
	      case 0:
	        return io.template Read<TopologyEnumeration<0>>();
	      case 1:
	        return io.template Read<TopologyEnumeration<1>>();
	      case 2:
	        return io.template Read<TopologyEnumeration<2>>();
	      case 3:
	        return io.template Read<TopologyEnumeration<3>>();
	      case 4:
	        return io.template Read<TopologyEnumeration<4>>();
	      case 5:
	        return io.template Read<TopologyEnumeration<5>>();
	      default:
	        // The cases above must be extended if the maximum level ever grows.
	        static_assert(kMaxTopologyLevel == 5);
	        ZX_DEBUG_ASSERT(n <= kMaxTopologyLevel);
	        __UNREACHABLE;
	    }
	  }

	  // Returns the maximum addressable number of logical processors per package.
	  // Both Intel and AMD spec ways to determine this quantity.
	  template <typename CpuidIoProvider>
	  size_t MaxNumLogicalProcessors(CpuidIoProvider&& io) {
	    // The Intel max.
	    size_t max = io.template Read<CpuidProcessorInfo>().max_logical_processors();

	    // The AMD max. For AMD hardware, the quantity above gives the actual count
	    // of logical processors instead of the maximum number of addressable ones.
	    if (CpuidSupports<CpuidExtendedSizeInfo>(io)) {
	      // [amd/vol3]: E.5.2 Extended Method.
	      const auto size_ids = io.template Read<CpuidExtendedSizeInfo>();
	      // A non-zero APIC ID size gives the order of the maximum directly;
	      // otherwise fall back to the NC field, which is incremented by one here.
	      const size_t amd_max =
	          size_ids.apic_id_size() ? (size_t{1} << size_ids.apic_id_size()) : size_ids.nc() + 1;
	      max = std::max(amd_max, max);
	    }
	    return max;
	  }

	  // Returns the base-2 logarithm of `n` rounded up to the next power of two.
	  static size_t CeilLog2(size_t n) { return cpp20::countr_zero(cpp20::bit_ceil(n)); }

	  // Returns a mask with the lowest `width` bits set.
	  static uint32_t ToMask(size_t width) { return ~(uint32_t{0xffffffff} << width); }

	  size_t smt_id_width_ = 0;
	  // CORE ID width + SMT ID width.
	  size_t core_id_cumulative_width_ = 0;
	  // DIE ID width + CORE ID width + SMT ID width.
	  size_t die_id_cumulative_width_ = 0;
	};

	} // namespace arch

	#endif // ZIRCON_KERNEL_LIB_ARCH_INCLUDE_LIB_ARCH_X86_APIC_ID_H_