// blob: fc17a6c6a94a88d1fc8fb85c865beec1c42a6c59 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "usage_pixel_format_cost.h"

#include <fidl/fuchsia.sysmem2/cpp/wire.h>
#include <lib/ddk/platform-defs.h>
#include <lib/fidl/llcpp/arena.h>
#include <lib/image-format/image_format.h>
#include <zircon/assert.h>

#include <limits>
#include <list>
#include <map>
#include <utility>

#include "macros.h"
// TODO(fxbug.dev/68491): This platform/board/etc-specific allocation/creation
// policy code belongs in a platform/board/etc-specific binary.
namespace sysmem_driver {
namespace {
// Platform is this file's own notion of "which cost table applies". It is
// deliberately not the same thing as the PID(s) in platform-defs.h: besides
// values that are 1:1 with a specific PID, it also contains catch-all values
// that stand for a whole set of PIDs (typically named with a _GENERIC suffix).
//
// A value becomes a member of a more generic category through the
// next_platform field of its PlatformCostsEntry. That chaining is what lets a
// more-specific Platform effectively share the cost entries of a more-generic
// one.
enum Platform {
  // Not a platform. Used as next_platform to terminate a chain.
  kPlatform_None = 0,
  // Used when the VID/PID pair has no entry in kPlatformTranslation.
  kPlatform_Generic,
  // Category value: not mapped from any single PID.
  kPlatform_Arm_Mali,
  // Category value: catch-all for the Amlogic values below.
  kPlatform_Amlogic_Generic,
  // The remaining values are each mapped from one PDEV_PID_AMLOGIC_* value.
  kPlatform_Amlogic_S912,
  kPlatform_Amlogic_S905D2,
  kPlatform_Amlogic_T931,
  kPlatform_Amlogic_A311D,
};
// Packs a (vid, pid) pair into one 64-bit lookup key: |vid| occupies the upper
// 32 bits and |pid| the lower 32 bits.
constexpr uint64_t MakeVidPidKey(uint32_t vid, uint32_t pid) {
  const uint64_t upper_half = static_cast<uint64_t>(vid) << 32;
  return upper_half | pid;
}
// Map from PID (platform id) to Platform value.
const std::map<uint64_t, Platform> kPlatformTranslation = {
{MakeVidPidKey(PDEV_VID_AMLOGIC, PDEV_PID_AMLOGIC_S912), kPlatform_Amlogic_S912},
{MakeVidPidKey(PDEV_VID_AMLOGIC, PDEV_PID_AMLOGIC_S905D2), kPlatform_Amlogic_S905D2},
{MakeVidPidKey(PDEV_VID_AMLOGIC, PDEV_PID_AMLOGIC_T931), kPlatform_Amlogic_T931},
{MakeVidPidKey(PDEV_VID_AMLOGIC, PDEV_PID_AMLOGIC_A311D), kPlatform_Amlogic_A311D},
};
// One row of a platform's cost table.
//
// Among the entries that are eligible for a query, the one whose
// required_buffer_usage_bits has more bits in common with the query's usage is
// the closer match.
//
// On a tie, the entry that occurs first in the overall effective list of
// entries for the platform wins; any later-listed, otherwise equally close
// entry is ignored.
struct UsagePixelFormatCostEntry {
  // An entry is only considered when the query's pixel_format matches this.
  fuchsia_sysmem2::wire::PixelFormat pixel_format;

  // An entry is only considered when the query's usage contains every usage
  // bit set here.
  fuchsia_sysmem2::wire::BufferUsage required_buffer_usage_bits;

  // The best-matching entry for a GetCost() query is determined first; if that
  // turns out to be this entry, this is the value GetCost() returns.
  const double cost;
};
// Associates a Platform with its own cost entries and with the next, usually
// less specific, Platform whose entries are also part of its effective list.
struct PlatformCostsEntry {
  // The platform this entry describes.
  const Platform platform;

  // Either kPlatform_None, which terminates the effective overall list, or
  // another Platform value whose list is treated as a continuation of this
  // platform's list. This is how more-specific Platform values chain into
  // less-specific ones.
  const Platform next_platform;

  // The cost entries listed directly for |platform|. Held by reference, not by
  // value.
  const std::list<const UsagePixelFormatCostEntry>& costs;
};
// Appends two entries to |result| for |format_modifier|: one with pixel format type kBgra32 and
// one with kR8G8B8A8, in that order. Both get the same |cost|, because the RGBA and BGRA versions
// have similar cost when they are supported.
//
// All usage fields of the appended entries are set to 0, so the entries do not require any
// particular usage bit from a query.
//
// |allocator| is passed to the wire-table constructors and to set_format_modifier_value().
static void AddRgbaPixelFormat(fidl::AnyArena& allocator, uint64_t format_modifier, double cost,
                               std::list<const UsagePixelFormatCostEntry>& result) {
  const fuchsia_sysmem2::wire::PixelFormatType kChannelOrders[] = {
      fuchsia_sysmem2::wire::PixelFormatType::kBgra32,
      fuchsia_sysmem2::wire::PixelFormatType::kR8G8B8A8,
  };
  for (const auto channel_order : kChannelOrders) {
    fuchsia_sysmem2::wire::PixelFormat format(allocator);
    format.set_type(channel_order);
    format.set_format_modifier_value(allocator, format_modifier);

    fuchsia_sysmem2::wire::BufferUsage no_required_usage(allocator);
    no_required_usage.set_none(0u);
    no_required_usage.set_cpu(0u);
    no_required_usage.set_vulkan(0u);
    no_required_usage.set_display(0u);
    no_required_usage.set_video(0u);

    result.emplace_back(UsagePixelFormatCostEntry{
        /*pixel_format=*/std::move(format),
        /*required_buffer_usage_bits=*/std::move(no_required_usage),
        /*cost=*/cost,
    });
  }
}
// Arena handed to the wire-table constructors used by the cost-entry initializers below.
//
// kAllocatorSize is the amount of space those initializers were known to need when this number
// was last set, which lets them avoid the heap. Skipping the heap is (marginally) justified here
// only because the buffer ends up exactly full of data with trivial destructors and the right
// size happened to be known already.
//
// Do not spend time updating this number each time more entries are added: the opportunity cost
// of that time will almost certainly exceed any real savings from keeping it exact.
constexpr size_t kAllocatorSize = 3792;
fidl::Arena<kAllocatorSize> allocator;
// Cost entries for the kPlatform_Arm_Mali category.
//
// For every AFBC modifier listed below, AddRgbaPixelFormat() appends one kBgra32 and one
// kR8G8B8A8 entry. The cost of a modifier is the sum of a penalty for each desirable feature it
// lacks, plus a small penalty when it uses split-block. A final pair of entries for the linear TE
// modifier gets a cost above every AFBC combination.
const std::list<const UsagePixelFormatCostEntry> kArm_Mali_Cost_Entries = [] {
  std::list<const UsagePixelFormatCostEntry> result;
  // Split block is slightly worse than non-split-block for GPU<->GPU, but better for GPU->display.
  constexpr double kSplitCost = 10.0;
  constexpr double kNonYuvCost = 100.0;
  // Tiled headers enable more optimizations and are more efficient, but alignment requirements make
  // them take up more RAM. They're still worthwhile for our usecases.
  constexpr double kNonTiledHeaderCost = 500.0;
  // Formats without sparse set are substantially worse for the GPU than sparse formats.
  constexpr double kNonSparseCost = 1000.0;
  constexpr double kNonTeCost = 2000.0;
  // Non-16X16 can have large advantages for the display, but it's much worse for the GPU.
  constexpr double kNon16X16Cost = 4000.0;
  // The low bits of an AFBC modifier select the block layout (16X16, 32X8, ...).
  constexpr uint64_t kAfbcTypeMask = 0xf;

  const uint64_t kModifiers[] = {
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16SplitBlockSparseYuvTeTiledHeader,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16Te,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc32X8Te,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16SplitBlockSparseYuvTe,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16SplitBlockSparseYuvTiledHeader,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16SplitBlockSparseYuv,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16YuvTiledHeader,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16,
      fuchsia_sysmem2::wire::kFormatModifierArmAfbc32X8,
  };
  for (const uint64_t modifier : kModifiers) {
    double cost = 0.0;
    if (!(modifier & fuchsia_sysmem2::wire::kFormatModifierArmYuvBit)) {
      cost += kNonYuvCost;
    }
    if (!(modifier & fuchsia_sysmem2::wire::kFormatModifierArmTiledHeaderBit)) {
      cost += kNonTiledHeaderCost;
    }
    // kSplitCost is the split-block penalty, so it must key off the split-block bit. Keying it
    // off the tiled-header bit would charge tiled-header formats instead and leave split-block
    // formats unpenalized.
    if (modifier & fuchsia_sysmem2::wire::kFormatModifierArmSplitBlockBit) {
      cost += kSplitCost;
    }
    if (!(modifier & fuchsia_sysmem2::wire::kFormatModifierArmSparseBit)) {
      cost += kNonSparseCost;
    }
    if (!(modifier & fuchsia_sysmem2::wire::kFormatModifierArmTeBit)) {
      cost += kNonTeCost;
    }
    if ((modifier & kAfbcTypeMask) !=
        (fuchsia_sysmem2::wire::kFormatModifierArmAfbc16X16 & kAfbcTypeMask)) {
      cost += kNon16X16Cost;
    }
    AddRgbaPixelFormat(allocator, modifier, cost, result);
  }
  // Should be higher cost than all AFBC formats.
  AddRgbaPixelFormat(allocator, fuchsia_sysmem2::wire::kFormatModifierArmLinearTe, 30000.0, result);
  return result;
}();
// The ARM Mali category: its own entries first, then the generic platform's entries.
const PlatformCostsEntry kArm_Mali_Costs = {
    /*platform=*/kPlatform_Arm_Mali,
    /*next_platform=*/kPlatform_Generic,
    /*costs=*/kArm_Mali_Cost_Entries,
};
const std::list<const UsagePixelFormatCostEntry> kAmlogic_Generic_Cost_Entries = [] {
std::list<const UsagePixelFormatCostEntry> result;
// NV12 weakly preferred for VIDEO_USAGE_HW_DECODER.
fuchsia_sysmem2::wire::PixelFormat pixel_format(allocator);
pixel_format.set_type(fuchsia_sysmem2::wire::PixelFormatType::kNv12);
fuchsia_sysmem2::wire::BufferUsage buffer_usage(allocator);
buffer_usage.set_none(0u);
buffer_usage.set_cpu(0u);
buffer_usage.set_vulkan(0u);
buffer_usage.set_display(0u);
buffer_usage.set_video(fuchsia_sysmem2::wire::kVideoUsageHwDecoder);
result.emplace_back(UsagePixelFormatCostEntry{
// .pixel_format
std::move(pixel_format),
// .required_buffer_usage_bits
std::move(buffer_usage),
// .cost
100.0L,
});
return result;
}();
// Cost entries for kPlatform_Generic. These costs are expected to be true on every platform.
const std::list<const UsagePixelFormatCostEntry> kGeneric_Cost_Entries = [] {
  struct ModifierCost {
    uint64_t format_modifier;
    double cost;
  };
  // Listed in the order the entries are appended.
  const ModifierCost kIntelTilings[] = {
      {fuchsia_sysmem2::wire::kFormatModifierIntelI915YfTiledCcs, 500.0},
      {fuchsia_sysmem2::wire::kFormatModifierIntelI915YTiledCcs, 600.0},
      {fuchsia_sysmem2::wire::kFormatModifierIntelI915YfTiled, 1000.0},
      {fuchsia_sysmem2::wire::kFormatModifierIntelI915YTiled, 2000.0},
      {fuchsia_sysmem2::wire::kFormatModifierIntelI915XTiled, 3000.0},
  };

  std::list<const UsagePixelFormatCostEntry> entries;
  for (const ModifierCost& tiling : kIntelTilings) {
    AddRgbaPixelFormat(allocator, tiling.format_modifier, tiling.cost, entries);
  }
  // LOG(INFO, "usage_pixel_format_cost.cc - allocator.debug_needed_buffer_size(): %zu",
  //     allocator.inner_allocator().debug_needed_buffer_size());
  return entries;
}();
// The Amlogic catch-all category: its own entries first, then the ARM Mali category's chain.
const PlatformCostsEntry kAmlogic_Generic_Costs = {
    /*platform=*/kPlatform_Amlogic_Generic,
    /*next_platform=*/kPlatform_Arm_Mali,
    /*costs=*/kAmlogic_Generic_Cost_Entries,
};
// kAmlogic_S912_Cost_Entries will go here.

// S912 has no entries of its own yet (empty list); it chains into the Amlogic catch-all.
const PlatformCostsEntry kAmlogic_S912_Costs = {
    /*platform=*/kPlatform_Amlogic_S912,
    /*next_platform=*/kPlatform_Amlogic_Generic,
    /*costs=*/std::list<const UsagePixelFormatCostEntry>{},
};
// kAmlogic_S905D2_Cost_Entries will go here.

// S905D2 has no entries of its own yet (empty list); it chains into the Amlogic catch-all.
const PlatformCostsEntry kAmlogic_S905D2_Costs = {
    /*platform=*/kPlatform_Amlogic_S905D2,
    /*next_platform=*/kPlatform_Amlogic_Generic,
    /*costs=*/std::list<const UsagePixelFormatCostEntry>{},
};
// kAmlogic_T931_Cost_Entries will go here.

// T931 has no entries of its own yet (empty list); it chains into the Amlogic catch-all.
const PlatformCostsEntry kAmlogic_T931_Costs = {
    /*platform=*/kPlatform_Amlogic_T931,
    /*next_platform=*/kPlatform_Amlogic_Generic,
    /*costs=*/std::list<const UsagePixelFormatCostEntry>{},
};
// kAmlogic_A311D_Cost_Entries will go here.

// A311D has no entries of its own yet (empty list); it chains into the Amlogic catch-all.
const PlatformCostsEntry kAmlogic_A311D_Costs = {
    /*platform=*/kPlatform_Amlogic_A311D,
    /*next_platform=*/kPlatform_Amlogic_Generic,
    /*costs=*/std::list<const UsagePixelFormatCostEntry>{},
};
// The generic platform: the end of every chain, so next_platform is kPlatform_None.
const PlatformCostsEntry kGeneric_Costs = {
    /*platform=*/kPlatform_Generic,
    /*next_platform=*/kPlatform_None,
    /*costs=*/kGeneric_Cost_Entries,
};
const std::map<Platform, const PlatformCostsEntry*> kPlatformCosts = {
{kPlatform_Generic, &kGeneric_Costs},
{kPlatform_Arm_Mali, &kArm_Mali_Costs},
{kPlatform_Amlogic_Generic, &kAmlogic_Generic_Costs},
{kPlatform_Amlogic_S912, &kAmlogic_S912_Costs},
{kPlatform_Amlogic_S905D2, &kAmlogic_S905D2_Costs},
{kPlatform_Amlogic_T931, &kAmlogic_T931_Costs},
{kPlatform_Amlogic_A311D, &kAmlogic_A311D_Costs},
};
// Cost reported when no cost entry applies to a query: GetCost() and GetCostInternal() return it
// when the platform has no cost table or when no entry in the platform's chain matches. It is the
// largest finite double.
const double kDefaultCost = std::numeric_limits<double>::max();
// Translates a platform device's VID/PID pair into a Platform value. Pairs that are not listed in
// kPlatformTranslation map to kPlatform_Generic.
Platform FindPlatform(uint32_t pdev_device_info_vid, uint32_t pdev_device_info_pid) {
  const uint64_t key = MakeVidPidKey(pdev_device_info_vid, pdev_device_info_pid);
  const auto found = kPlatformTranslation.find(key);
  return found != kPlatformTranslation.end() ? found->second : kPlatform_Generic;
}
// Returns the PlatformCostsEntry registered for |platform| in kPlatformCosts, or nullptr when
// |platform| is kPlatform_None or has no registered entry.
const PlatformCostsEntry* FindPlatformCosts(Platform platform) {
  if (platform != kPlatform_None) {
    const auto found = kPlatformCosts.find(platform);
    if (found != kPlatformCosts.end()) {
      return found->second;
    }
  }
  return nullptr;
}
// Returns true when every bit that is set in |r| is also set in |a|.
//
// |a| the bits to check
// |r| the required bits
bool HasAllRequiredBits(uint32_t a, uint32_t r) {
  // A required bit that |a| lacks would survive masking with ~a.
  const uint32_t missing = r & ~a;
  return missing == 0;
}
// Returns true when, for each of the cpu, vulkan, display and video usage fields, every bit set in
// |r| is also set in |a|. A field that is absent from a table is treated as 0. The "none" field is
// not examined.
//
// |a| the usage to check
// |r| the required usage bits
bool HasAllRequiredUsageBits(const fuchsia_sysmem2::wire::BufferUsage& a,
                             const fuchsia_sysmem2::wire::BufferUsage& r) {
  const uint32_t have_cpu = a.has_cpu() ? a.cpu() : 0;
  const uint32_t need_cpu = r.has_cpu() ? r.cpu() : 0;
  if (!HasAllRequiredBits(have_cpu, need_cpu)) {
    return false;
  }

  const uint32_t have_vulkan = a.has_vulkan() ? a.vulkan() : 0;
  const uint32_t need_vulkan = r.has_vulkan() ? r.vulkan() : 0;
  if (!HasAllRequiredBits(have_vulkan, need_vulkan)) {
    return false;
  }

  const uint32_t have_display = a.has_display() ? a.display() : 0;
  const uint32_t need_display = r.has_display() ? r.display() : 0;
  if (!HasAllRequiredBits(have_display, need_display)) {
    return false;
  }

  const uint32_t have_video = a.has_video() ? a.video() : 0;
  const uint32_t need_video = r.has_video() ? r.video() : 0;
  return HasAllRequiredBits(have_video, need_video);
}
// Returns the number of bit positions that are set in both |a| and |b|.
uint32_t SharedBitsCount(uint32_t a, uint32_t b) {
  // TODO(dustingreen): Consider using popcount intrinsic (or equivalent).
  uint32_t count = 0;
  // Clear the lowest set bit once per iteration. All arithmetic stays unsigned, which avoids
  // shifting a signed 1 into the sign bit (as `1 << 31` would).
  for (uint32_t remaining = a & b; remaining != 0; remaining &= remaining - 1) {
    ++count;
  }
  return count;
}
// Returns the total number of usage bits that |a| and |b| have in common, summed over the cpu,
// vulkan, display and video fields. A field that is absent from a table counts as 0. The "none"
// field is not examined.
uint32_t SharedUsageBitsCount(const fuchsia_sysmem2::wire::BufferUsage& a,
                              const fuchsia_sysmem2::wire::BufferUsage& b) {
  uint32_t total = 0;
  total += SharedBitsCount(a.has_cpu() ? a.cpu() : 0, b.has_cpu() ? b.cpu() : 0);
  total += SharedBitsCount(a.has_vulkan() ? a.vulkan() : 0, b.has_vulkan() ? b.vulkan() : 0);
  total += SharedBitsCount(a.has_display() ? a.display() : 0, b.has_display() ? b.display() : 0);
  total += SharedBitsCount(a.has_video() ? a.video() : 0, b.has_video() ? b.video() : 0);
  return total;
}
// Decides whether entry |a| should replace entry |b| as the best-matching cost entry for a query.
// This is purely about how well an entry matches the query; the cost stored in |a| or |b| plays no
// part in it.
//
// |constraints| the query's constraints
//
// |image_format_constraints_index| the query's image_format_constraints_index
//
// |a| the new UsagePixelFormatCostEntry to consider; must not be nullptr
//
// |b| the existing UsagePixelFormatCostEntry that |a| is being compared against; may be nullptr,
// meaning there is no existing match yet
//
// Returns false when |a| does not apply to the query (different pixel format, or |a| requires
// usage bits the query lacks). Otherwise returns true when |b| is nullptr or when |a| shares
// strictly more usage bits with the query than |b| does, so on a tie the existing |b| is kept.
bool IsBetterMatch(const fuchsia_sysmem2::wire::BufferCollectionConstraints& constraints,
                   uint32_t image_format_constraints_index, const UsagePixelFormatCostEntry* a,
                   const UsagePixelFormatCostEntry* b) {
  ZX_DEBUG_ASSERT(a);
  ZX_DEBUG_ASSERT(image_format_constraints_index < constraints.image_format_constraints().count());

  const auto& query_pixel_format =
      constraints.image_format_constraints()[image_format_constraints_index].pixel_format();
  if (!ImageFormatIsPixelFormatEqual(a->pixel_format, query_pixel_format)) {
    return false;
  }

  // A query without a usage table is evaluated against a default-constructed usage.
  fuchsia_sysmem2::wire::BufferUsage default_usage;
  const fuchsia_sysmem2::wire::BufferUsage* const query_usage =
      constraints.has_usage() ? &constraints.usage() : &default_usage;

  if (!HasAllRequiredUsageBits(*query_usage, a->required_buffer_usage_bits)) {
    return false;
  }
  ZX_DEBUG_ASSERT(HasAllRequiredUsageBits(*query_usage, a->required_buffer_usage_bits));

  // We intentionally allow b to be nullptr: any applicable entry beats having no match.
  if (b == nullptr) {
    return true;
  }
  // An existing best match was itself accepted by this function for the same query.
  ZX_DEBUG_ASSERT(HasAllRequiredUsageBits(*query_usage, b->required_buffer_usage_bits));

  const uint32_t shared_with_a = SharedUsageBitsCount(*query_usage, a->required_buffer_usage_bits);
  const uint32_t shared_with_b = SharedUsageBitsCount(*query_usage, b->required_buffer_usage_bits);
  return shared_with_a > shared_with_b;
}
// Returns the cost of the best-matching cost entry for the query, searching the cost table of
// |platform| and then every table reachable through next_platform. Returns kDefaultCost when
// |platform| has no cost table or when no entry in the chain matches.
double GetCostInternal(const fuchsia_sysmem2::wire::BufferCollectionConstraints& constraints,
                       uint32_t image_format_constraints_index, Platform platform) {
  const UsagePixelFormatCostEntry* winner = nullptr;
  // Walk from the most specific table to the least specific one. IsBetterMatch() only replaces
  // the current winner with a strictly better match, so earlier entries win ties.
  for (const PlatformCostsEntry* table = FindPlatformCosts(platform); table != nullptr;
       table = FindPlatformCosts(table->next_platform)) {
    for (const UsagePixelFormatCostEntry& candidate : table->costs) {
      if (IsBetterMatch(constraints, image_format_constraints_index, &candidate, winner)) {
        winner = &candidate;
      }
    }
  }
  if (winner == nullptr) {
    return kDefaultCost;
  }
  ZX_DEBUG_ASSERT(winner);
  return winner->cost;
}
// Returns the cost of the image format constraints at |image_format_constraints_index| within
// |constraints|, for the platform identified by the given VID/PID pair. Returns kDefaultCost when
// the platform resolves to kPlatform_None or when no cost entry applies.
double GetCost(uint32_t pdev_device_info_vid, uint32_t pdev_device_info_pid,
               const fuchsia_sysmem2::wire::BufferCollectionConstraints& constraints,
               uint32_t image_format_constraints_index) {
  const Platform platform = FindPlatform(pdev_device_info_vid, pdev_device_info_pid);
  return platform == kPlatform_None
             ? kDefaultCost
             : GetCostInternal(constraints, image_format_constraints_index, platform);
}
} // namespace
// Three-way comparison of the costs of two image format constraints entries of |constraints| on
// the platform identified by the given VID/PID pair.
//
// Returns -1 when the entry at |image_format_constraints_index_a| is cheaper, 1 when it is more
// expensive, and 0 when both entries have the same cost.
int32_t UsagePixelFormatCost::Compare(
    uint32_t pdev_device_info_vid, uint32_t pdev_device_info_pid,
    const fuchsia_sysmem2::wire::BufferCollectionConstraints& constraints,
    uint32_t image_format_constraints_index_a, uint32_t image_format_constraints_index_b) {
  const double cost_a = GetCost(pdev_device_info_vid, pdev_device_info_pid, constraints,
                                image_format_constraints_index_a);
  const double cost_b = GetCost(pdev_device_info_vid, pdev_device_info_pid, constraints,
                                image_format_constraints_index_b);
  // Exactly one of the two comparisons is true unless the costs are equal.
  return static_cast<int32_t>(cost_a > cost_b) - static_cast<int32_t>(cost_a < cost_b);
}
} // namespace sysmem_driver