blob: dc80e9a77d6b03f44dc9555859afd54f53310688 [file] [log] [blame]
// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "msd_arm_connection.h"
#include <zircon/compiler.h>
#include <limits>
#include <vector>
#include "address_space.h"
#include "gpu_mapping.h"
#include "magma_arm_mali_types.h"
#include "magma_util/dlog.h"
#include "msd_arm_buffer.h"
#include "msd_arm_context.h"
#include "msd_arm_device.h"
#include "msd_arm_semaphore.h"
#include "platform_barriers.h"
#include "platform_semaphore.h"
#include "platform_trace.h"
// Compile-time element count for built-in arrays. The definition is borrowed
// from fxl so that this driver does not pick up a dynamic library dependency
// on fxl.
//
// ArraySizeHelper is only declared, never defined: it is named exclusively
// inside sizeof, where its return type (a reference to char[Count]) yields
// the number of elements of the array argument.
template <typename Element, size_t Count>
auto ArraySizeHelper(Element (&)[Count]) -> char (&)[Count];
#define arraysize(array) (sizeof(ArraySizeHelper(array)))
// ABI entry point: deletes the object that MsdArmAbiConnection::cast returns
// for the given connection handle.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
void msd_connection_close(msd_connection_t* connection)
delete MsdArmAbiConnection::cast(connection);
// ABI entry point: builds an MsdArmContext from the connection pointer held by
// the ABI wrapper and hands it to the caller as a raw msd_context_t pointer.
// The unique_ptr is released, so nothing in this function frees the context.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
msd_context_t* msd_connection_create_context(msd_connection_t* abi_connection)
auto connection = MsdArmAbiConnection::cast(abi_connection);
auto context = std::make_unique<MsdArmContext>(connection->ptr());
// Ownership passes to the caller.
return context.release();
// ABI entry point: destroys a context that was handed out as msd_context_t.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
void msd_context_destroy(msd_context_t* ctx)
auto context = static_cast<MsdArmContext*>(ctx);
// The result of lock() is tested below, so it may be empty (presumably the
// context only holds a weak reference to its connection; confirm).
auto connection = context->connection().lock();
// NOTE(review): as the text stands, the delete is the statement guarded by
// this check, which would leave the context alive whenever the connection is
// already gone. It looks like the guarded statement was dropped from this
// copy; verify before relying on this.
if (connection)
delete context;
// Validates a single atom submitted by the client and, on success, stores the
// resulting atom object in outstanding_atoms_ under its atom number.
//
// atom: client-provided atom description. It is declared volatile, and its
//     atom_number and flags are copied into locals before they are checked.
// semaphores: queue consulted for software atoms; the front entry is handed
//     to the MsdArmSoftAtom that gets created.
// Returns false for every rejection visible here (atom number still running,
// invalid flag combination, no semaphore left, protected mode unsupported,
// bad dependency) and true at the end otherwise.
//
// NOTE(review): this copy of the function has lost its closing braces and a
// number of lines. Several log calls have no closing arguments and the
// user_data copy is garbled. Compare against the upstream file before
// changing any logic here.
bool MsdArmConnection::ExecuteAtom(
volatile magma_arm_mali_atom* atom,
std::deque<std::shared_ptr<magma::PlatformSemaphore>>* semaphores)
uint8_t atom_number = atom->atom_number;
// Reject reuse of an atom number whose previous atom still reports
// kArmMaliResultRunning.
if (outstanding_atoms_[atom_number] &&
outstanding_atoms_[atom_number]->result_code() == kArmMaliResultRunning) {
// NOTE(review): the remaining arguments of this log call are missing here.
magma::log(magma::LOG_WARNING, "Client %" PRIu64 ": Submitted atom number already in use",
return false;
uint32_t flags = atom->flags;
// NOTE(review): the next line is garbled; presumably it copied two elements
// of a member of the atom into user_data. Confirm against upstream.
magma_arm_mali_user_data user_data;[0] = atom->[0];[1] = atom->[1];
std::shared_ptr<MsdArmAtom> msd_atom;
// Software atoms: the flags must be exactly one of the four semaphore
// operations listed below.
if (flags & kAtomFlagSoftware) {
if (flags != kAtomFlagSemaphoreSet && flags != kAtomFlagSemaphoreReset &&
flags != kAtomFlagSemaphoreWait && flags != kAtomFlagSemaphoreWaitAndReset) {
magma::log(magma::LOG_WARNING, "Client %" PRIu64 ": Invalid soft atom flags 0x%x\n",
client_id_, flags);
return false;
// A software atom needs a semaphore from the queue.
if (semaphores->empty()) {
// NOTE(review): the remaining arguments of this log call are missing here.
magma::log(magma::LOG_WARNING, "Client %" PRIu64 ": No remaining semaphores",
return false;
msd_atom =
std::make_shared<MsdArmSoftAtom>(shared_from_this(), static_cast<AtomFlags>(flags),
semaphores->front(), atom_number, user_data);
} else {
// Hardware atoms: slot 0 when the fragment shader flag is set, slot 1
// otherwise.
uint32_t slot = flags & kAtomFlagRequireFragmentShader ? 0 : 1;
// Slot 0 may not be combined with the compute shader or tiler flags.
if (slot == 0 && (flags & (kAtomFlagRequireComputeShader | kAtomFlagRequireTiler))) {
magma::log(magma::LOG_WARNING, "Client %" PRIu64 ": Invalid atom flags 0x%x\n",
client_id_, flags);
return false;
// NOTE(review): as it appears here this XOR unconditionally inverts the
// protected bit before the check below. Confirm whether a preprocessor guard
// around it was lost from this copy.
flags ^= kAtomFlagProtected;
if ((flags & kAtomFlagProtected) && !owner_->IsProtectedModeSupported()) {
// NOTE(review): the start and end of this log call are missing here.
"Client %" PRIu64 ": Attempting to use protected mode when not supported\n",
return false;
// NOTE(review): the argument list of this make_shared call is cut off.
msd_atom = std::make_shared<MsdArmAtom>(shared_from_this(), atom->job_chain_addr, slot,
atom_number, user_data, atom->priority,
// NOTE(review): the statement guarded by this check is missing here.
if (flags & kAtomFlagRequireCycleCounter)
// Hold lock for using outstanding_atoms_.
std::lock_guard<std::mutex> lock(callback_lock_);
MsdArmAtom::DependencyList dependencies;
// Walk the dependency slots of the atom. A dependency atom number of 0 is
// skipped.
for (size_t i = 0; i < arraysize(atom->dependencies); i++) {
uint8_t dependency = atom->dependencies[i].atom_number;
if (dependency) {
// Every dependency must refer to an entry already present in
// outstanding_atoms_.
if (!outstanding_atoms_[dependency]) {
"Client %" PRIu64
": Dependency on atom that hasn't been submitted yet",
return false;
auto type = static_cast<ArmMaliDependencyType>(atom->dependencies[i].type);
// Only the order and data dependency types are accepted.
if (type != kArmMaliDependencyOrder && type != kArmMaliDependencyData) {
"Client %" PRIu64 ": Invalid dependency type: %d", client_id_, type);
return false;
// NOTE(review): the start of this statement is missing; presumably the
// dependency is appended to dependencies. Confirm against upstream.
MsdArmAtom::Dependency{type, outstanding_atoms_[dependency]});
// NOTE(review): the right-hand operand of this comparison is missing here.
static_assert(arraysize(outstanding_atoms_) - 1 ==
"outstanding_atoms_ size is incorrect");
// Record the atom under its number.
outstanding_atoms_[atom_number] = msd_atom;
return true;
// ABI entry point for command buffer submission.
// NOTE(review): only the message string of the body survives in this copy. It
// states that this entry point is not implemented; the returned status code is
// not visible here, so confirm it against the upstream source.
magma_status_t msd_context_execute_command_buffer(msd_context_t* ctx, msd_buffer_t* cmd_buf,
msd_buffer_t** exec_resources,
msd_semaphore_t** wait_semaphores,
msd_semaphore_t** signal_semaphores)
"msd_context_execute_command_buffer not implemented");
// ABI entry point: walks the buffer at commands (commands_size bytes) as a
// sequence of magma_arm_mali_atom records and passes each one to
// MsdArmConnection::ExecuteAtom.
//
// Returns MAGMA_STATUS_INVALID_ARGS when the connection of the context cannot
// be locked, and MAGMA_STATUS_CONTEXT_KILLED when an atom size fails one of
// the validation checks below.
//
// NOTE(review): this copy of the function has lost its braces and several
// lines (semaphore loop body, log arguments, the result of a failed
// ExecuteAtom, the final return). Compare against the upstream file before
// changing any logic here.
magma_status_t msd_context_execute_immediate_commands(msd_context_t* ctx, uint64_t commands_size,
void* commands, uint64_t semaphore_count,
msd_semaphore_t** msd_semaphores)
auto context = static_cast<MsdArmContext*>(ctx);
auto connection = context->connection().lock();
if (!connection)
return DRET_MSG(MAGMA_STATUS_INVALID_ARGS, "Connection not valid");
std::deque<std::shared_ptr<magma::PlatformSemaphore>> semaphores;
// NOTE(review): the body of this loop is missing here; presumably it fills
// semaphores from msd_semaphores. Confirm against upstream.
for (size_t i = 0; i < semaphore_count; i++) {
uint64_t offset = 0;
// Continue while more than sizeof(uint64_t) bytes remain past offset.
while (offset + sizeof(uint64_t) < commands_size) {
magma_arm_mali_atom* atom =
reinterpret_cast<magma_arm_mali_atom*>(static_cast<uint8_t*>(commands) + offset);
// The size field is validated in three steps: minimum of 8 bytes, fits
// inside the buffer without wrapping, and at least one full atom struct.
if (atom->size < sizeof(uint64_t)) {
return DRET_MSG(MAGMA_STATUS_CONTEXT_KILLED, "Atom size must be at least 8");
// Check for overflow (defined for unsigned types) or too large for buffer.
if ((offset + atom->size < offset) || offset + atom->size > commands_size) {
// NOTE(review): the remaining arguments of this call are missing here.
return DRET_MSG(MAGMA_STATUS_CONTEXT_KILLED, "Atom size %ld too large for buffer",
// This check could be changed to allow for backwards compatibility in
// future versions.
if (atom->size < sizeof(magma_arm_mali_atom)) {
return DRET_MSG(MAGMA_STATUS_CONTEXT_KILLED, "Atom size %ld too small", atom->size);
// NOTE(review): the statement taken when ExecuteAtom fails is missing here.
if (!connection->ExecuteAtom(atom, &semaphores))
// Advance by the size that the atom itself declares.
offset += atom->size;
// Factory: allocates an MsdArmConnection for the given client id and owner,
// then runs Init on it. When Init fails the failure is reported through DRETP
// with a null result; otherwise the new connection is returned.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
std::shared_ptr<MsdArmConnection> MsdArmConnection::Create(msd_client_id_t client_id, Owner* owner)
// Constructed with new rather than make_shared (presumably because the
// constructor is not publicly accessible; confirm in the header).
auto connection = std::shared_ptr<MsdArmConnection>(new MsdArmConnection(client_id, owner));
if (!connection->Init())
return DRETP(nullptr, "Couldn't create connection");
return connection;
// Creates the address space of the connection. The second argument passed to
// AddressSpace::Create is true when the owner reports ACE cache coherency.
// Returns false (through DRETF) when the address space cannot be created and
// true otherwise.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
bool MsdArmConnection::Init()
// If coherent memory is supported, use it for page tables to avoid
// unnecessary cache flushes.
address_space_ =
AddressSpace::Create(this, owner_->cache_coherency_status() == kArmMaliCacheCoherencyAce);
if (!address_space_)
return DRETF(false, "Couldn't create address space");
return true;
// Constructor: stores the client id and the owner pointer in the matching
// members.
// NOTE(review): the constructor body (braces and any statements) is absent in
// this copy of the file; confirm against the upstream source.
MsdArmConnection::MsdArmConnection(msd_client_id_t client_id, Owner* owner)
: client_id_(client_id), owner_(owner)
// Destructor: notifies the owner that this connection is going away by calling
// DeregisterConnection on it.
MsdArmConnection::~MsdArmConnection()
{
    owner_->DeregisterConnection();
}
// Translates the mapping flags requested by the client into the kAccessFlag
// bits handed to the address space.
//
// mapping_flags: flags requested for the mapping.
// cache_coherent: whether the both-shareable flag may be used.
// flags_out: receives the translated flags on success; may be null, in which
//     case the function only validates.
// Returns false when the both-shareable flag is requested without cache
// coherency, or when the unsupported-flag check below fires; true otherwise.
//
// NOTE(review): this copy of the function has lost its braces and part of the
// unsupported-flag mask. Compare against the upstream file before changing
// any logic here.
static bool access_flags_from_flags(uint64_t mapping_flags, bool cache_coherent,
uint64_t* flags_out)
uint64_t access_flags = 0;
if (mapping_flags & MAGMA_GPU_MAP_FLAG_READ)
access_flags |= kAccessFlagRead;
if (mapping_flags & MAGMA_GPU_MAP_FLAG_WRITE)
access_flags |= kAccessFlagWrite;
// Execute permission is inverted: the no-execute bit is set when the execute
// flag is NOT requested.
if (!(mapping_flags & MAGMA_GPU_MAP_FLAG_EXECUTE))
access_flags |= kAccessFlagNoExecute;
if (mapping_flags & kMagmaArmMaliGpuMapFlagInnerShareable)
access_flags |= kAccessFlagShareInner;
if (mapping_flags & kMagmaArmMaliGpuMapFlagBothShareable) {
if (!cache_coherent)
return DRETF(false, "Attempting to use cache coherency while disabled.");
access_flags |= kAccessFlagShareBoth;
// NOTE(review): the mask expression below is cut off (its start and its end
// are missing). Judging by the error message it presumably tested
// mapping_flags against the complement of all supported flags; confirm
// against upstream.
if (mapping_flags &
MAGMA_GPU_MAP_FLAG_GROWABLE | kMagmaArmMaliGpuMapFlagInnerShareable |
return DRETF(false, "Unsupported map flags %lx\n", mapping_flags);
if (flags_out)
*flags_out = access_flags;
return true;
// Validates a new GPU mapping and, on success, stores it in gpu_mappings_
// keyed by its GPU virtual address. Runs with address_lock_ held.
//
// mapping: the mapping to add; it is moved into gpu_mappings_ on success.
// Returns false when the address is not page aligned, the mapping is empty or
// too large, it overlaps a neighbouring mapping, the buffer is too small, the
// flags are rejected, or UpdateCommittedMemory fails; true otherwise.
//
// NOTE(review): this copy of the function has lost its braces and some lines.
// Compare against the upstream file before changing any logic here.
bool MsdArmConnection::AddMapping(std::unique_ptr<GpuMapping> mapping)
std::lock_guard<std::mutex> lock(address_lock_);
uint64_t gpu_va = mapping->gpu_va();
if (!magma::is_page_aligned(gpu_va))
return DRETF(false, "mapping not page aligned");
if (mapping->size() == 0)
return DRETF(false, "empty mapping");
uint64_t start_page = gpu_va >> PAGE_SHIFT;
// The size is bounded first so that the page arithmetic below stays within
// the virtual address range.
if (mapping->size() > (1ul << AddressSpace::kVirtualAddressSize))
return DRETF(false, "size too large");
uint64_t page_count = magma::round_up(mapping->size(), PAGE_SIZE) >> PAGE_SHIFT;
if (start_page + page_count > ((1ul << AddressSpace::kVirtualAddressSize) >> PAGE_SHIFT))
return DRETF(false, "virtual address too large");
// upper_bound yields the first existing mapping that starts above gpu_va; the
// new mapping must end at or before that start.
auto it = gpu_mappings_.upper_bound(gpu_va);
if (it != gpu_mappings_.end() && (gpu_va + mapping->size() > it->second->gpu_va()))
return DRETF(false, "Mapping overlaps existing mapping");
// Find the mapping with the highest VA that's <= this.
if (it != gpu_mappings_.begin()) {
// NOTE(review): the comment above describes the element BEFORE it, yet no
// step back of the iterator is visible in this copy. As written, it may equal
// end() when it is dereferenced below. Confirm that a decrement was lost.
// Check if the previous mapping overlaps this.
if (it->second->gpu_va() + it->second->size() > gpu_va)
return DRETF(false, "Mapping overlaps existing mapping");
// NOTE(review): buffer is used below without a visible null check.
auto buffer = mapping->buffer().lock();
// The mapped page range must fit inside the platform buffer.
if (mapping->page_offset() + page_count > buffer->platform_buffer()->size() / PAGE_SIZE)
return DRETF(false, "Buffer size %lx too small for map start %lx count %lx",
buffer->platform_buffer()->size(), mapping->page_offset(), page_count);
// NOTE(review): the last argument of this call is missing in this copy.
if (!access_flags_from_flags(mapping->flags(),
owner_->cache_coherency_status() == kArmMaliCacheCoherencyAce,
return false;
if (!UpdateCommittedMemory(mapping.get()))
return false;
gpu_mappings_[gpu_va] = std::move(mapping);
return true;
// Looks up the mapping that starts at gpu_va and clears its range from the
// address space. Runs with address_lock_ held. Returns false when no mapping
// starts at exactly gpu_va, true otherwise.
// NOTE(review): the braces are absent in this copy, and nothing visible here
// removes the entry from gpu_mappings_. Confirm whether an erase was lost.
bool MsdArmConnection::RemoveMapping(uint64_t gpu_va)
std::lock_guard<std::mutex> lock(address_lock_);
auto it = gpu_mappings_.find(gpu_va);
if (it == gpu_mappings_.end())
return DRETF(false, "Mapping not found");
address_space_->Clear(it->second->gpu_va(), it->second->size());
return true;
// Brings the GPU page tables for a mapping in line with the committed page
// count of its buffer: the tail is cleared when the commit shrank, and new
// pages are pinned and inserted when it grew.
//
// mapping: the mapping to update.
// Returns false when the flags are rejected, the commit start does not match
// the page offset of the mapping, pinning fails, the cache flush fails, or
// the pages cannot be inserted; true otherwise.
//
// NOTE(review): this copy of the function has lost its braces and several
// lines (bus mapping calls, the MMU unlock, the barrier). Compare against the
// upstream file before changing any logic here.
//
// CommitMemoryForBuffer or PageInAddress will hold address_lock_ before calling this, but that's
// impossible to specify for the thread safety analysis.
bool MsdArmConnection::UpdateCommittedMemory(GpuMapping* mapping) __TA_NO_THREAD_SAFETY_ANALYSIS
uint64_t access_flags = 0;
// NOTE(review): the last argument of this call is missing in this copy;
// presumably it was the address of access_flags.
if (!access_flags_from_flags(mapping->flags(),
owner_->cache_coherency_status() == kArmMaliCacheCoherencyAce,
return false;
// NOTE(review): buffer is used below without a visible null check.
auto buffer = mapping->buffer().lock();
// A mismatch between commit start and mapping offset is only an error once
// pages are committed or pinned.
if (buffer->start_committed_pages() != mapping->page_offset() &&
(buffer->committed_page_count() > 0 || mapping->pinned_page_count() > 0))
return DRETF(false, "start of commit should match page offset");
uint64_t prev_committed_page_count = mapping->pinned_page_count();
DASSERT(prev_committed_page_count <= mapping->size() / PAGE_SIZE);
// The target page count is capped at the size of the mapping.
uint64_t committed_page_count =
std::min(buffer->committed_page_count(), mapping->size() / PAGE_SIZE);
if (prev_committed_page_count == committed_page_count) {
// Sometimes an access to a growable region that was just grown can fault. Unlock the MMU
// if that's detected so the access can be retried.
// NOTE(review): the statement guarded by this check (the unlock described
// above) is missing in this copy.
if (committed_page_count > 0)
return true;
// Shrink path: unmap the pages past the new committed count.
if (committed_page_count < prev_committed_page_count) {
uint64_t pages_to_remove = prev_committed_page_count - committed_page_count;
address_space_->Clear(mapping->gpu_va() + committed_page_count * PAGE_SIZE,
pages_to_remove * PAGE_SIZE);
// Technically if there's an IOMMU the new mapping might be at a different
// address, so we'd need to update the GPU address space to represent
// that. However, on current systems (amlogic and hikey960) that doesn't
// happen.
// TODO(ZX-2924): Shrink existing PMTs when that's supported.
// NOTE(review): the call that creates bus_mapping is cut off in this copy.
std::unique_ptr<magma::PlatformBusMapper::BusMapping> bus_mapping =
mapping->page_offset(), committed_page_count);
// Call shrink_pinned_pages even if the bus mapping isn't created, because if the committed
// memory size is later increased, we need to ensure the previously-shrunk region is
// remapped into the GPU.
mapping->shrink_pinned_pages(pages_to_remove, std::move(bus_mapping));
// NOTE(review): bus_mapping is tested after it was passed through std::move.
// If shrink_pinned_pages takes it by value the pointer is always null here
// and the log below fires on every shrink. Confirm the intended order.
if (!bus_mapping) {
DLOG("Failed to shrink bus mapping by %ld pages", pages_to_remove);
// The mapping is still usable, so don't count this as an error.
} else {
// Grow path: pin the newly committed pages and insert them after the pages
// that were already mapped.
uint64_t pages_to_add = committed_page_count - prev_committed_page_count;
uint64_t page_offset_in_buffer = mapping->page_offset() + prev_committed_page_count;
// NOTE(review): the call that creates bus_mapping is cut off in this copy.
std::unique_ptr<magma::PlatformBusMapper::BusMapping> bus_mapping =
page_offset_in_buffer, pages_to_add);
if (!bus_mapping)
return DRETF(false, "Couldn't pin 0x%lx pages", pages_to_add);
magma_cache_policy_t cache_policy;
magma_status_t status = buffer->platform_buffer()->GetCachePolicy(&cache_policy);
// Flush unless the mapping is both-shareable. A failed policy query is
// treated like a cached policy.
if (!(mapping->flags() & kMagmaArmMaliGpuMapFlagBothShareable) &&
(status != MAGMA_STATUS_OK || cache_policy == MAGMA_CACHE_POLICY_CACHED)) {
// Flushing the region must happen after the region is mapped to the bus, as otherwise
// the backing memory may not exist yet.
if (!buffer->EnsureRegionFlushed(page_offset_in_buffer * PAGE_SIZE,
(page_offset_in_buffer + pages_to_add) * PAGE_SIZE))
return DRETF(false, "EnsureRegionFlushed failed");
// Ensure mapping isn't put into the page table until the cache flush
// above completed.
// NOTE(review): no barrier call follows the comment above in this copy;
// confirm whether one was lost.
if (!address_space_->Insert(mapping->gpu_va() + prev_committed_page_count * PAGE_SIZE,
bus_mapping.get(), page_offset_in_buffer * PAGE_SIZE,
pages_to_add * PAGE_SIZE, access_flags)) {
return DRETF(false, "Pages can't be inserted into address space");
return true;
// Handles a fault at the given GPU address by committing more pages of the
// buffer behind the growable mapping that contains it. Runs with
// address_lock_ held.
//
// Returns false when there are no mappings, no mapping starts at or below the
// address, the address lies past the end of the mapping found, or the mapping
// is not growable; otherwise returns the result of SetCommittedPages.
//
// NOTE(review): this copy of the function has lost its braces and parts of
// the committed_page_count computation. Compare against the upstream file
// before changing any logic here.
bool MsdArmConnection::PageInMemory(uint64_t address)
std::lock_guard<std::mutex> lock(address_lock_);
if (gpu_mappings_.empty())
return false;
// upper_bound yields the first mapping that starts above address.
auto it = gpu_mappings_.upper_bound(address);
if (it == gpu_mappings_.begin())
return false;
// NOTE(review): the assertion below expects a mapping that starts at or below
// address, which is the element BEFORE it, yet no step back of the iterator
// is visible in this copy. As written, it may equal end() here. Confirm that
// a decrement was lost.
GpuMapping& mapping = *it->second.get();
DASSERT(address >= mapping.gpu_va());
if (address >= mapping.gpu_va() + mapping.size())
return false;
if (!(mapping.flags() & MAGMA_GPU_MAP_FLAG_GROWABLE))
return DRETF(false, "Buffer mapping not growable");
// NOTE(review): buffer is used below without a visible null check.
auto buffer = mapping.buffer().lock();
// TODO(MA-417): Look into growing the buffer on a different thread.
// Try to grow in units of 64 pages to avoid needing to fault too often.
constexpr uint64_t kPagesToGrow = 64;
constexpr uint64_t kCacheLineSize = 64;
// Offset of the last byte needed, counting a full cache line from the
// faulting address.
uint64_t offset_needed = address - mapping.gpu_va() + kCacheLineSize - 1;
// Don't shrink the amount being committed if there's a race and the
// client committed more memory between when the fault happened and this
// code.
// NOTE(review): both assignments below are cut off in this copy. Judging by
// the comment above and the operands, the first presumably took a maximum
// with the current committed count and the second a minimum with the pages
// available in the buffer; confirm against upstream.
uint64_t committed_page_count =
magma::round_up(offset_needed, PAGE_SIZE * kPagesToGrow) / PAGE_SIZE);
committed_page_count =
buffer->platform_buffer()->size() / PAGE_SIZE - buffer->start_committed_pages());
// The MMU command to update the page tables should automatically cause
// the atom to continue executing.
return buffer->SetCommittedPages(buffer->start_committed_pages(), committed_page_count);
// Sets the committed page range of the buffer while holding address_lock_ and
// returns the result of SetCommittedPages.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
bool MsdArmConnection::CommitMemoryForBuffer(MsdArmBuffer* buffer, uint64_t page_offset,
uint64_t page_count)
std::lock_guard<std::mutex> lock(address_lock_);
return buffer->SetCommittedPages(page_offset, page_count);
// Stores the notification callback and its token while holding
// callback_lock_.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
void MsdArmConnection::SetNotificationCallback(msd_connection_notification_callback_t callback,
void* token)
std::lock_guard<std::mutex> lock(callback_lock_);
callback_ = callback;
token_ = token;
// Builds a channel-send notification that carries a magma_arm_mali_status for
// the given atom (result code, atom number, user data) and passes it to the
// stored callback. Runs with callback_lock_ held.
//
// NOTE(review): this copy of the function has lost its braces, the statement
// guarded by the token check, and the argument of the reinterpret_cast.
// Compare against the upstream file before changing any logic here.
void MsdArmConnection::SendNotificationData(MsdArmAtom* atom, ArmMaliResultCode result_code)
std::lock_guard<std::mutex> lock(callback_lock_);
// It may already have been destroyed on the main thread.
// NOTE(review): the statement guarded by this check is missing; presumably an
// early return when no token is set.
if (!token_)
msd_notification_t notification = {.type = MSD_CONNECTION_NOTIFICATION_CHANNEL_SEND};
// The status struct must fit in the channel-send payload.
static_assert(sizeof(magma_arm_mali_status) <= MSD_CHANNEL_SEND_MAX_SIZE,
"notification too large");
notification.u.channel_send.size = sizeof(magma_arm_mali_status);
// NOTE(review): the operand of this cast is missing; presumably it pointed at
// the payload storage of the notification.
auto status = reinterpret_cast<magma_arm_mali_status*>(;
status->result_code = result_code;
status->atom_number = atom->atom_number();
status->data = atom->user_data();
callback_(token_, &notification);
// Sends one final notification with result code kArmMaliResultTerminated and
// empty atom number and data, then clears token_ so that no further
// notifications are sent. Runs with callback_lock_ held.
//
// NOTE(review): this copy of the function has lost its braces, the statement
// guarded by the token check, and the argument of the reinterpret_cast.
// Compare against the upstream file before changing any logic here.
void MsdArmConnection::MarkDestroyed()
std::lock_guard<std::mutex> lock(callback_lock_);
// NOTE(review): the statement guarded by this check is missing; presumably an
// early return when no token is set.
if (!token_)
msd_notification_t notification = {.type = MSD_CONNECTION_NOTIFICATION_CHANNEL_SEND};
// The status struct must fit in the channel-send payload.
static_assert(sizeof(magma_arm_mali_status) <= MSD_CHANNEL_SEND_MAX_SIZE,
"notification too large");
notification.u.channel_send.size = sizeof(magma_arm_mali_status);
// NOTE(review): the operand of this cast is missing; presumably it pointed at
// the payload storage of the notification.
auto status = reinterpret_cast<magma_arm_mali_status*>(;
status->result_code = kArmMaliResultTerminated;
status->atom_number = {};
status->data = {};
callback_(token_, &notification);
// Don't send any completion messages after termination.
token_ = 0;
// Searches every bus mapping of every GPU mapping for the page that contains
// the given address and converts the first match into a GPU virtual address.
// Runs with address_lock_ held.
//
// address: address to look up; it is compared page-wise against the entries
//     returned by Get() on each bus mapping.
// virtual_address_out: receives the GPU virtual address on success.
// Returns true on the first match and false when no page matches.
//
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
bool MsdArmConnection::GetVirtualAddressFromPhysical(uint64_t address,
uint64_t* virtual_address_out)
std::lock_guard<std::mutex> lock(address_lock_);
// Strip the offset within the page so that whole pages can be compared.
uint64_t page_address = address & ~(PAGE_SIZE - 1);
for (auto& mapping : gpu_mappings_) {
for (const std::unique_ptr<magma::PlatformBusMapper::BusMapping>& bus_mapping :
mapping.second->bus_mappings()) {
const std::vector<uint64_t>& page_list = bus_mapping->Get();
for (uint32_t i = 0; i < page_list.size(); i++) {
if (page_address == page_list[i]) {
// Offset in bytes from the start of the vmo.
uint64_t buffer_offset = (i + bus_mapping->page_offset()) * PAGE_SIZE;
// Offset in bytes of the start of the mapping from the start of the
// vmo.
uint64_t mapping_offset = mapping.second->page_offset() * PAGE_SIZE;
// The bus mapping shouldn't contain memory outside the gpu
// offset.
DASSERT(buffer_offset >= mapping_offset);
uint64_t offset_in_page = address - page_address;
// GPU address = start of mapping + offset of the page inside the mapping
// + offset inside the page.
*virtual_address_out =
mapping.second->gpu_va() + buffer_offset - mapping_offset + offset_in_page;
// Only return one virtual address.
return true;
return false;
// ABI entry point: creates a GpuMapping of page_count pages at gpu_va and adds
// it to the connection. Returns MAGMA_STATUS_INTERNAL_ERROR when AddMapping
// fails.
//
// NOTE(review): this copy of the function has lost its braces, the tail of
// the make_unique argument list, and the success return. Compare against the
// upstream file before changing any logic here.
magma_status_t msd_connection_map_buffer_gpu(msd_connection_t* abi_connection,
msd_buffer_t* abi_buffer, uint64_t gpu_va,
uint64_t page_offset, uint64_t page_count,
uint64_t flags)
TRACE_DURATION("magma", "msd_connection_map_buffer_gpu", "page_count", page_count);
MsdArmConnection* connection = MsdArmAbiConnection::cast(abi_connection)->ptr().get();
// The mapping size is passed in bytes (page_count * PAGE_SIZE).
// NOTE(review): the remaining arguments of this call are missing here.
auto mapping =
std::make_unique<GpuMapping>(gpu_va, page_offset, page_count * PAGE_SIZE, flags, connection,
if (!connection->AddMapping(std::move(mapping)))
return DRET_MSG(MAGMA_STATUS_INTERNAL_ERROR, "AddMapping failed");
// ABI entry point: removes the mapping that starts at gpu_va from the
// connection. Returns MAGMA_STATUS_INTERNAL_ERROR when RemoveMapping fails.
// The buffer parameter is not used in the visible code.
// NOTE(review): the braces and the success return are absent in this copy of
// the file; confirm against the upstream source.
magma_status_t msd_connection_unmap_buffer_gpu(msd_connection_t* abi_connection,
msd_buffer_t* buffer, uint64_t gpu_va)
TRACE_DURATION("magma", "msd_connection_unmap_buffer_gpu");
if (!MsdArmAbiConnection::cast(abi_connection)->ptr()->RemoveMapping(gpu_va))
return DRET_MSG(MAGMA_STATUS_INTERNAL_ERROR, "RemoveMapping failed");
// ABI entry point: commits page_count pages starting at page_offset for the
// given buffer through the connection. Returns MAGMA_STATUS_INTERNAL_ERROR
// when CommitMemoryForBuffer fails.
// NOTE(review): the braces and the success return are absent in this copy of
// the file; confirm against the upstream source.
magma_status_t msd_connection_commit_buffer(msd_connection_t* abi_connection,
msd_buffer_t* abi_buffer, uint64_t page_offset,
uint64_t page_count)
MsdArmConnection* connection = MsdArmAbiConnection::cast(abi_connection)->ptr().get();
if (!connection->CommitMemoryForBuffer(MsdArmAbiBuffer::cast(abi_buffer)->base_ptr().get(),
page_offset, page_count))
return DRET_MSG(MAGMA_STATUS_INTERNAL_ERROR, "CommitMemoryForBuffer failed");
// ABI entry point: forwards the callback and token to
// MsdArmConnection::SetNotificationCallback on the wrapped connection.
// NOTE(review): the braces that delimit this body are absent in this copy of
// the file; confirm against the upstream source.
void msd_connection_set_notification_callback(msd_connection_t* abi_connection,
msd_connection_notification_callback_t callback,
void* token)
MsdArmAbiConnection::cast(abi_connection)->ptr()->SetNotificationCallback(callback, token);
// ABI entry point with an empty body: releasing a buffer from a connection
// performs no work in this implementation.
void msd_connection_release_buffer(msd_connection_t* abi_connection, msd_buffer_t* abi_buffer)
{
}