// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "command_buffer.h"
#include "address_space.h"
#include "instructions.h"
#include "msd_intel_connection.h"
#include "msd_intel_context.h"
#include "msd_intel_semaphore.h"
#include "platform_trace.h"
std::unique_ptr<CommandBuffer> CommandBuffer::Create(msd_buffer_t* abi_cmd_buf,
                                                     msd_buffer_t** msd_buffers,
                                                     std::weak_ptr<ClientContext> context,
                                                     msd_semaphore_t** msd_wait_semaphores,
                                                     msd_semaphore_t** msd_signal_semaphores)
{
    auto command_buffer = std::unique_ptr<CommandBuffer>(
        new CommandBuffer(MsdIntelAbiBuffer::cast(abi_cmd_buf)->ptr(), context));
    if (!command_buffer->Initialize())
        return DRETP(nullptr, "failed to initialize command buffer");

    std::vector<std::shared_ptr<MsdIntelBuffer>> buffers;
    buffers.reserve(command_buffer->num_resources());
    for (uint32_t i = 0; i < command_buffer->num_resources(); i++) {
        buffers.emplace_back(MsdIntelAbiBuffer::cast(msd_buffers[i])->ptr());
    }

    std::vector<std::shared_ptr<magma::PlatformSemaphore>> wait_semaphores;
    wait_semaphores.reserve(command_buffer->wait_semaphore_count());
    for (uint32_t i = 0; i < command_buffer->wait_semaphore_count(); i++) {
        wait_semaphores.emplace_back(MsdIntelAbiSemaphore::cast(msd_wait_semaphores[i])->ptr());
    }

    std::vector<std::shared_ptr<magma::PlatformSemaphore>> signal_semaphores;
    signal_semaphores.reserve(command_buffer->signal_semaphore_count());
    for (uint32_t i = 0; i < command_buffer->signal_semaphore_count(); i++) {
        signal_semaphores.emplace_back(
            MsdIntelAbiSemaphore::cast(msd_signal_semaphores[i])->ptr());
    }

    if (!command_buffer->InitializeResources(std::move(buffers), std::move(wait_semaphores),
                                             std::move(signal_semaphores)))
        return DRETP(nullptr, "failed to initialize command buffer resources");

    return command_buffer;
}

CommandBuffer::CommandBuffer(std::shared_ptr<MsdIntelBuffer> abi_cmd_buf,
                             std::weak_ptr<ClientContext> context)
    : abi_cmd_buf_(std::move(abi_cmd_buf)), context_(context), nonce_(TRACE_NONCE())
{
}

CommandBuffer::~CommandBuffer()
{
    if (!prepared_to_execute_)
        return;

    {
        TRACE_DURATION("magma", "Command Buffer End");
        uint64_t ATTRIBUTE_UNUSED buffer_id =
            resource(batch_buffer_resource_index()).buffer_id();
        TRACE_FLOW_END("magma", "command_buffer", buffer_id);
    }

    UnmapResourcesGpu();

    for (auto& semaphore : signal_semaphores_) {
        semaphore->Signal();
    }

    std::shared_ptr<MsdIntelConnection> connection = locked_context_->connection().lock();
    if (connection) {
        std::vector<uint64_t> buffer_ids(num_resources());
        for (uint32_t i = 0; i < num_resources(); i++) {
            buffer_ids[i] = exec_resources_[i].buffer->platform_buffer()->id();
        }
        connection->SendNotification(buffer_ids);
    }

    TRACE_ASYNC_END("magma-exec", "CommandBuffer Exec", nonce_);
}

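// Records the sequence number for this batch and begins the "CommandBuffer Exec" async trace.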
void CommandBuffer::SetSequenceNumber(uint32_t sequence_number)
{
    uint64_t ATTRIBUTE_UNUSED buffer_id = resource(batch_buffer_resource_index()).buffer_id();
    TRACE_ASYNC_BEGIN("magma-exec", "CommandBuffer Exec", nonce_, "id", buffer_id);

    sequence_number_ = sequence_number;
}

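// Validates that the buffer and semaphore counts match the command buffer header, records the
// exec resources, and commits the pages backing each resource.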
bool CommandBuffer::InitializeResources(
    std::vector<std::shared_ptr<MsdIntelBuffer>> buffers,
    std::vector<std::shared_ptr<magma::PlatformSemaphore>> wait_semaphores,
    std::vector<std::shared_ptr<magma::PlatformSemaphore>> signal_semaphores)
{
    TRACE_DURATION("magma", "InitializeResources");

    if (!magma::CommandBuffer::initialized())
        return DRETF(false, "base command buffer not initialized");

    if (num_resources() != buffers.size())
        return DRETF(false, "buffers size mismatch");

    if (wait_semaphores.size() != wait_semaphore_count())
        return DRETF(false, "wait semaphore count mismatch");

    if (signal_semaphores.size() != signal_semaphore_count())
        return DRETF(false, "signal semaphore count mismatch");

    exec_resources_.clear();
    exec_resources_.reserve(num_resources());
    for (uint32_t i = 0; i < num_resources(); i++) {
        exec_resources_.emplace_back(
            ExecResource{buffers[i], resource(i).offset(), resource(i).length()});
        {
            TRACE_DURATION("magma", "CommitPages");
            uint64_t num_pages = AddressSpace::GetMappedSize(resource(i).length()) >> PAGE_SHIFT;
            DASSERT(magma::is_page_aligned(resource(i).offset()));
            uint64_t page_offset = resource(i).offset() >> PAGE_SHIFT;
            buffers[i]->platform_buffer()->CommitPages(page_offset, num_pages);
        }
    }

    wait_semaphores_ = std::move(wait_semaphores);
    signal_semaphores_ = std::move(signal_semaphores);

    return true;
}

std::weak_ptr<MsdIntelContext> CommandBuffer::GetContext() { return context_; }

uint32_t CommandBuffer::GetPipeControlFlags()
{
    uint32_t flags = MiPipeControl::kCommandStreamerStallEnableBit;

    // Experimentally, including this bit has been shown to resolve gpu faults in the following
    // scenario: a batch completes, we clear the gtt mappings for its resources, and then the
    // next batch emits an invalid address corresponding to a cleared gpu mapping. This was
    // first seen when a compute shader was introduced.
    flags |= MiPipeControl::kGenericMediaStateClearBit;

    // Similarly, including this bit was shown to resolve the emission of an invalid address.
    flags |= MiPipeControl::kIndirectStatePointersDisableBit;

    // This one is needed when l3 caching is enabled via mocs (memory object control state).
    flags |= MiPipeControl::kDcFlushEnableBit;

    return flags;
}

bool CommandBuffer::GetGpuAddress(gpu_addr_t* gpu_addr_out)
{
    if (!prepared_to_execute_)
        return DRETF(false, "not prepared to execute");

    *gpu_addr_out = exec_resource_mappings_[batch_buffer_index_]->gpu_addr() + batch_start_offset_;
    return true;
}

uint64_t CommandBuffer::GetBatchBufferId()
{
    if (batch_buffer_resource_index() < num_resources())
        return resource(batch_buffer_resource_index()).buffer_id();
    return 0;
}

void CommandBuffer::UnmapResourcesGpu() { exec_resource_mappings_.clear(); }

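// Locks the client context, maps every exec resource into the context's execution address
// space, and patches relocations so the batch refers to valid GPU addresses.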
bool CommandBuffer::PrepareForExecution()
{
    locked_context_ = context_.lock();
    if (!locked_context_)
        return DRETF(false, "context has already been deleted, aborting");

    exec_resource_mappings_.clear();
    exec_resource_mappings_.reserve(exec_resources_.size());

    uint64_t ATTRIBUTE_UNUSED buffer_id = resource(batch_buffer_resource_index()).buffer_id();
    TRACE_FLOW_STEP("magma", "command_buffer", buffer_id);

    if (!MapResourcesGpu(locked_context_->exec_address_space(), exec_resource_mappings_))
        return DRETF(false, "failed to map execution resources");

    if (!PatchRelocations(exec_resource_mappings_))
        return DRETF(false, "failed to patch relocations");

    batch_buffer_index_ = batch_buffer_resource_index();
    batch_start_offset_ = batch_start_offset();

    prepared_to_execute_ = true;
    return true;
}

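// Obtains a shared GPU mapping for each exec resource in the given address space; fails if any
// mapping cannot be obtained.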
bool CommandBuffer::MapResourcesGpu(std::shared_ptr<AddressSpace> address_space,
                                    std::vector<std::shared_ptr<GpuMapping>>& mappings)
{
    TRACE_DURATION("magma", "MapResourcesGpu");

    for (const auto& res : exec_resources_) {
        std::shared_ptr<GpuMapping> mapping =
            AddressSpace::GetSharedGpuMapping(address_space, res.buffer, res.offset, res.length);
        if (!mapping)
            return DRETF(false, "failed to map resource into GPU address space");

        DLOG("MapResourcesGpu aspace %p buffer 0x%" PRIx64 " offset 0x%" PRIx64
             " length 0x%" PRIx64 " gpu_addr 0x%" PRIx64,
             address_space.get(), res.buffer->platform_buffer()->id(), res.offset, res.length,
             mapping->gpu_addr());

        mappings.push_back(mapping);
    }

    return true;
}

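// Writes the relocation target's GPU address (the target mapping address plus target_offset)
// into the containing resource at the relocation offset, using a CPU mapping of the buffer.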
bool CommandBuffer::PatchRelocation(magma_system_relocation_entry* relocation,
                                    ExecResource* exec_resource, gpu_addr_t target_gpu_address)
{
    DLOG("PatchRelocation offset 0x%x exec_resource offset 0x%lx target_gpu_address 0x%lx "
         "target_offset 0x%x",
         relocation->offset, exec_resource->offset, target_gpu_address, relocation->target_offset);

    TRACE_DURATION("magma", "PatchRelocation");

    uint64_t dst_offset = exec_resource->offset + relocation->offset;

    uint32_t reloc_page_index = dst_offset >> PAGE_SHIFT;
    uint32_t offset_in_page = dst_offset & (PAGE_SIZE - 1);
    DLOG("reloc_page_index 0x%x offset_in_page 0x%x", reloc_page_index, offset_in_page);

    void* buffer_cpu_addr;
    if (!exec_resource->buffer->platform_buffer()->MapCpu(&buffer_cpu_addr))
        return DRETF(false, "failed to map buffer into CPU address space");
    DASSERT(buffer_cpu_addr);

    uint8_t* reloc_page_cpu_addr =
        static_cast<uint8_t*>(buffer_cpu_addr) + reloc_page_index * PAGE_SIZE;

    gpu_addr_t address_to_patch = target_gpu_address + relocation->target_offset;
    static_assert(sizeof(gpu_addr_t) == sizeof(uint64_t), "gpu addr size mismatch");

    memcpy(reloc_page_cpu_addr + offset_in_page, &address_to_patch, sizeof(uint64_t));

    return true;
}

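// Walks every relocation entry of every resource and patches it against the GPU mapping of its
// target resource.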
bool CommandBuffer::PatchRelocations(std::vector<std::shared_ptr<GpuMapping>>& mappings)
{
    DASSERT(mappings.size() == num_resources());

    TRACE_DURATION("magma", "PatchRelocations");

    for (uint32_t res_index = 0; res_index < num_resources(); res_index++) {
        auto resource = this->resource(res_index);
        for (uint32_t reloc_index = 0; reloc_index < resource.num_relocations(); reloc_index++) {
            auto reloc = resource.relocation(reloc_index);
            DLOG("Patching relocation res_index %u reloc_index %u target_resource_index %u",
                 res_index, reloc_index, reloc->target_resource_index);
            auto& mapping = mappings[reloc->target_resource_index];
            if (!PatchRelocation(reloc, &exec_resources_[res_index], mapping->gpu_addr()))
                return DRETF(false, "failed to patch relocation");
        }
    }

    return true;
}