// Copyright 2016 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "command_buffer.h"
#include "address_space.h"
#include "instructions.h"
#include "msd_intel_connection.h"
#include "msd_intel_context.h"
#include "msd_intel_semaphore.h"
#include "platform_trace.h"
std::unique_ptr<CommandBuffer> CommandBuffer::Create(msd_buffer_t* abi_cmd_buf,
                                                     msd_buffer_t** msd_buffers,
                                                     std::weak_ptr<ClientContext> context,
                                                     msd_semaphore_t** msd_wait_semaphores,
                                                     msd_semaphore_t** msd_signal_semaphores)
{
    auto command_buffer = std::unique_ptr<CommandBuffer>(
        new CommandBuffer(MsdIntelAbiBuffer::cast(abi_cmd_buf)->ptr(), context));
    if (!command_buffer->Initialize())
        return DRETP(nullptr, "failed to initialize command buffer");

    std::vector<std::shared_ptr<MsdIntelBuffer>> buffers;
    buffers.reserve(command_buffer->num_resources());
    for (uint32_t i = 0; i < command_buffer->num_resources(); i++) {
        buffers.emplace_back(MsdIntelAbiBuffer::cast(msd_buffers[i])->ptr());
    }

    std::vector<std::shared_ptr<magma::PlatformSemaphore>> wait_semaphores;
    wait_semaphores.reserve(command_buffer->wait_semaphore_count());
    for (uint32_t i = 0; i < command_buffer->wait_semaphore_count(); i++) {
        wait_semaphores.emplace_back(MsdIntelAbiSemaphore::cast(msd_wait_semaphores[i])->ptr());
    }

    std::vector<std::shared_ptr<magma::PlatformSemaphore>> signal_semaphores;
    signal_semaphores.reserve(command_buffer->signal_semaphore_count());
    for (uint32_t i = 0; i < command_buffer->signal_semaphore_count(); i++) {
        signal_semaphores.emplace_back(
            MsdIntelAbiSemaphore::cast(msd_signal_semaphores[i])->ptr());
    }

    if (!command_buffer->InitializeResources(std::move(buffers), std::move(wait_semaphores),
                                             std::move(signal_semaphores)))
        return DRETP(nullptr, "failed to initialize command buffer resources");

    return command_buffer;
}

CommandBuffer::CommandBuffer(std::shared_ptr<MsdIntelBuffer> abi_cmd_buf,
                             std::weak_ptr<ClientContext> context)
    : abi_cmd_buf_(std::move(abi_cmd_buf)), context_(context), nonce_(TRACE_NONCE())
{
}

CommandBuffer::~CommandBuffer()
{
    if (!prepared_to_execute_)
        return;

    {
        TRACE_DURATION("magma", "Command Buffer End");
        uint64_t ATTRIBUTE_UNUSED buffer_id =
            resource(batch_buffer_resource_index()).buffer_id();
        TRACE_FLOW_END("magma", "command_buffer", buffer_id);
    }

    UnmapResourcesGpu();

    for (auto& semaphore : signal_semaphores_) {
        semaphore->Signal();
    }

    std::shared_ptr<MsdIntelConnection> connection = locked_context_->connection().lock();
    if (connection) {
        std::vector<uint64_t> buffer_ids(num_resources());
        for (uint32_t i = 0; i < num_resources(); i++) {
            buffer_ids[i] = exec_resources_[i].buffer->platform_buffer()->id();
        }
        connection->SendNotification(buffer_ids);
    }

    TRACE_ASYNC_END("magma-exec", "CommandBuffer Exec", nonce_);
}

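// Records the sequence number for this batch and begins the "CommandBuffer Exec" async trace.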
void CommandBuffer::SetSequenceNumber(uint32_t sequence_number)
{
    uint64_t ATTRIBUTE_UNUSED buffer_id = resource(batch_buffer_resource_index()).buffer_id();
    TRACE_ASYNC_BEGIN("magma-exec", "CommandBuffer Exec", nonce_, "id", buffer_id);

    sequence_number_ = sequence_number;
}

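// Validates that the buffer and semaphore counts match the command buffer header, records the
// exec resources, and commits the pages backing each resource.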
bool CommandBuffer::InitializeResources(
    std::vector<std::shared_ptr<MsdIntelBuffer>> buffers,
    std::vector<std::shared_ptr<magma::PlatformSemaphore>> wait_semaphores,
    std::vector<std::shared_ptr<magma::PlatformSemaphore>> signal_semaphores)
{
    TRACE_DURATION("magma", "InitializeResources");

    if (!magma::CommandBuffer::initialized())
        return DRETF(false, "base command buffer not initialized");

    if (num_resources() != buffers.size())
        return DRETF(false, "buffers size mismatch");

    if (wait_semaphores.size() != wait_semaphore_count())
        return DRETF(false, "wait semaphore count mismatch");

    if (signal_semaphores.size() != signal_semaphore_count())
        return DRETF(false, "signal semaphore count mismatch");

    exec_resources_.clear();
    exec_resources_.reserve(num_resources());
    for (uint32_t i = 0; i < num_resources(); i++) {
        exec_resources_.emplace_back(
            ExecResource{buffers[i], resource(i).offset(), resource(i).length()});
        {
            TRACE_DURATION("magma", "CommitPages");
            uint64_t num_pages = AddressSpace::GetMappedSize(resource(i).length()) >> PAGE_SHIFT;
            DASSERT(magma::is_page_aligned(resource(i).offset()));
            uint64_t page_offset = resource(i).offset() >> PAGE_SHIFT;
            buffers[i]->platform_buffer()->CommitPages(page_offset, num_pages);
        }
    }

    wait_semaphores_ = std::move(wait_semaphores);
    signal_semaphores_ = std::move(signal_semaphores);

    return true;
}

std::weak_ptr<MsdIntelContext> CommandBuffer::GetContext() { return context_; }

uint32_t CommandBuffer::GetPipeControlFlags()
{
    uint32_t flags = MiPipeControl::kCommandStreamerStallEnableBit;

    // Experimentally, including this bit has been shown to resolve gpu faults in the following
    // scenario: a batch completes, we clear the gtt mappings for its resources, and then the
    // next batch emits an invalid address corresponding to a cleared gpu mapping. This was
    // first seen when a compute shader was introduced.
    flags |= MiPipeControl::kGenericMediaStateClearBit;

    // Similarly, including this bit was shown to resolve the emission of an invalid address.
    flags |= MiPipeControl::kIndirectStatePointersDisableBit;

    // This one is needed when l3 caching is enabled via mocs (memory object control state).
    flags |= MiPipeControl::kDcFlushEnableBit;

    return flags;
}

bool CommandBuffer::GetGpuAddress(gpu_addr_t* gpu_addr_out)
{
    if (!prepared_to_execute_)
        return DRETF(false, "not prepared to execute");

    *gpu_addr_out = exec_resource_mappings_[batch_buffer_index_]->gpu_addr() + batch_start_offset_;
    return true;
}

uint64_t CommandBuffer::GetBatchBufferId()
{
    if (batch_buffer_resource_index() < num_resources())
        return resource(batch_buffer_resource_index()).buffer_id();
    return 0;
}

void CommandBuffer::UnmapResourcesGpu() { exec_resource_mappings_.clear(); }

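// Locks the client context, maps every exec resource into the context's execution address
// space, and patches relocations so the batch refers to valid GPU addresses.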
bool CommandBuffer::PrepareForExecution()
{
    locked_context_ = context_.lock();
    if (!locked_context_)
        return DRETF(false, "context has already been deleted, aborting");

    exec_resource_mappings_.clear();
    exec_resource_mappings_.reserve(exec_resources_.size());

    uint64_t ATTRIBUTE_UNUSED buffer_id = resource(batch_buffer_resource_index()).buffer_id();
    TRACE_FLOW_STEP("magma", "command_buffer", buffer_id);

    if (!MapResourcesGpu(locked_context_->exec_address_space(), exec_resource_mappings_))
        return DRETF(false, "failed to map execution resources");

    if (!PatchRelocations(exec_resource_mappings_))
        return DRETF(false, "failed to patch relocations");

    batch_buffer_index_ = batch_buffer_resource_index();
    batch_start_offset_ = batch_start_offset();

    prepared_to_execute_ = true;
    return true;
}

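// Obtains a shared GPU mapping for each exec resource in the given address space; fails if any
// mapping cannot be obtained.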
bool CommandBuffer::MapResourcesGpu(std::shared_ptr<AddressSpace> address_space,
                                    std::vector<std::shared_ptr<GpuMapping>>& mappings)
{
    TRACE_DURATION("magma", "MapResourcesGpu");

    for (const auto& res : exec_resources_) {
        std::shared_ptr<GpuMapping> mapping =
            AddressSpace::GetSharedGpuMapping(address_space, res.buffer, res.offset, res.length);
        if (!mapping)
            return DRETF(false, "failed to map resource into GPU address space");

        DLOG("MapResourcesGpu aspace %p buffer 0x%" PRIx64 " offset 0x%" PRIx64
             " length 0x%" PRIx64 " gpu_addr 0x%" PRIx64,
             address_space.get(), res.buffer->platform_buffer()->id(), res.offset, res.length,
             mapping->gpu_addr());

        mappings.push_back(mapping);
    }

    return true;
}

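// Writes the relocation target's GPU address (the target mapping address plus target_offset)
// into the containing resource at the relocation offset, using a CPU mapping of the buffer.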
bool CommandBuffer::PatchRelocation(magma_system_relocation_entry* relocation,
                                    ExecResource* exec_resource, gpu_addr_t target_gpu_address)
{
    DLOG("PatchRelocation offset 0x%x exec_resource offset 0x%lx target_gpu_address 0x%lx "
         "target_offset 0x%x",
         relocation->offset, exec_resource->offset, target_gpu_address, relocation->target_offset);

    TRACE_DURATION("magma", "PatchRelocation");

    uint64_t dst_offset = exec_resource->offset + relocation->offset;

    uint32_t reloc_page_index = dst_offset >> PAGE_SHIFT;
    uint32_t offset_in_page = dst_offset & (PAGE_SIZE - 1);
    DLOG("reloc_page_index 0x%x offset_in_page 0x%x", reloc_page_index, offset_in_page);

    void* buffer_cpu_addr;
    if (!exec_resource->buffer->platform_buffer()->MapCpu(&buffer_cpu_addr))
        return DRETF(false, "failed to map buffer into CPU address space");
    DASSERT(buffer_cpu_addr);

    uint8_t* reloc_page_cpu_addr =
        static_cast<uint8_t*>(buffer_cpu_addr) + reloc_page_index * PAGE_SIZE;

    gpu_addr_t address_to_patch = target_gpu_address + relocation->target_offset;
    static_assert(sizeof(gpu_addr_t) == sizeof(uint64_t), "gpu addr size mismatch");

    memcpy(reloc_page_cpu_addr + offset_in_page, &address_to_patch, sizeof(uint64_t));

    return true;
}

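// Walks every relocation entry of every resource and patches it against the GPU mapping of its
// target resource.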
bool CommandBuffer::PatchRelocations(std::vector<std::shared_ptr<GpuMapping>>& mappings)
{
    DASSERT(mappings.size() == num_resources());

    TRACE_DURATION("magma", "PatchRelocations");

    for (uint32_t res_index = 0; res_index < num_resources(); res_index++) {
        auto resource = this->resource(res_index);
        for (uint32_t reloc_index = 0; reloc_index < resource.num_relocations(); reloc_index++) {
            auto reloc = resource.relocation(reloc_index);
            DLOG("Patching relocation res_index %u reloc_index %u target_resource_index %u",
                 res_index, reloc_index, reloc->target_resource_index);
            auto& mapping = mappings[reloc->target_resource_index];
            if (!PatchRelocation(reloc, &exec_resources_[res_index], mapping->gpu_addr()))
                return DRETF(false, "failed to patch relocation");
        }
    }

    return true;
}