| /* |
| * Copyright (c) 2018-2020, Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| //! |
| //! \file mos_gpucontext_specific.cpp |
| //! \brief Container class for the Linux specific gpu context |
| //! |
| |
| #include "mos_context_specific.h" |
| #include "mos_gpucontext_specific.h" |
| #include "mos_graphicsresource_specific.h" |
| #include "mos_commandbuffer_specific.h" |
| #include "mos_util_devult_specific.h" |
| #include "mos_cmdbufmgr.h" |
| #include "mos_os_virtualengine.h" |
| #include <unistd.h> |
| |
| #define MI_BATCHBUFFER_END 0x05000000 |
| static pthread_mutex_t command_dump_mutex = PTHREAD_MUTEX_INITIALIZER; |
| |
GpuContextSpecific::GpuContextSpecific(
    const MOS_GPU_NODE gpuNode,
    MOS_GPU_CONTEXT mosGpuCtx,
    CmdBufMgr *cmdBufMgr,
    GpuContext *reusedContext)
{
    MOS_OS_FUNCTION_ENTER;

    // Cache the creation parameters; all real resource allocation
    // (command buffer pool, status buffer, i915 contexts) happens in Init().
    m_nodeOrdinal = gpuNode;
    m_cmdBufMgr = cmdBufMgr;
    m_gpuContext = mosGpuCtx;
    m_statusBufferResource = nullptr;
    m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;

    // Context reuse is not implemented on Linux; log and fall through to a
    // fresh context rather than failing the caller.
    if (reusedContext)
    {
        MOS_OS_NORMALMESSAGE("gpucontex reusing not enabled on Linux.");
    }

#if (_DEBUG || _RELEASE_INTERNAL)
    // Debug-only override: INTEL_ENGINE_INSTANCE is parsed as a hexadecimal
    // engine-instance selection mask, later consumed by
    // SelectEngineInstanceByUser() during Init().
    char *engineInstances = getenv("INTEL_ENGINE_INSTANCE");
    if (engineInstances != nullptr)
    {
        errno = 0;
        long int instance = strtol(engineInstances, nullptr, 16);
        // Reject overflow and negative values (LONG_MIN underflow is caught by
        // the < 0 test). NOTE(review): the strtol end pointer is not checked,
        // so a non-numeric string silently parses as 0 -- confirm acceptable.
        if ((errno == ERANGE && instance == LONG_MAX) || (instance < 0))
        {
            MOS_OS_NORMALMESSAGE("Invalid INTEL_ENGINE_INSTANCE setting.(%s)\n", engineInstances);
            m_engineInstanceSelect = 0x0;
        }
        else
        {
            m_engineInstanceSelect = (uint32_t)instance;
        }
    }
#endif
}
| |
GpuContextSpecific::~GpuContextSpecific()
{
    MOS_OS_FUNCTION_ENTER;

    // Tear down everything Init() allocated (status buffer, command buffer
    // pool, tracking lists, i915 contexts).
    Clear();
}
| |
| MOS_STATUS GpuContextSpecific::Init(OsContext *osContext, |
| PMOS_INTERFACE osInterface, |
| MOS_GPU_NODE GpuNode, |
| PMOS_GPUCTX_CREATOPTIONS createOption) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_OS_CHK_NULL_RETURN(osContext); |
| |
| if (m_cmdBufPoolMutex == nullptr) |
| { |
| m_cmdBufPoolMutex = MOS_CreateMutex(); |
| } |
| |
| MOS_OS_CHK_NULL_RETURN(m_cmdBufPoolMutex); |
| |
| MOS_LockMutex(m_cmdBufPoolMutex); |
| |
| m_cmdBufPool.clear(); |
| |
| MOS_UnlockMutex(m_cmdBufPoolMutex); |
| |
| m_commandBufferSize = COMMAND_BUFFER_SIZE; |
| |
| m_nextFetchIndex = 0; |
| |
| m_cmdBufFlushed = true; |
| |
| m_osContext = osContext; |
| |
| MOS_OS_CHK_STATUS_RETURN(AllocateGPUStatusBuf()); |
| |
| m_commandBuffer = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER)); |
| |
| MOS_OS_CHK_NULL_RETURN(m_commandBuffer); |
| |
| m_IndirectHeapSize = 0; |
| |
| // each thread has its own GPU context, so do not need any lock as guarder here |
| m_allocationList = (ALLOCATION_LIST *)MOS_AllocAndZeroMemory(sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE); |
| MOS_OS_CHK_NULL_RETURN(m_allocationList); |
| m_maxNumAllocations = ALLOCATIONLIST_SIZE; |
| |
| m_patchLocationList = (PATCHLOCATIONLIST *)MOS_AllocAndZeroMemory(sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE); |
| MOS_OS_CHK_NULL_RETURN(m_patchLocationList); |
| m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE; |
| |
| m_attachedResources = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE); |
| MOS_OS_CHK_NULL_RETURN(m_attachedResources); |
| |
| m_writeModeList = (bool *)MOS_AllocAndZeroMemory(sizeof(bool) * ALLOCATIONLIST_SIZE); |
| MOS_OS_CHK_NULL_RETURN(m_writeModeList); |
| |
| m_GPUStatusTag = 1; |
| |
| m_createOptionEnhanced = (MOS_GPUCTX_CREATOPTIONS_ENHANCED*)MOS_AllocAndZeroMemory(sizeof(MOS_GPUCTX_CREATOPTIONS_ENHANCED)); |
| MOS_OS_CHK_NULL_RETURN(m_createOptionEnhanced); |
| m_createOptionEnhanced->SSEUValue = createOption->SSEUValue; |
| |
| if (typeid(*createOption) == typeid(MOS_GPUCTX_CREATOPTIONS_ENHANCED)) |
| { |
| PMOS_GPUCTX_CREATOPTIONS_ENHANCED createOptionEnhanced = static_cast<PMOS_GPUCTX_CREATOPTIONS_ENHANCED>(createOption); |
| m_createOptionEnhanced->UsingSFC = createOptionEnhanced->UsingSFC; |
| } |
| |
| for (int i=0; i<MAX_ENGINE_INSTANCE_NUM+1; i++) |
| { |
| m_i915Context[i] = nullptr; |
| } |
| |
| if (osInterface->ctxBasedScheduling) |
| { |
| unsigned int nengine = 0; |
| struct i915_engine_class_instance *engine_map = nullptr; |
| |
| MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_START, |
| &GpuNode, sizeof(GpuNode), nullptr, 0); |
| |
| m_i915Context[0] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr, |
| osInterface->pOsContext->intel_context, |
| I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE); |
| if (m_i915Context[0] == nullptr) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to create context.\n"); |
| return MOS_STATUS_UNKNOWN; |
| } |
| m_i915Context[0]->pOsContext = osInterface->pOsContext; |
| |
| m_i915ExecFlag = I915_EXEC_DEFAULT; |
| |
| if (mos_query_engines_count(osInterface->pOsContext->bufmgr, &nengine)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to query engines count.\n"); |
| return MOS_STATUS_UNKNOWN; |
| } |
| engine_map = (struct i915_engine_class_instance *)MOS_AllocAndZeroMemory(nengine * sizeof(struct i915_engine_class_instance)); |
| MOS_OS_CHK_NULL_RETURN(engine_map); |
| |
| if (GpuNode == MOS_GPU_NODE_3D) |
| { |
| __u16 engine_class = I915_ENGINE_CLASS_RENDER; |
| __u64 caps = 0; |
| |
| if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to query engines.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| if (createOption->SSEUValue != 0) |
| { |
| struct drm_i915_gem_context_param_sseu sseu; |
| MOS_ZeroMemory(&sseu, sizeof(sseu)); |
| sseu.flags = I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX; |
| sseu.engine.engine_instance = m_i915ExecFlag; |
| |
| if (mos_get_context_param_sseu(m_i915Context[0], &sseu)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to get sseu configuration."); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| if (mos_hweight8(sseu.subslice_mask) > createOption->packed.SubSliceCount) |
| { |
| sseu.subslice_mask = mos_switch_off_n_bits(sseu.subslice_mask, |
| mos_hweight8(sseu.subslice_mask)-createOption->packed.SubSliceCount); |
| } |
| |
| if (mos_set_context_param_sseu(m_i915Context[0], sseu)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set sseu configuration."); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| } |
| else if (GpuNode == MOS_GPU_NODE_COMPUTE) |
| { |
| __u16 engine_class = 4; //To change later when linux define the name |
| __u64 caps = 0; |
| |
| if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to query engines.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| SelectEngineInstanceByUser(engine_map, &nengine, m_engineInstanceSelect, GpuNode); |
| #endif |
| if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| else if (GpuNode == MOS_GPU_NODE_VIDEO || GpuNode == MOS_GPU_NODE_VIDEO2 |
| || GpuNode == MOS_GPU_NODE_VE) |
| { |
| __u16 engine_class = (GpuNode == MOS_GPU_NODE_VE)? I915_ENGINE_CLASS_VIDEO_ENHANCE : I915_ENGINE_CLASS_VIDEO; |
| __u64 caps = 0; |
| |
| SetEngineQueryFlags(createOption, caps); |
| |
| if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to query engines.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| SelectEngineInstanceByUser(engine_map, &nengine, m_engineInstanceSelect, GpuNode); |
| #endif |
| if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| if (nengine >= 2) |
| { |
| if(!osInterface->bGucSubmission) |
| { |
| //master queue |
| m_i915Context[1] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr, |
| osInterface->pOsContext->intel_context, |
| I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE); |
| if (m_i915Context[1] == nullptr) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to create master context.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| m_i915Context[1]->pOsContext = osInterface->pOsContext; |
| |
| if (mos_set_context_param_load_balance(m_i915Context[1], engine_map, 1)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set master context bond extension.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| //slave queue |
| for (int i=1; i<nengine; i++) |
| { |
| m_i915Context[i+1] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr, |
| osInterface->pOsContext->intel_context, |
| I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE); |
| if (m_i915Context[i+1] == nullptr) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to create slave context.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| m_i915Context[i+1]->pOsContext = osInterface->pOsContext; |
| |
| if (mos_set_context_param_bond(m_i915Context[i+1], engine_map[0], &engine_map[i], 1) != S_SUCCESS) |
| { |
| int err = errno; |
| if (err == ENODEV) |
| { |
| mos_gem_context_destroy(m_i915Context[i+1]); |
| m_i915Context[i+1] = nullptr; |
| break; |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set slave context bond extension. errno=%d\n",err); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| } |
| } |
| else |
| { |
| //create context with different width |
| for(int i = 1; i < nengine; i++) |
| { |
| unsigned int ctxWidth = i + 1; |
| m_i915Context[i] = mos_gem_context_create_shared(osInterface->pOsContext->bufmgr, |
| osInterface->pOsContext->intel_context, |
| 0); // I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE not allowed for parallel submission |
| if (mos_set_context_param_parallel(m_i915Context[i], engine_map, ctxWidth) != S_SUCCESS) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set parallel extension.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| } |
| } |
| } |
| else if (GpuNode == MOS_GPU_NODE_BLT) |
| { |
| __u16 engine_class = I915_ENGINE_CLASS_COPY; |
| __u64 caps = 0; |
| |
| if (mos_query_engines(osInterface->pOsContext->bufmgr, engine_class, caps, &nengine, engine_map)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to query engines.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| if (mos_set_context_param_load_balance(m_i915Context[0], engine_map, nengine)) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Unknown engine class.\n"); |
| MOS_SafeFreeMemory(engine_map); |
| return MOS_STATUS_UNKNOWN; |
| } |
| MOS_SafeFreeMemory(engine_map); |
| MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_END, |
| m_i915Context, sizeof(void *), |
| &nengine, sizeof(nengine)); |
| } |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| void GpuContextSpecific::Clear() |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_START, |
| m_i915Context, sizeof(void *), nullptr, 0); |
| // hanlde the status buf bundled w/ the specified gpucontext |
| if (m_statusBufferResource) |
| { |
| if (m_statusBufferResource->Unlock(m_osContext) != MOS_STATUS_SUCCESS) |
| { |
| MOS_OS_ASSERTMESSAGE("failed to unlock the status buf bundled w/ the specified gpucontext"); |
| } |
| m_statusBufferResource->Free(m_osContext, 0); |
| MOS_Delete(m_statusBufferResource); |
| } |
| MOS_FreeMemAndSetNull(m_statusBufferMosResource); |
| |
| MOS_LockMutex(m_cmdBufPoolMutex); |
| |
| if (m_cmdBufMgr) |
| { |
| for (auto& curCommandBuffer : m_cmdBufPool) |
| { |
| auto curCommandBufferSpecific = static_cast<CommandBufferSpecific *>(curCommandBuffer); |
| if (curCommandBufferSpecific == nullptr) |
| continue; |
| curCommandBufferSpecific->waitReady(); // wait ready and return to comamnd buffer manager. |
| m_cmdBufMgr->ReleaseCmdBuf(curCommandBuffer); |
| } |
| } |
| |
| m_cmdBufPool.clear(); |
| |
| MOS_UnlockMutex(m_cmdBufPoolMutex); |
| MOS_DestroyMutex(m_cmdBufPoolMutex); |
| m_cmdBufPoolMutex = nullptr; |
| MOS_SafeFreeMemory(m_commandBuffer); |
| MOS_SafeFreeMemory(m_allocationList); |
| MOS_SafeFreeMemory(m_patchLocationList); |
| MOS_SafeFreeMemory(m_attachedResources); |
| MOS_SafeFreeMemory(m_writeModeList); |
| MOS_SafeFreeMemory(m_createOptionEnhanced); |
| |
| for (int i=0; i<MAX_ENGINE_INSTANCE_NUM; i++) |
| { |
| if (m_i915Context[i]) |
| { |
| mos_gem_context_destroy(m_i915Context[i]); |
| m_i915Context[i] = nullptr; |
| } |
| } |
| MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_END, |
| nullptr, 0, nullptr, 0); |
| } |
| |
| MOS_STATUS GpuContextSpecific::RegisterResource( |
| PMOS_RESOURCE osResource, |
| bool writeFlag) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_OS_CHK_NULL_RETURN(osResource); |
| |
| MOS_OS_CHK_NULL_RETURN(m_attachedResources); |
| |
| PMOS_RESOURCE registeredResources = m_attachedResources; |
| uint32_t allocationIndex = 0; |
| |
| for ( allocationIndex = 0; allocationIndex < m_resCount; allocationIndex++, registeredResources++) |
| { |
| if (osResource->bo == registeredResources->bo) |
| { |
| break; |
| } |
| } |
| |
| // Allocation list to be updated |
| if (allocationIndex < m_maxNumAllocations) |
| { |
| // New buffer |
| if (allocationIndex == m_resCount) |
| { |
| m_resCount++; |
| } |
| |
| // Set allocation |
| if (m_gpuContext >= MOS_GPU_CONTEXT_MAX) |
| { |
| MOS_OS_ASSERTMESSAGE("Gpu context exceeds max."); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| osResource->iAllocationIndex[m_gpuContext] = (allocationIndex); |
| m_attachedResources[allocationIndex] = *osResource; |
| m_writeModeList[allocationIndex] |= writeFlag; |
| m_allocationList[allocationIndex].hAllocation = &m_attachedResources[allocationIndex]; |
| m_allocationList[allocationIndex].WriteOperation |= writeFlag; |
| m_numAllocations = m_resCount; |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Reached max # registrations."); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS GpuContextSpecific::SetPatchEntry( |
| PMOS_INTERFACE osInterface, |
| PMOS_PATCH_ENTRY_PARAMS params) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_OS_CHK_NULL_RETURN(m_patchLocationList); |
| MOS_OS_CHK_NULL_RETURN(osInterface); |
| MOS_OS_CHK_NULL_RETURN(params); |
| |
| m_patchLocationList[m_currentNumPatchLocations].AllocationIndex = params->uiAllocationIndex; |
| m_patchLocationList[m_currentNumPatchLocations].AllocationOffset = params->uiResourceOffset; |
| m_patchLocationList[m_currentNumPatchLocations].PatchOffset = params->uiPatchOffset; |
| m_patchLocationList[m_currentNumPatchLocations].uiWriteOperation = params->bWrite ? true: false; |
| m_patchLocationList[m_currentNumPatchLocations].cmdBo = |
| params->cmdBuffer != nullptr ? params->cmdBuffer->OsResource.bo : nullptr; |
| |
| if (osInterface->osCpInterface && |
| osInterface->osCpInterface->IsHMEnabled()) |
| { |
| if (MOS_STATUS_SUCCESS != osInterface->osCpInterface->RegisterPatchForHM( |
| (uint32_t *)(params->cmdBufBase + params->uiPatchOffset), |
| params->bWrite, |
| params->HwCommandType, |
| params->forceDwordOffset, |
| params->presResource, |
| &m_patchLocationList[m_currentNumPatchLocations])) |
| { |
| MOS_OS_ASSERTMESSAGE("Failed to RegisterPatchForHM."); |
| } |
| } |
| |
| m_currentNumPatchLocations++; |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
//!
//! \brief    Hand out a command buffer for the caller to record commands into
//! \details  flags == 0 selects the primary command buffer; any non-zero value
//!           selects the secondary buffer keyed by that value. A fresh buffer
//!           is fetched from the pool only when the primary was flushed or the
//!           requested secondary does not exist yet; otherwise the cached copy
//!           is returned so recording can continue where it left off.
//! \param    [out] comamndBuffer
//!           Caller-provided descriptor filled with base/offset/size info
//! \param    [in] flags
//!           0 for the primary buffer, otherwise the secondary-buffer index
//! \return   MOS_STATUS_SUCCESS on success
//!
MOS_STATUS GpuContextSpecific::GetCommandBuffer(
    PMOS_COMMAND_BUFFER comamndBuffer,
    uint32_t flags)
{
    MOS_OS_FUNCTION_ENTER;

    MOS_OS_CHK_NULL_RETURN(comamndBuffer);
    MOS_OS_CHK_NULL_RETURN(m_cmdBufMgr);
    MOS_OS_CHK_NULL_RETURN(m_commandBuffer);

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
    CommandBuffer* cmdBuf = nullptr;

    uint32_t secondaryIdx = flags;
    bool isPrimaryCmdBuffer = (secondaryIdx == 0);
    bool hasSecondaryCmdBuffer = (!isPrimaryCmdBuffer &&
                                  (m_secondaryCmdBufs.count(secondaryIdx) != 0));

    // Allocate when the primary was already submitted (flushed) or when this
    // secondary index has never been handed out before.
    bool needToAlloc = ((isPrimaryCmdBuffer && m_cmdBufFlushed) ||
                        (!isPrimaryCmdBuffer && !hasSecondaryCmdBuffer));

    if (needToAlloc)
    {
        MOS_LockMutex(m_cmdBufPoolMutex);
        if (m_cmdBufPool.size() < MAX_CMD_BUF_NUM)
        {
            // Pool not full yet: take a new buffer from the manager and grow
            // the pool.
            cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
            if (cmdBuf == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MOS_UnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
            {
                MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
                MOS_UnlockMutex(m_cmdBufPoolMutex);
                return eStatus;
            }
            m_cmdBufPool.push_back(cmdBuf);
        }
        else if (m_cmdBufPool.size() == MAX_CMD_BUF_NUM && m_nextFetchIndex < m_cmdBufPool.size())
        {
            // Pool full: recycle the oldest slot (cyclic reuse). Wait for the
            // GPU to finish with it, unbind, and return it to the manager.
            auto cmdBufOld = m_cmdBufPool[m_nextFetchIndex];
            auto cmdBufSpecificOld = static_cast<CommandBufferSpecific *>(cmdBufOld);
            if (cmdBufSpecificOld == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MOS_UnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            cmdBufSpecificOld->waitReady();
            cmdBufSpecificOld->UnBindToGpuContext();
            m_cmdBufMgr->ReleaseCmdBuf(cmdBufOld); // here just return old command buffer to available pool

            // pick up a new command buffer to fill the recycled slot
            cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
            if (cmdBuf == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MOS_UnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
            {
                MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
                MOS_UnlockMutex(m_cmdBufPoolMutex);
                return eStatus;
            }
            m_cmdBufPool[m_nextFetchIndex] = cmdBuf;
        }
        else
        {
            MOS_OS_ASSERTMESSAGE("Command buffer bool size exceed max.");
            MOS_UnlockMutex(m_cmdBufPoolMutex);
            return MOS_STATUS_UNKNOWN;
        }
        MOS_UnlockMutex(m_cmdBufPoolMutex);

        // A buffer is now in hand; populate the caller's descriptor from it.
        MOS_OS_CHK_STATUS_RETURN(cmdBuf->GetResource()->ConvertToMosResource(&comamndBuffer->OsResource));
        comamndBuffer->pCmdBase = (uint32_t *)cmdBuf->GetLockAddr();
        comamndBuffer->pCmdPtr = (uint32_t *)cmdBuf->GetLockAddr();
        comamndBuffer->iOffset = 0;
        comamndBuffer->iRemaining = cmdBuf->GetCmdBufSize();
        comamndBuffer->iCmdIndex = m_nextFetchIndex;
        comamndBuffer->iVdboxNodeIndex = MOS_VDBOX_NODE_INVALID;
        comamndBuffer->iVeboxNodeIndex = MOS_VEBOX_NODE_INVALID;
        comamndBuffer->is1stLvlBB = true;
        comamndBuffer->Attributes.pAttriVe = nullptr;

        // Zero the command buffer memory and reset per-submission attributes.
        MOS_ZeroMemory(comamndBuffer->pCmdBase, comamndBuffer->iRemaining);
        comamndBuffer->iSubmissionType = SUBMISSION_TYPE_SINGLE_PIPE;
        MOS_ZeroMemory(&comamndBuffer->Attributes,sizeof(comamndBuffer->Attributes));

        if (isPrimaryCmdBuffer)
        {
            // Mark the primary as in-use and keep a cached copy in the context.
            m_cmdBufFlushed = false;

            MOS_SecureMemcpy(m_commandBuffer, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
        }
        else
        {
            // Cache the secondary descriptor in the per-index map.
            PMOS_COMMAND_BUFFER tempCmdBuf = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
            MOS_OS_CHK_NULL_RETURN(tempCmdBuf);
            m_secondaryCmdBufs[secondaryIdx] = tempCmdBuf;
            MOS_SecureMemcpy(tempCmdBuf, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
        }

        // Command buffers are treated as cyclical buffers, the CB after the just submitted one
        // has the minimal fence value that we should wait
        m_nextFetchIndex++;
        if (m_nextFetchIndex >= MAX_CMD_BUF_NUM)
        {
            m_nextFetchIndex = 0;
        }
    }
    else
    {
        // Current buffer still active: return the cached descriptor so the
        // caller continues recording at the saved offset.
        if (isPrimaryCmdBuffer)
        {
            MOS_SecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_commandBuffer, sizeof(MOS_COMMAND_BUFFER));
        }
        else
        {
            MOS_SecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER));
        }
    }

    // Register the command buffer itself on the allocation list (read-only).
    if (isPrimaryCmdBuffer)
    {
        MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_commandBuffer->OsResource, false));
    }
    else
    {
        MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_secondaryCmdBufs[secondaryIdx]->OsResource, false));
    }

    return MOS_STATUS_SUCCESS;
}
| |
| void GpuContextSpecific::ReturnCommandBuffer( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| uint32_t flags) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_OS_ASSERT(cmdBuffer); |
| MOS_OS_ASSERT(m_commandBuffer); |
| |
| bool isPrimaryCmdBuf = (flags == 0); |
| |
| if (isPrimaryCmdBuf) |
| { |
| m_commandBuffer->iOffset = cmdBuffer->iOffset; |
| m_commandBuffer->iRemaining = cmdBuffer->iRemaining; |
| m_commandBuffer->pCmdPtr = cmdBuffer->pCmdPtr; |
| m_commandBuffer->iVdboxNodeIndex = cmdBuffer->iVdboxNodeIndex; |
| m_commandBuffer->iVeboxNodeIndex = cmdBuffer->iVeboxNodeIndex; |
| } |
| else |
| { |
| uint32_t secondaryIdx = flags; |
| MOS_OS_ASSERT(m_secondaryCmdBufs.count(secondaryIdx)); |
| |
| MOS_SecureMemcpy(m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER), cmdBuffer, sizeof(MOS_COMMAND_BUFFER)); |
| } |
| } |
| |
| MOS_STATUS GpuContextSpecific::ResetCommandBuffer() |
| { |
| m_cmdBufFlushed = true; |
| auto it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| MOS_FreeMemory(it->second); |
| it++; |
| } |
| m_secondaryCmdBufs.clear(); |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS GpuContextSpecific::SetIndirectStateSize(const uint32_t size) |
| { |
| if(size < m_commandBufferSize) |
| { |
| m_IndirectHeapSize = size; |
| return MOS_STATUS_SUCCESS; |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Indirect State Size if out of boundry!"); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| |
| MOS_STATUS GpuContextSpecific::GetIndirectState( |
| uint32_t *offset, |
| uint32_t *size) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| if (offset) |
| { |
| *offset = m_commandBufferSize - m_IndirectHeapSize; |
| } |
| |
| if (size) |
| { |
| *size = m_IndirectHeapSize; |
| } |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS GpuContextSpecific::GetIndirectStatePointer( |
| uint8_t **indirectState) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_OS_CHK_NULL_RETURN(indirectState); |
| |
| *indirectState = (uint8_t *)m_commandBuffer->pCmdBase + m_commandBufferSize - m_IndirectHeapSize; |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS GpuContextSpecific::ResizeCommandBufferAndPatchList( |
| uint32_t requestedCommandBufferSize, |
| uint32_t requestedPatchListSize, |
| uint32_t flags) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| // m_commandBufferSize is used for allocate command buffer and submit command buffer, in this moment, command buffer has not allocated yet. |
| // Linux KMD requires command buffer size align to 8 bytes, or it will not execute the commands. |
| m_commandBufferSize = MOS_ALIGN_CEIL(requestedCommandBufferSize, 8); |
| |
| if (requestedPatchListSize > m_maxPatchLocationsize) |
| { |
| PPATCHLOCATIONLIST newPatchList = (PPATCHLOCATIONLIST)realloc(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * requestedPatchListSize); |
| MOS_OS_CHK_NULL_RETURN(newPatchList); |
| |
| m_patchLocationList = newPatchList; |
| |
| // now zero the extended portion |
| MOS_ZeroMemory((m_patchLocationList + m_maxPatchLocationsize), sizeof(PATCHLOCATIONLIST) * (requestedPatchListSize - m_maxPatchLocationsize)); |
| m_maxPatchLocationsize = requestedPatchListSize; |
| } |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
//!
//! \brief    Record a new command buffer size for subsequent allocations
//! \details  Only the stored size changes here; the buffer itself is fetched
//!           at this size on the next GetCommandBuffer() call.
//! \param    [in] requestedSize
//!           New command buffer size in bytes
//! \return   MOS_STATUS_SUCCESS
//!
MOS_STATUS GpuContextSpecific::ResizeCommandBuffer(uint32_t requestedSize)
{
    MOS_OS_FUNCTION_ENTER;

    m_commandBufferSize = requestedSize;

    return MOS_STATUS_SUCCESS;
}
| |
| uint32_t GetVcsExecFlag(PMOS_INTERFACE osInterface, |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| MOS_GPU_NODE gpuNode) |
| { |
| if (osInterface == 0 || |
| cmdBuffer == 0) |
| { |
| MOS_OS_ASSERTMESSAGE("Input invalid(null) parameter."); |
| return I915_EXEC_DEFAULT; |
| } |
| |
| uint32_t vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1; |
| |
| if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex) |
| { |
| // That's those case when BB did not have any VDBOX# specific commands. |
| // Thus, we need to select VDBOX# here. Alternatively we can rely on KMD |
| // to make balancing for us, i.e. rely on Virtual Engine support. |
| cmdBuffer->iVdboxNodeIndex = osInterface->pfnGetVdboxNodeId(osInterface, cmdBuffer); |
| if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex) |
| { |
| cmdBuffer->iVdboxNodeIndex = (gpuNode == MOS_GPU_NODE_VIDEO)? |
| MOS_VDBOX_NODE_1: MOS_VDBOX_NODE_2; |
| } |
| } |
| |
| if (MOS_VDBOX_NODE_1 == cmdBuffer->iVdboxNodeIndex) |
| { |
| vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1; |
| } |
| else if (MOS_VDBOX_NODE_2 == cmdBuffer->iVdboxNodeIndex) |
| { |
| vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2; |
| } |
| |
| return vcsExecFlag; |
| } |
| |
| MOS_STATUS GpuContextSpecific::MapResourcesToAuxTable(mos_linux_bo *cmd_bo) |
| { |
| MOS_OS_CHK_NULL_RETURN(cmd_bo); |
| |
| OsContextSpecific *osCtx = static_cast<OsContextSpecific*>(m_osContext); |
| MOS_OS_CHK_NULL_RETURN(osCtx); |
| |
| AuxTableMgr *auxTableMgr = osCtx->GetAuxTableMgr(); |
| if (auxTableMgr) |
| { |
| // Map compress allocations to aux table if it is not mapped. |
| for (uint32_t i = 0; i < m_numAllocations; i++) |
| { |
| auto res = (PMOS_RESOURCE)m_allocationList[i].hAllocation; |
| MOS_OS_CHK_NULL_RETURN(res); |
| MOS_OS_CHK_STATUS_RETURN(auxTableMgr->MapResource(res->pGmmResInfo, res->bo)); |
| } |
| MOS_OS_CHK_STATUS_RETURN(auxTableMgr->EmitAuxTableBOList(cmd_bo)); |
| } |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS GpuContextSpecific::SubmitCommandBuffer( |
| PMOS_INTERFACE osInterface, |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool nullRendering) |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_START, nullptr, 0, nullptr, 0); |
| |
| MOS_OS_CHK_NULL_RETURN(osInterface); |
| PMOS_CONTEXT osContext = osInterface->pOsContext; |
| MOS_OS_CHK_NULL_RETURN(osContext); |
| MOS_OS_CHK_NULL_RETURN(cmdBuffer); |
| MOS_OS_CHK_NULL_RETURN(m_patchLocationList); |
| |
| MOS_GPU_NODE gpuNode = OSKMGetGpuNode(m_gpuContext); |
| uint32_t execFlag = gpuNode; |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| int32_t ret = 0; |
| bool scalaEnabled = false; |
| auto it = m_secondaryCmdBufs.begin(); |
| |
| // Command buffer object DRM pointer |
| m_cmdBufFlushed = true; |
| auto cmd_bo = cmdBuffer->OsResource.bo; |
| |
| // Map Resource to Aux if needed |
| MapResourcesToAuxTable(cmd_bo); |
| for(auto it : m_secondaryCmdBufs) |
| { |
| MapResourcesToAuxTable(it.second->OsResource.bo); |
| } |
| |
| if (m_secondaryCmdBufs.size() >= 2) |
| { |
| scalaEnabled = true; |
| cmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_MASTER; |
| } |
| |
| std::vector<PMOS_RESOURCE> mappedResList; |
| std::vector<MOS_LINUX_BO *> skipSyncBoList; |
| |
| // Now, the patching will be done, based on the patch list. |
| for (uint32_t patchIndex = 0; patchIndex < m_currentNumPatchLocations; patchIndex++) |
| { |
| auto currentPatch = &m_patchLocationList[patchIndex]; |
| MOS_OS_CHK_NULL_RETURN(currentPatch); |
| |
| auto tempCmdBo = currentPatch->cmdBo == nullptr ? cmd_bo : currentPatch->cmdBo; |
| |
| // Following are for Nested BB buffer, if it's nested BB, we need to ensure it's locked. |
| if (tempCmdBo != cmd_bo) |
| { |
| bool isSecondaryCmdBuf = false; |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| if (it->second->OsResource.bo == tempCmdBo) |
| { |
| isSecondaryCmdBuf = true; |
| break; |
| } |
| it++; |
| } |
| |
| for(auto allocIdx = 0; allocIdx < m_numAllocations && (!isSecondaryCmdBuf); allocIdx++) |
| { |
| auto tempRes = (PMOS_RESOURCE)m_allocationList[allocIdx].hAllocation; |
| if (tempCmdBo == tempRes->bo) |
| { |
| GraphicsResource::LockParams param; |
| param.m_writeRequest = true; |
| tempRes->pGfxResource->Lock(m_osContext, param); |
| mappedResList.push_back(tempRes); |
| break; |
| } |
| } |
| } |
| |
| // This is the resource for which patching will be done |
| auto resource = (PMOS_RESOURCE)m_allocationList[currentPatch->AllocationIndex].hAllocation; |
| MOS_OS_CHK_NULL_RETURN(resource); |
| |
| // For now, we'll assume the system memory's DRM bo pointer |
| // is NULL. If nullptr is detected, then the resource has been |
| // placed inside the command buffer's indirect state area. |
| // We'll simply set alloc_bo to the command buffer's bo pointer. |
| MOS_OS_ASSERT(resource->bo); |
| |
| auto alloc_bo = (resource->bo) ? resource->bo : tempCmdBo; |
| |
| MOS_OS_CHK_STATUS_RETURN(osInterface->osCpInterface->PermeatePatchForHM( |
| tempCmdBo->virt, |
| currentPatch, |
| resource)); |
| |
| uint64_t boOffset = alloc_bo->offset64; |
| if (alloc_bo != tempCmdBo) |
| { |
| auto item_ctx = osContext->contextOffsetList.begin(); |
| for (; item_ctx != osContext->contextOffsetList.end(); item_ctx++) |
| { |
| if (item_ctx->intel_context == osContext->intel_context && item_ctx->target_bo == alloc_bo) |
| { |
| boOffset = item_ctx->offset64; |
| break; |
| } |
| } |
| } |
| |
| if (osContext->bUse64BitRelocs) |
| { |
| *((uint64_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) = |
| boOffset + currentPatch->AllocationOffset; |
| } |
| else |
| { |
| *((uint32_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) = |
| boOffset + currentPatch->AllocationOffset; |
| } |
| |
| if (scalaEnabled) |
| { |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| if (it->second->OsResource.bo == tempCmdBo && |
| it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE && |
| !mos_gem_bo_is_exec_object_async(alloc_bo)) |
| { |
| skipSyncBoList.push_back(alloc_bo); |
| break; |
| } |
| it++; |
| } |
| } |
| else if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE && |
| !mos_gem_bo_is_exec_object_async(alloc_bo)) |
| { |
| skipSyncBoList.push_back(alloc_bo); |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| { |
| uint32_t evtData[] = {alloc_bo->handle, currentPatch->uiWriteOperation, currentPatch->AllocationOffset}; |
| MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_INFO, |
| evtData, sizeof(evtData), |
| &boOffset, sizeof(boOffset)); |
| } |
| #endif |
| if(mos_gem_bo_is_softpin(alloc_bo)) |
| { |
| if (alloc_bo != tempCmdBo) |
| { |
| ret = mos_bo_add_softpin_target(tempCmdBo, alloc_bo, currentPatch->uiWriteOperation); |
| } |
| } |
| else |
| { |
| // This call will patch the command buffer with the offsets of the indirect state region of the command buffer |
| ret = mos_bo_emit_reloc2( |
| tempCmdBo, // Command buffer |
| currentPatch->PatchOffset, // Offset in the command buffer |
| alloc_bo, // Allocation object for which the patch will be made. |
| currentPatch->AllocationOffset, // Offset to the indirect state |
| I915_GEM_DOMAIN_RENDER, // Read domain |
| (currentPatch->uiWriteOperation) ? I915_GEM_DOMAIN_RENDER : 0x0, // Write domain |
| boOffset); |
| } |
| |
| if (ret != 0) |
| { |
| MOS_OS_ASSERTMESSAGE("Error patching alloc_bo = 0x%x, cmd_bo = 0x%x.", |
| (uintptr_t)alloc_bo, |
| (uintptr_t)tempCmdBo); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| |
| for(auto res: mappedResList) |
| { |
| res->pGfxResource->Unlock(m_osContext); |
| } |
| mappedResList.clear(); |
| |
| if (scalaEnabled) |
| { |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| //Add Batch buffer End Command |
| uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END; |
| if (MOS_FAILED(Mos_AddCommand( |
| it->second, |
| &batchBufferEndCmd, |
| sizeof(uint32_t)))) |
| { |
| MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!"); |
| return MOS_STATUS_UNKNOWN; |
| } |
| it++; |
| } |
| } |
| else |
| { |
| //Add Batch buffer End Command |
| uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END; |
| if (MOS_FAILED(Mos_AddCommand( |
| cmdBuffer, |
| &batchBufferEndCmd, |
| sizeof(uint32_t)))) |
| { |
| MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!"); |
| return MOS_STATUS_UNKNOWN; |
| } |
| } |
| |
| // Now, we can unmap the video command buffer, since we don't need CPU access anymore. |
| MOS_OS_CHK_NULL_RETURN(cmdBuffer->OsResource.pGfxResource); |
| cmdBuffer->OsResource.pGfxResource->Unlock(m_osContext); |
| |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| MOS_OS_CHK_NULL_RETURN(it->second->OsResource.pGfxResource); |
| it->second->OsResource.pGfxResource->Unlock(m_osContext); |
| |
| it++; |
| } |
| |
| int32_t perfData; |
| if (osContext->pPerfData != nullptr) |
| { |
| perfData = *(int32_t *)(osContext->pPerfData); |
| } |
| else |
| { |
| perfData = 0; |
| } |
| |
| drm_clip_rect_t *cliprects = nullptr; |
| int32_t num_cliprects = 0; |
| int32_t DR4 = osContext->uEnablePerfTag ? perfData : 0; |
| |
| //Since CB2 command is not supported, remove it and set cliprects to nullprt as default. |
| if ((gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2) && |
| (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_SINGLE_PIPE_MASK)) |
| { |
| if (osContext->bKMDHasVCS2) |
| { |
| if (osContext->bPerCmdBufferBalancing && osInterface->pfnGetVdboxNodeId) |
| { |
| execFlag = GetVcsExecFlag(osInterface, cmdBuffer, gpuNode); |
| } |
| else if (gpuNode == MOS_GPU_NODE_VIDEO) |
| { |
| execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1; |
| } |
| else if (gpuNode == MOS_GPU_NODE_VIDEO2) |
| { |
| execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2; |
| } |
| else if ((gpuNode == MOS_GPU_NODE_BLT)) |
| { |
| execFlag = I915_EXEC_BLT; |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Invalid gpuNode."); |
| } |
| } |
| else |
| { |
| execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1; |
| } |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| |
| MOS_LINUX_BO *bad_cmd_bo = nullptr; |
| MOS_LINUX_BO *nop_cmd_bo = nullptr; |
| uint32_t dwComponentTag = 0; |
| uint32_t dwCallType = 0; |
| |
| //dwComponentTag 3: decode,5: vpp,6: encode |
| //dwCallType 8: PAK(CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE) |
| // 34: PREENC |
| // 5: VPP |
| dwComponentTag = (perfData & 0xF000) >> 12; |
| dwCallType = (perfData & 0xFC) >> 2; |
| |
| if (osInterface->bTriggerCodecHang && |
| (dwComponentTag == 3 || (dwComponentTag == 6 && dwCallType == 8) || |
| (dwComponentTag == 6 && dwCallType == 34) || |
| (dwComponentTag == 5 && dwCallType == 5))) |
| { |
| bad_cmd_bo = Mos_GetBadCommandBuffer_Linux(osInterface); |
| if (bad_cmd_bo) |
| { |
| ret = mos_bo_mrb_exec(bad_cmd_bo, |
| 4096, |
| nullptr, |
| 0, |
| 0, |
| execFlag); |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Mos_GetBadCommandBuffer_Linux failed!"); |
| } |
| } |
| else if (osInterface->bTriggerVPHang == true) |
| { |
| bad_cmd_bo = Mos_GetBadCommandBuffer_Linux(osInterface); |
| |
| if (bad_cmd_bo) |
| { |
| ret = mos_bo_mrb_exec(bad_cmd_bo, |
| 4096, |
| nullptr, |
| 0, |
| 0, |
| execFlag); |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Mos_GetBadCommandBuffer_Linux failed!"); |
| } |
| |
| osInterface->bTriggerVPHang = false; |
| } |
| |
| nop_cmd_bo = nullptr; |
| if (nullRendering == true) |
| { |
| nop_cmd_bo = Mos_GetNopCommandBuffer_Linux(osInterface); |
| |
| if (nop_cmd_bo) |
| { |
| ret = mos_bo_mrb_exec(nop_cmd_bo, |
| 4096, |
| nullptr, |
| 0, |
| 0, |
| execFlag); |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("Mos_GetNopCommandBuffer_Linux failed!"); |
| } |
| } |
| |
| #endif //(_DEBUG || _RELEASE_INTERNAL) |
| |
| if (gpuNode != I915_EXEC_RENDER && |
| osInterface->osCpInterface->IsTearDownHappen()) |
| { |
| // skip PAK command when CP tear down happen to avoid of GPU hang |
| // conditonal batch buffer start PoC is in progress |
| } |
| else if (nullRendering == false) |
| { |
| if (osInterface->ctxBasedScheduling && m_i915Context[0] != nullptr) |
| { |
| if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASK) |
| { |
| if (scalaEnabled && !osInterface->bGucSubmission) |
| { |
| uint32_t secondaryIndex = 0; |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| if (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE) |
| { |
| if(execFlag == MOS_GPU_NODE_VE) |
| { |
| // decode excluded since init in other place |
| it->second->iSubmissionType |= (secondaryIndex << SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT); |
| secondaryIndex++; |
| } |
| } |
| |
| ret = SubmitPipeCommands(it->second, |
| it->second->OsResource.bo, |
| osContext, |
| skipSyncBoList, |
| execFlag, |
| DR4); |
| it++; |
| } |
| } |
| else if(scalaEnabled && osInterface->bGucSubmission) |
| { |
| ret = ParallelSubmitCommands(m_secondaryCmdBufs, |
| osContext, |
| execFlag, |
| DR4); |
| } |
| else |
| { |
| ret = SubmitPipeCommands(cmdBuffer, |
| cmd_bo, |
| osContext, |
| skipSyncBoList, |
| execFlag, |
| DR4); |
| } |
| } |
| else |
| { |
| ret = mos_gem_bo_context_exec2(cmd_bo, |
| m_commandBufferSize, |
| m_i915Context[0], |
| cliprects, |
| num_cliprects, |
| DR4, |
| m_i915ExecFlag, |
| nullptr); |
| } |
| } |
| else |
| { |
| ret = mos_gem_bo_context_exec2(cmd_bo, |
| m_commandBufferSize, |
| osContext->intel_context, |
| cliprects, |
| num_cliprects, |
| DR4, |
| execFlag, |
| nullptr); |
| } |
| if (ret != 0) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| } |
| } |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| MOS_OS_ASSERTMESSAGE("Command buffer submission failed!"); |
| } |
| |
| MOS_DEVULT_FuncCall(pfnUltGetCmdBuf, cmdBuffer); |
| |
| #if MOS_COMMAND_BUFFER_DUMP_SUPPORTED |
| pthread_mutex_lock(&command_dump_mutex); |
| if (osInterface->bDumpCommandBuffer) |
| { |
| if (scalaEnabled) |
| { |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| mos_bo_map(it->second->OsResource.bo, 0); |
| osInterface->pfnDumpCommandBuffer(osInterface, it->second); |
| mos_bo_unmap(it->second->OsResource.bo); |
| it++; |
| } |
| } |
| else |
| { |
| mos_bo_map(cmd_bo, 0); |
| osInterface->pfnDumpCommandBuffer(osInterface, cmdBuffer); |
| mos_bo_unmap(cmd_bo); |
| } |
| } |
| pthread_mutex_unlock(&command_dump_mutex); |
| #endif // MOS_COMMAND_BUFFER_DUMP_SUPPORTED |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| if (bad_cmd_bo) |
| { |
| mos_bo_wait_rendering(bad_cmd_bo); |
| mos_bo_unreference(bad_cmd_bo); |
| } |
| if (nop_cmd_bo) |
| { |
| mos_bo_unreference(nop_cmd_bo); |
| } |
| #endif //(_DEBUG || _RELEASE_INTERNAL) |
| |
| //clear command buffer relocations to fix memory leak issue |
| mos_gem_bo_clear_relocs(cmd_bo, 0); |
| it = m_secondaryCmdBufs.begin(); |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| mos_gem_bo_clear_relocs(it->second->OsResource.bo, 0); |
| MOS_FreeMemory(it->second); |
| it++; |
| } |
| m_secondaryCmdBufs.clear(); |
| skipSyncBoList.clear(); |
| |
| // Reset resource allocation |
| m_numAllocations = 0; |
| MOS_ZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * m_maxNumAllocations); |
| m_currentNumPatchLocations = 0; |
| MOS_ZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * m_maxNumAllocations); |
| m_resCount = 0; |
| |
| MOS_ZeroMemory(m_writeModeList, sizeof(bool) * m_maxNumAllocations); |
| finish: |
| MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_END, &eStatus, sizeof(eStatus), nullptr, 0); |
| return eStatus; |
| } |
| |
| int32_t GpuContextSpecific::SubmitPipeCommands( |
| MOS_COMMAND_BUFFER *cmdBuffer, |
| MOS_LINUX_BO *cmdBo, |
| PMOS_CONTEXT osContext, |
| const std::vector<MOS_LINUX_BO *> &skipSyncBoList, |
| uint32_t execFlag, |
| int32_t dr4) |
| { |
| int32_t ret = 0; |
| int fence = -1; |
| unsigned int fenceFlag = 0; |
| |
| MOS_LINUX_CONTEXT *queue = m_i915Context[0]; |
| bool isVeboxSubmission = false; |
| |
| if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2) |
| { |
| execFlag = I915_EXEC_DEFAULT; |
| } |
| if (execFlag == MOS_GPU_NODE_VE) |
| { |
| execFlag = I915_EXEC_DEFAULT; |
| isVeboxSubmission = true; |
| } |
| |
| if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE) |
| { |
| fence = osContext->submit_fence; |
| fenceFlag = I915_EXEC_FENCE_SUBMIT; |
| int slaveIndex = (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_MASK) >> SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT; |
| if(slaveIndex < 7) |
| { |
| queue = m_i915Context[2 + slaveIndex]; //0 is for single pipe, 1 is for master, slave starts from 2 |
| } |
| else |
| { |
| MOS_OS_ASSERTMESSAGE("slaveIndex value: %s is invalid!", slaveIndex); |
| return -1; |
| } |
| |
| if (isVeboxSubmission) |
| { |
| queue = m_i915Context[cmdBuffer->iVeboxNodeIndex + 1]; |
| } |
| |
| for(auto bo: skipSyncBoList) |
| { |
| mos_bo_set_exec_object_async(cmdBo, bo); |
| } |
| } |
| |
| //Keep FE and BE0 running on same engine for VT decode |
| if((cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_ALONE) |
| || (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)) |
| { |
| if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER) |
| { |
| //Only master pipe needs fence out flag |
| fenceFlag = I915_EXEC_FENCE_OUT; |
| } |
| queue = m_i915Context[1]; |
| } |
| |
| ret = mos_gem_bo_context_exec2(cmdBo, |
| cmdBo->size, |
| queue, |
| nullptr, |
| 0, |
| dr4, |
| execFlag | fenceFlag, |
| &fence); |
| |
| if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER) |
| { |
| osContext->submit_fence = fence; |
| } |
| |
| if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE) |
| { |
| close(fence); |
| } |
| |
| return ret; |
| } |
| |
| int32_t GpuContextSpecific::ParallelSubmitCommands( |
| std::map<uint32_t, PMOS_COMMAND_BUFFER> secondaryCmdBufs, |
| PMOS_CONTEXT osContext, |
| uint32_t execFlag, |
| int32_t dr4) |
| { |
| int32_t ret = 0; |
| int fence = -1; |
| unsigned int fenceFlag = 0; |
| auto it = m_secondaryCmdBufs.begin(); |
| MOS_LINUX_BO *cmdBos[MAX_PARALLEN_CMD_BO_NUM]; |
| int numBos = 0; // exclude FE bo |
| |
| MOS_LINUX_CONTEXT *queue = m_i915Context[0]; |
| bool isVeboxSubmission = false; |
| |
| if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2) |
| { |
| execFlag = I915_EXEC_DEFAULT; |
| } |
| if (execFlag == MOS_GPU_NODE_VE) |
| { |
| execFlag = I915_EXEC_DEFAULT; |
| isVeboxSubmission = true; |
| } |
| |
| while(it != m_secondaryCmdBufs.end()) |
| { |
| if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_ALONE) |
| { |
| fenceFlag = I915_EXEC_FENCE_OUT; |
| queue = m_i915Context[0]; |
| |
| ret = mos_gem_bo_context_exec2(it->second->OsResource.bo, |
| it->second->OsResource.bo->size, |
| queue, |
| nullptr, |
| 0, |
| dr4, |
| execFlag | fenceFlag, |
| &fence); |
| |
| osContext->submit_fence = fence; |
| } |
| |
| if((it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER) |
| || (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)) |
| { |
| cmdBos[numBos++] = it->second->OsResource.bo; |
| |
| if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE) |
| { |
| queue = m_i915Context[numBos - 1]; |
| if(-1 != fence) |
| { |
| fenceFlag = I915_EXEC_FENCE_IN; |
| } |
| |
| ret = mos_gem_bo_context_exec3(cmdBos, |
| numBos, |
| queue, |
| nullptr, |
| 0, |
| dr4, |
| execFlag | fenceFlag, |
| &fence); |
| |
| for(int i = 0; i < numBos; i++) |
| { |
| cmdBos[i] = nullptr; |
| } |
| numBos = 0; |
| |
| if(-1 != fence) |
| { |
| close(fence); |
| } |
| } |
| } |
| |
| it++; |
| } |
| |
| return ret; |
| } |
| |
| void GpuContextSpecific::IncrementGpuStatusTag() |
| { |
| m_GPUStatusTag = m_GPUStatusTag % UINT_MAX + 1; |
| if (m_GPUStatusTag == 0) |
| { |
| m_GPUStatusTag = 1; |
| } |
| } |
| |
| void GpuContextSpecific::ResetGpuContextStatus() |
| { |
| MOS_ZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE); |
| m_numAllocations = 0; |
| MOS_ZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE); |
| m_currentNumPatchLocations = 0; |
| |
| MOS_ZeroMemory(m_attachedResources, sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE); |
| m_resCount = 0; |
| |
| MOS_ZeroMemory(m_writeModeList, sizeof(bool) * ALLOCATIONLIST_SIZE); |
| |
| if ((m_cmdBufFlushed == true) && m_commandBuffer->OsResource.bo) |
| { |
| m_commandBuffer->OsResource.bo = nullptr; |
| } |
| } |
| |
| MOS_STATUS GpuContextSpecific::AllocateGPUStatusBuf() |
| { |
| MOS_OS_FUNCTION_ENTER; |
| |
| m_statusBufferMosResource = (MOS_RESOURCE_HANDLE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE)); |
| MOS_OS_CHK_NULL_RETURN(m_statusBufferMosResource); |
| |
| GraphicsResource::CreateParams params; |
| params.m_tileType = MOS_TILE_LINEAR; |
| params.m_type = MOS_GFXRES_BUFFER; |
| params.m_format = Format_Buffer; |
| params.m_width = sizeof(MOS_GPU_STATUS_DATA); |
| params.m_height = 1; |
| params.m_depth = 1; |
| params.m_arraySize = 1; |
| params.m_name = "GPU Status Buffer"; |
| |
| GraphicsResource *graphicsResource = GraphicsResource::CreateGraphicResource(GraphicsResource::osSpecificResource); |
| MOS_OS_CHK_NULL_RETURN(graphicsResource); |
| |
| MOS_OS_CHK_STATUS_RETURN(graphicsResource->Allocate(m_osContext, params)); |
| |
| GraphicsResource::LockParams lockParams; |
| lockParams.m_writeRequest = true; |
| auto gpuStatusData = (MOS_GPU_STATUS_DATA *)graphicsResource->Lock(m_osContext, lockParams); |
| if (gpuStatusData == nullptr) |
| { |
| MOS_OS_ASSERTMESSAGE("Unable to lock gpu eStatus buffer for read."); |
| graphicsResource->Free(m_osContext); |
| MOS_Delete(graphicsResource); |
| return MOS_STATUS_UNKNOWN; |
| } |
| |
| m_statusBufferResource = graphicsResource; |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
//!
//! \brief    Filter the enumerated engine list by a user-provided selection mask
//!           (debug / release-internal builds only).
//! \details  userEngineInstance packs one selection bitfield per engine class;
//!           the field for gpuNode is extracted with a node-specific shift and
//!           masked so only bits for engines that actually exist (*engineNum of
//!           them) survive. Selected engines are then compacted to the front of
//!           engineMap and *engineNum is updated to the selected count.
//! \param    [in,out] engineMap          Array of enumerated engine class/instance
//!                                       pairs; compacted in place on selection.
//! \param    [in,out] engineNum          In: number of valid entries in engineMap.
//!                                       Out: number of selected entries.
//! \param    [in]     userEngineInstance Packed per-node engine selection bitmask.
//! \param    [in]     gpuNode            GPU node whose selection field to use.
//! \return   true (non-zero mask) if a selection was applied; false if the mask
//!           was empty or the node was not recognized (engineMap untouched).
//!
bool GpuContextSpecific::SelectEngineInstanceByUser(struct i915_engine_class_instance *engineMap,
    uint32_t *engineNum, uint32_t userEngineInstance, MOS_GPU_NODE gpuNode)
{
    uint32_t engineInstance     = 0x0;

    // Extract this node's selection bits, then keep only as many low bits as
    // there are enumerated engines (*engineNum out of MAX_ENGINE_INSTANCE_NUM).
    if(gpuNode == MOS_GPU_NODE_COMPUTE)
    {
        engineInstance  = (userEngineInstance >> ENGINE_INSTANCE_SELECT_COMPUTE_INSTANCE_SHIFT)
            & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
    }
    else if(gpuNode == MOS_GPU_NODE_VE)
    {
        engineInstance  = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VEBOX_INSTANCE_SHIFT)
            & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
    }
    else if(gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2)
    {
        engineInstance  = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VDBOX_INSTANCE_SHIFT)
            & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
    }
    else
    {
        MOS_OS_NORMALMESSAGE("Invalid gpu node in use.");
    }

    if(engineInstance)
    {
        // Compact selected engines to the front of engineMap in one pass.
        // unSelectIndex is the next free slot at the front of the array.
        auto unSelectIndex = 0;
        for(auto bit = 0; bit < *engineNum; bit++)
        {
            if(((engineInstance >> bit) & 0x1) && (bit > unSelectIndex))
            {
                // Selected engine sits past the compaction point: move it
                // down into the free slot and clear its original entry.
                engineMap[unSelectIndex].engine_class    = engineMap[bit].engine_class;
                engineMap[unSelectIndex].engine_instance = engineMap[bit].engine_instance;
                engineMap[bit].engine_class              = 0;
                engineMap[bit].engine_instance           = 0;
                unSelectIndex++;
            }
            else if(((engineInstance >> bit) & 0x1) && (bit == unSelectIndex))
            {
                // Selected engine already in place; just advance the slot.
                unSelectIndex++;
            }
            else if(!((engineInstance >> bit) & 0x1))
            {
                // Unselected engine: clear its entry.
                engineMap[bit].engine_class    = 0;
                engineMap[bit].engine_instance = 0;
            }
        }
        *engineNum = unSelectIndex;
    }
    // Non-zero mask converts to true; zero (no selection) to false.
    return engineInstance;
}
| #endif |