blob: 3c2860f846d27b5f52b60bd2a6f50e501591ae4a [file] [log] [blame]
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file cm_hal.cpp
//! \brief HAL Layer for CM Component
//!
#include "mos_os.h"
#include "cm_hal.h"
#include "media_interfaces_cmhal.h"
#include "media_interfaces_mhw.h"
#include "cm_common.h"
#include "cm_hal_vebox.h"
#include "cm_mem.h"
#include "renderhal_platform_interface.h"
#include "cm_execution_adv.h"
#include "cm_extension_creator.h"
// Number of `base`-sized units needed to hold `index * elemperIndex` elements,
// i.e. the quotient rounded up. The conditional is parenthesized on its own:
// `?:` binds looser than `+`, so without the parentheses the whole sum becomes
// the condition and the macro can only ever yield 0 or 1.
// Each argument is evaluated more than once; do not pass expressions with side
// effects. `base` must be non-zero.
#define INDEX_ALIGN(index, elemperIndex, base) \
    ((((index) * (elemperIndex)) / (base)) + (((((index) * (elemperIndex)) % (base)) != 0) ? 1 : 0))
//----------------------------------
//| CM scoreboard XY
//| Plain pair of signed 32-bit x/y values.
//----------------------------------
struct CM_HAL_SCOREBOARD_XY
{
int32_t x; // x component
int32_t y; // y component
};
// Pointer alias for CM_HAL_SCOREBOARD_XY.
typedef CM_HAL_SCOREBOARD_XY *PCM_HAL_SCOREBOARD_XY;
//---------------------------------------
//| CM scoreboard XY with mask
//| Same x/y pair as CM_HAL_SCOREBOARD_XY plus two 8-bit mask fields.
//---------------------------------------
struct CM_HAL_SCOREBOARD_XY_MASK
{
int32_t x; // x component
int32_t y; // y component
uint8_t mask; // 8-bit mask (presumably the scoreboard dependency mask -- confirm against users)
uint8_t resetMask; // 8-bit reset mask (exact semantics not visible here -- confirm against users)
};
// Pointer alias for CM_HAL_SCOREBOARD_XY_MASK.
typedef CM_HAL_SCOREBOARD_XY_MASK *PCM_HAL_SCOREBOARD_XY_MASK;
//------------------------------------------------------------------------------
//| CM kernel slice and subslice being assigned to (for EnqueueWithHints)
//| Identifies one (slice, subslice) pair.
//------------------------------------------------------------------------------
struct CM_HAL_KERNEL_SLICE_SUBSLICE
{
uint32_t slice; // slice value
uint32_t subSlice; // subslice value
};
// Pointer alias for CM_HAL_KERNEL_SLICE_SUBSLICE.
typedef CM_HAL_KERNEL_SLICE_SUBSLICE *PCM_HAL_KERNEL_SLICE_SUBSLICE;
//------------------------------------------------------------------------------
//| CM kernel information for EnqueueWithHints to assign subslice
//------------------------------------------------------------------------------
struct CM_HAL_KERNEL_SUBSLICE_INFO
{
uint32_t numSubSlices; // number of subslices (presumably the length of 'destination' -- confirm)
uint32_t counter; // running counter; its use is not visible in this part of the file
PCM_HAL_KERNEL_SLICE_SUBSLICE destination; // pointer to slice/subslice entries; ownership not visible here
};
// Pointer alias for CM_HAL_KERNEL_SUBSLICE_INFO.
typedef CM_HAL_KERNEL_SUBSLICE_INFO *PCM_HAL_KERNEL_SUBSLICE_INFO;
// forward declaration
// Declared here so it can be used before its definition; kernelAllocation is
// passed by reference, so the callee can replace the caller's pointer.
int32_t HalCm_InsertCloneKernel(
PCM_HAL_STATE state,
PCM_HAL_KERNEL_PARAM kernelParam,
PRENDERHAL_KRN_ALLOCATION &kernelAllocation);
// The functions below are declared extern; their definitions are not in this
// part of the file.
extern MOS_STATUS HalCm_GetSipBinary(
PCM_HAL_STATE state);
// Command buffer dump helpers, compiled in only when MDF_COMMAND_BUFFER_DUMP is non-zero.
// NOTE: the spelling "Commad" is part of the symbol name.
#if MDF_COMMAND_BUFFER_DUMP
extern int32_t HalCm_InitDumpCommandBuffer(PCM_HAL_STATE state);
extern int32_t HalCm_DumpCommadBuffer(PCM_HAL_STATE state, PMOS_COMMAND_BUFFER cmdBuffer,
int offsetSurfaceState, size_t sizeOfSurfaceState);
#endif
// CURBE data dump helpers, compiled in only when MDF_CURBE_DATA_DUMP is non-zero.
#if MDF_CURBE_DATA_DUMP
extern int32_t HalCm_InitDumpCurbeData(PCM_HAL_STATE state);
extern int32_t HalCm_DumpCurbeData(PCM_HAL_STATE state);
#endif
// Surface content dump helper, compiled in only when MDF_SURFACE_CONTENT_DUMP is non-zero.
#if MDF_SURFACE_CONTENT_DUMP
extern int32_t HalCm_InitSurfaceDump(PCM_HAL_STATE state);
#endif
// Surface state dump helpers, compiled in only when MDF_SURFACE_STATE_DUMP is non-zero.
#if MDF_SURFACE_STATE_DUMP
extern int32_t HalCm_InitDumpSurfaceState(PCM_HAL_STATE state);
extern int32_t HalCm_DumpSurfaceState(PCM_HAL_STATE state, int offsetSurfaceState, size_t sizeOfSurfaceState);
#endif
// Interface descriptor dump helpers, compiled in only when
// MDF_INTERFACE_DESCRIPTOR_DATA_DUMP is non-zero.
// NOTE: the spelling "Descripor" is part of the symbol name.
#if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
extern int32_t HalCm_InitDumpInterfaceDescriporData(PCM_HAL_STATE state);
extern int32_t HalCm_DumpInterfaceDescriptorData(PCM_HAL_STATE state);
#endif
// Timestamp frequency query for the given OS interface (defined elsewhere).
extern uint64_t HalCm_GetTsFrequency(PMOS_INTERFACE pOsInterface);
//===============<Private Functions>============================================
//*-----------------------------------------------------------------------------
//| Purpose:    Round a value up to the nearest power of 2
//| Returns:    Smallest power of 2 that is >= d (d itself when it already is
//|             a power of 2)
//| Note:       d must be non-zero (asserted). Inputs above 0x80000000 wrap
//|             around to 0 because the result does not fit in 32 bits.
//| Reference:  http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
//*-----------------------------------------------------------------------------
__inline uint32_t HalCm_GetPow2Aligned(uint32_t d)
{
    CM_ASSERT(d > 0);

    // Start from d - 1 so that exact powers of 2 map onto themselves.
    uint32_t smeared = d - 1;

    // Copy the highest set bit into every lower bit position (shifts 1,2,4,8,16).
    for (uint32_t shift = 1; shift <= 16; shift <<= 1)
    {
        smeared |= smeared >> shift;
    }

    // All bits below the top one are set now; adding one yields the power of 2.
    return smeared + 1;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Checks if Task has any thread arguments
//| Returns:    True as soon as any argument of any kernel has its perThread
//|             flag set, false when none does (including numKernels == 0)
//*-----------------------------------------------------------------------------
bool HalCm_GetTaskHasThreadArg(PCM_HAL_KERNEL_PARAM *kernels, uint32_t numKernels)
{
    for (uint32_t kernelIdx = 0; kernelIdx < numKernels; ++kernelIdx)
    {
        PCM_HAL_KERNEL_PARAM kernel = kernels[kernelIdx];

        for (uint32_t arg = 0; arg < kernel->numArgs; ++arg)
        {
            // The first per-thread argument settles the answer.
            if (kernel->argParams[arg].perThread)
            {
                return true;
            }
        }
    }

    return false;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Allocate and lock the timestamp buffers for the render and
//|             VEBOX engines
//| Returns:    Result of the operation
//| Note:       Both buffers are sized GetTimeStampResourceSize() * maxTasks.
//|             Nothing is released here on failure.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AllocateTsResource(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    MOS_STATUS              eStatus     = MOS_STATUS_SUCCESS;
    PMOS_INTERFACE          osInterface = state->osInterface;
    MOS_ALLOC_GFXRES_PARAMS bufferParams;
    MOS_LOCK_PARAMS         lockParams;
    auto                    &renderTs   = state->renderTimeStampResource;
    auto                    &veboxTs    = state->veboxTimeStampResource;

    // Same byte size for both engines' buffers
    const uint32_t tsBytes =
        state->cmHalInterface->GetTimeStampResourceSize() * state->cmDeviceParam.maxTasks;

    //---------------------------------------------------------------------
    // Render engine timestamp buffer
    //---------------------------------------------------------------------
    MOS_ZeroMemory(&bufferParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
    bufferParams.pBufName = "TsResource";
    bufferParams.Type     = MOS_GFXRES_BUFFER;
    bufferParams.Format   = Format_Buffer;  //used in RenderHal_OsAllocateResource_Linux
    bufferParams.TileType = MOS_TILE_LINEAR;
    bufferParams.dwBytes  = tsBytes;

    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
        osInterface,
        &bufferParams,
        &renderTs.osResource));

    // Only the render buffer is marked to skip resource sync
    osInterface->pfnSkipResourceSync(&renderTs.osResource);

    // Lock it and remember the returned address
    MOS_ZeroMemory(&lockParams, sizeof(MOS_LOCK_PARAMS));
    lockParams.ForceCached = true;
    lockParams.ReadOnly    = 1;

    renderTs.data = (uint8_t*)osInterface->pfnLockResource(
        osInterface,
        &renderTs.osResource,
        &lockParams);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(renderTs.data);
    renderTs.locked = true;

    //---------------------------------------------------------------------
    // VEBOX timestamp buffer (same parameters, no sync skip)
    //---------------------------------------------------------------------
    MOS_ZeroMemory(&bufferParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
    bufferParams.pBufName = "TsResource";
    bufferParams.Type     = MOS_GFXRES_BUFFER;
    bufferParams.Format   = Format_Buffer;  //used in RenderHal_OsAllocateResource_Linux
    bufferParams.TileType = MOS_TILE_LINEAR;
    bufferParams.dwBytes  = tsBytes;

    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
        osInterface,
        &bufferParams,
        &veboxTs.osResource));

    MOS_ZeroMemory(&lockParams, sizeof(MOS_LOCK_PARAMS));
    lockParams.ForceCached = true;
    lockParams.ReadOnly    = 1;

    veboxTs.data = (uint8_t*)osInterface->pfnLockResource(
        osInterface,
        &veboxTs.osResource,
        &lockParams);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(veboxTs.data);
    veboxTs.locked = true;

finish:
    return eStatus;
}
//! \brief    Set up the tracker resources
//! \details  Initializes the render tracker producer, then allocates and locks
//!           a one-cacheline (MHW_CACHELINE_SIZE) linear buffer used as the
//!           VEBOX tracker. The first dword of that buffer is seeded with
//!           MemoryBlock::m_invalidTrackerId and the current tracker id is
//!           set to 1.
//! \param    [in] state
//!           Pointer to CM_HAL_STATE structure
//! \return   MOS_STATUS
MOS_STATUS HalCm_AllocateTrackerResource(
    PCM_HAL_STATE state)
{
    MOS_STATUS              eStatus      = MOS_STATUS_SUCCESS;
    PMOS_INTERFACE          osInterface  = state->osInterface;
    PRENDERHAL_INTERFACE    renderHal    = state->renderHal;
    auto                    &veboxTracker = renderHal->veBoxTrackerRes;
    MOS_ALLOC_GFXRES_PARAMS trackerBufferParams;
    MOS_LOCK_PARAMS         lockParams;

    // Tracker producer for RENDER engine
    renderHal->trackerProducer.Initialize(osInterface);

    // Tracker resource for VeBox engine
    Mos_ResetResource(&veboxTracker.osResource);

    MOS_ZeroMemory(&trackerBufferParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
    trackerBufferParams.pBufName = "VeboxTrackerRes";
    trackerBufferParams.dwBytes  = MHW_CACHELINE_SIZE;
    trackerBufferParams.Format   = Format_Buffer;
    trackerBufferParams.TileType = MOS_TILE_LINEAR;
    trackerBufferParams.Type     = MOS_GFXRES_BUFFER;

    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
        osInterface,
        &trackerBufferParams,
        &veboxTracker.osResource));

    // Lock the buffer and keep the returned address
    MOS_ZeroMemory(&lockParams, sizeof(MOS_LOCK_PARAMS));
    lockParams.ForceCached = true;
    lockParams.ReadOnly    = 1;

    veboxTracker.data = (uint32_t*)osInterface->pfnLockResource(
        osInterface,
        &veboxTracker.osResource,
        &lockParams);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(veboxTracker.data);

    // Seed the tracker value and bookkeeping
    *(veboxTracker.data)          = MemoryBlock::m_invalidTrackerId;
    veboxTracker.currentTrackerId = 1;
    veboxTracker.locked           = true;

finish:
    return eStatus;
}
//! \brief    Initialize dynamic state heap
//! \details  Creates a HeapManager, configures it from heapParam (default
//!           behavior, initial and extend sizes, tracker producer), asks it to
//!           lock heaps on allocation, and on success publishes it as
//!           state->renderHal->dgsheapManager. If any step fails, the
//!           partially configured manager is destroyed here, so it is never
//!           leaked and state->renderHal->dgsheapManager is left untouched.
//! \param    [in] state
//!           Pointer to CM_HAL_STATE structure
//! \param    [in] heapParam
//!           Pointer to CM_HAL_HEAP_PARAM structure; must not be nullptr
//! \return   MOS_STATUS
MOS_STATUS HalCm_InitializeDynamicStateHeaps(
    PCM_HAL_STATE     state,
    CM_HAL_HEAP_PARAM *heapParam)
{
    MOS_STATUS   eStatus  = MOS_STATUS_SUCCESS;
    HeapManager  *dgsHeap = nullptr;

    CM_CHK_NULL_GOTOFINISH_MOSERROR(heapParam);

    dgsHeap = MOS_New(HeapManager);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(dgsHeap);
    CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->RegisterOsInterface(state->osInterface));

    dgsHeap->SetDefaultBehavior(heapParam->behaviorGSH);
    CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->SetInitialHeapSize(heapParam->initialSizeGSH));
    CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->SetExtendHeapSize(heapParam->extendSizeGSH));
    CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->RegisterTrackerProducer(heapParam->trackerProducer));

    // lock the heap in the beginning, so cpu doesn't need to wait gpu finishing occupying it to lock it again
    CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->LockHeapsOnAllocate());

    // Success: hand ownership to the render HAL and forget the local pointer
    // so the cleanup below does not destroy it.
    state->renderHal->dgsheapManager = dgsHeap;
    dgsHeap = nullptr;

finish:
    // Still non-null only when a step after MOS_New failed.
    if (dgsHeap != nullptr)
    {
        MOS_Delete(dgsHeap);
    }
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Unlock (when locked) and free the render and VEBOX timestamp
//|             buffers
//| Returns:    None
//| Note:       A buffer whose OS resource is null is skipped.
//*-----------------------------------------------------------------------------
__inline void HalCm_FreeTsResource(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    PMOS_INTERFACE osInterface = state->osInterface;
    MOS_STATUS     unlockStatus;

    // Render engine timestamp buffer
    auto &renderTs = state->renderTimeStampResource;
    if (!Mos_ResourceIsNull(&renderTs.osResource))
    {
        if (renderTs.locked)
        {
            unlockStatus = (MOS_STATUS)osInterface->pfnUnlockResource(
                osInterface,
                &renderTs.osResource);
            CM_ASSERT(unlockStatus == MOS_STATUS_SUCCESS);
        }

        osInterface->pfnFreeResourceWithFlag(
            osInterface,
            &renderTs.osResource,
            SURFACE_FLAG_ASSUME_NOT_IN_USE);
    }

    // VEBOX timestamp buffer
    auto &veboxTs = state->veboxTimeStampResource;
    if (!Mos_ResourceIsNull(&veboxTs.osResource))
    {
        if (veboxTs.locked)
        {
            unlockStatus = (MOS_STATUS)osInterface->pfnUnlockResource(
                osInterface,
                &veboxTs.osResource);
            CM_ASSERT(unlockStatus == MOS_STATUS_SUCCESS);
        }

        osInterface->pfnFreeResourceWithFlag(
            osInterface,
            &veboxTs.osResource,
            SURFACE_FLAG_ASSUME_NOT_IN_USE);
    }
}
//! \brief    Free tracker resource
//! \details  Unlocks (when locked) and frees the VEBOX tracker buffer held by
//!           the render HAL. Does nothing when that OS resource is null.
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE structure
//! \return   void
__inline void HalCm_FreeTrackerResources(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    PMOS_INTERFACE osInterface   = state->osInterface;
    auto           &veboxTracker = state->renderHal->veBoxTrackerRes;
    MOS_STATUS     unlockStatus;

    // Nothing was allocated
    if (Mos_ResourceIsNull(&veboxTracker.osResource))
    {
        return;
    }

    if (veboxTracker.locked)
    {
        unlockStatus = (MOS_STATUS)osInterface->pfnUnlockResource(
            osInterface,
            &veboxTracker.osResource);
        CM_ASSERT(unlockStatus == MOS_STATUS_SUCCESS);
    }

    osInterface->pfnFreeResourceWithFlag(
        osInterface,
        &veboxTracker.osResource,
        SURFACE_FLAG_ASSUME_NOT_IN_USE);
}
//*-----------------------------------------------------------------------------
//| Purpose:    Enable GPGPU mid-thread preemption and allocate the CSR buffer
//|             (CM_CSR_SURFACE_SIZE bytes, linear, Format_RAW)
//| Returns:    Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AllocateCSRResource(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    MOS_STATUS              eStatus     = MOS_STATUS_SUCCESS;
    PMOS_INTERFACE          osInterface = state->osInterface;
    MOS_ALLOC_GFXRES_PARAMS csrParams;

    //Enable Mid-thread
    state->renderHal->pfnEnableGpgpuMiddleThreadPreemption(state->renderHal);

    MOS_ZeroMemory(&csrParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
    csrParams.pBufName = "CSRResource";
    csrParams.dwBytes  = CM_CSR_SURFACE_SIZE;
    csrParams.Format   = Format_RAW;  //used in VpHal_OsAllocateResource_Linux
    csrParams.TileType = MOS_TILE_LINEAR;
    csrParams.Type     = MOS_GFXRES_BUFFER;

    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
        osInterface,
        &csrParams,
        &state->csrResource));

    // Reached only when the allocation succeeded
    osInterface->pfnSkipResourceSync(&state->csrResource);

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Allocate the Sip buffer (CM_DEBUG_SURFACE_SIZE bytes, linear)
//|             and lock it, keeping the mapped address in sipResource.data
//| Returns:    Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AllocateSipResource(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    MOS_STATUS              eStatus     = MOS_STATUS_SUCCESS;
    PMOS_INTERFACE          osInterface = state->osInterface;
    auto                    &sip        = state->sipResource;
    MOS_ALLOC_GFXRES_PARAMS sipParams;
    MOS_LOCK_PARAMS         lockParams;

    MOS_ZeroMemory(&sipParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
    sipParams.pBufName = "SipResource";
    sipParams.dwBytes  = CM_DEBUG_SURFACE_SIZE;
    sipParams.Format   = Format_Buffer;  //used in RenderHal_OsAllocateResource_Linux
    sipParams.TileType = MOS_TILE_LINEAR;
    sipParams.Type     = MOS_GFXRES_BUFFER;

    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
        osInterface,
        &sipParams,
        &sip.osResource));

    // Lock the Resource
    MOS_ZeroMemory(&lockParams, sizeof(MOS_LOCK_PARAMS));
    lockParams.ForceCached = true;
    lockParams.ReadOnly    = 1;

    sip.data = (uint8_t*)osInterface->pfnLockResource(
        osInterface,
        &sip.osResource,
        &lockParams);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(sip.data);
    sip.locked = true;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Free the CSR buffer if it has been allocated
//| Returns:    None
//*-----------------------------------------------------------------------------
__inline void HalCm_FreeCsrResource(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    // Null resource: nothing to release
    if (Mos_ResourceIsNull(&state->csrResource))
    {
        return;
    }

    PMOS_INTERFACE osInterface = state->osInterface;
    osInterface->pfnFreeResourceWithFlag(
        osInterface,
        &state->csrResource,
        SURFACE_FLAG_ASSUME_NOT_IN_USE);
}
//*-----------------------------------------------------------------------------
//| Purpose:    Unlock (when locked) and free the Sip buffer
//| Returns:    None
//| Note:       Does nothing when the Sip OS resource is null.
//*-----------------------------------------------------------------------------
__inline void HalCm_FreeSipResource(
    PCM_HAL_STATE state)                                                        // [in] Pointer to CM HAL State
{
    PMOS_INTERFACE osInterface  = state->osInterface;
    auto           &sip         = state->sipResource;
    MOS_STATUS     unlockStatus = MOS_STATUS_SUCCESS;

    if (Mos_ResourceIsNull(&sip.osResource))
    {
        return;
    }

    if (sip.locked)
    {
        unlockStatus = (MOS_STATUS)osInterface->pfnUnlockResource(
            osInterface,
            &sip.osResource);
        CM_ASSERT(unlockStatus == MOS_STATUS_SUCCESS);
    }

    osInterface->pfnFreeResourceWithFlag(
        osInterface,
        &sip.osResource,
        SURFACE_FLAG_ASSUME_NOT_IN_USE);
}
//*-----------------------------------------------------------------------------
//| Purpose:    Copies one unit of argument data into the buffer
//| Returns:    None
//| Note:       Source is firstValue + threadIndex * unitSize, destination is
//|             buffer + payloadOffset, and unitSize bytes are copied.
//*-----------------------------------------------------------------------------
__inline void HalCm_SetArgData(
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    uint32_t                    threadIndex,
    uint8_t                     *buffer)
{
    MOS_SecureMemcpy(
        buffer + argParam->payloadOffset,                                       // destination slot
        argParam->unitSize,
        argParam->firstValue + (threadIndex * argParam->unitSize),              // this thread's value
        argParam->unitSize);
}
//*-----------------------------------------------------------------------------
//| Purpose:    Get the Surface2D UP table entry for a handle
//| Returns:    MOS_STATUS_SUCCESS with *entryOut set, or
//|             MOS_STATUS_INVALID_HANDLE when the handle is out of range or the
//|             entry's width is 0 (*entryOut is then left untouched)
//*-----------------------------------------------------------------------------
__inline MOS_STATUS HalCm_GetResourceUPEntry(
    PCM_HAL_STATE               state,                                          // [in]  Pointer to CM State
    uint32_t                    handle,                                         // [in]  Handle
    PCM_HAL_SURFACE2D_UP_ENTRY  *entryOut)                                      // [out] Surface2D UP Entry
{
    // Assume failure until both checks pass
    MOS_STATUS eStatus = MOS_STATUS_INVALID_HANDLE;

    if (handle >= state->cmDeviceParam.max2DSurfaceUPTableSize)
    {
        CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
    }
    else
    {
        PCM_HAL_SURFACE2D_UP_ENTRY slot = &state->surf2DUPTable[handle];
        if (slot->width == 0)
        {
            CM_ASSERTMESSAGE("handle '%d' is not set", handle);
        }
        else
        {
            *entryOut = slot;
            eStatus   = MOS_STATUS_SUCCESS;
        }
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Get the buffer table entry for a handle
//| Returns:    MOS_STATUS_SUCCESS with *entryOut set, or
//|             MOS_STATUS_INVALID_HANDLE when the handle is out of range or the
//|             entry's size is 0 (*entryOut is then left untouched)
//*-----------------------------------------------------------------------------
__inline MOS_STATUS HalCm_GetBufferEntry(
    PCM_HAL_STATE           state,                                              // [in]  Pointer to CM State
    uint32_t                handle,                                             // [in]  Handle
    PCM_HAL_BUFFER_ENTRY    *entryOut)                                          // [out] Buffer Entry
{
    // Assume failure until both checks pass
    MOS_STATUS eStatus = MOS_STATUS_INVALID_HANDLE;

    if (handle >= state->cmDeviceParam.maxBufferTableSize)
    {
        CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
    }
    else
    {
        PCM_HAL_BUFFER_ENTRY slot = &state->bufferTable[handle];
        if (slot->size == 0)
        {
            CM_ASSERTMESSAGE("handle '%d' is not set", handle);
        }
        else
        {
            *entryOut = slot;
            eStatus   = MOS_STATUS_SUCCESS;
        }
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Get the Surface2D table entry for a handle
//| Returns:    MOS_STATUS_SUCCESS with *entryOut set, or
//|             MOS_STATUS_INVALID_HANDLE when the handle is out of range or the
//|             entry's width or height is 0 (*entryOut is then left untouched)
//*-----------------------------------------------------------------------------
__inline MOS_STATUS HalCm_GetSurface2DEntry(
    PCM_HAL_STATE               state,                                          // [in]  Pointer to CM State
    uint32_t                    handle,                                         // [in]  Handle
    PCM_HAL_SURFACE2D_ENTRY     *entryOut)                                      // [out] Surface2D Entry
{
    // Assume failure until both checks pass
    MOS_STATUS eStatus = MOS_STATUS_INVALID_HANDLE;

    if (handle >= state->cmDeviceParam.max2DSurfaceTableSize)
    {
        CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
    }
    else
    {
        PCM_HAL_SURFACE2D_ENTRY slot = &state->umdSurf2DTable[handle];
        if ((slot->width == 0) || (slot->height == 0))
        {
            CM_ASSERTMESSAGE("handle '%d' is not set", handle);
        }
        else
        {
            *entryOut = slot;
            eStatus   = MOS_STATUS_SUCCESS;
        }
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Get the 3D resource table entry for a handle
//| Returns:    MOS_STATUS_SUCCESS with *entryOut set, or
//|             MOS_STATUS_INVALID_HANDLE when the handle is out of range or the
//|             entry's OS resource is null (*entryOut is then left untouched)
//*-----------------------------------------------------------------------------
__inline MOS_STATUS HalCm_Get3DResourceEntry(
    PCM_HAL_STATE               state,                                          // [in]  Pointer to CM State
    uint32_t                    handle,                                         // [in]  Handle
    PCM_HAL_3DRESOURCE_ENTRY    *entryOut)                                      // [out] 3D Resource Entry
{
    // Assume failure until both checks pass
    MOS_STATUS eStatus = MOS_STATUS_INVALID_HANDLE;

    if (handle >= state->cmDeviceParam.max3DSurfaceTableSize)
    {
        CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
    }
    else
    {
        PCM_HAL_3DRESOURCE_ENTRY slot = &state->surf3DTable[handle];
        if (Mos_ResourceIsNull(&slot->osResource))
        {
            CM_ASSERTMESSAGE("3D handle '%d' is not set", handle);
        }
        else
        {
            *entryOut = slot;
            eStatus   = MOS_STATUS_SUCCESS;
        }
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Allocates one zero-filled memory block and carves the HAL's
//|             surface, buffer, sampler, task-status and BTI index tables out
//|             of it
//| Return:     MOS_STATUS_SUCCESS on success; the error status set by
//|             CM_CHK_NULL_GOTOFINISH_MOSERROR when the allocation fails (the
//|             table pointers are then left untouched)
//| Note:       A single layer of memory is allocated to avoid fragmentation.
//|             Table sizes come from state->cmDeviceParam. The tables are laid
//|             out back to back in the order they are assigned below.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AllocateTables(
    PCM_HAL_STATE state)                                                        // [in] Pointer to HAL CM state
{
    MOS_STATUS           eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_DEVICE_PARAM deviceParam = &state->cmDeviceParam;
    uint8_t              *pb         = nullptr;

    // Byte size of every table
    uint32_t lookUpTableSize          = deviceParam->max2DSurfaceTableSize *
                                        sizeof(CMLOOKUP_ENTRY);
    uint32_t i2DSURFTableSize         = deviceParam->max2DSurfaceTableSize *
                                        sizeof(CM_HAL_SURFACE2D_ENTRY);
    uint32_t bufferTableSize          = deviceParam->maxBufferTableSize *
                                        sizeof(CM_HAL_BUFFER_ENTRY);
    uint32_t i2DSURFUPTableSize       = deviceParam->max2DSurfaceUPTableSize *
                                        sizeof(CM_HAL_SURFACE2D_UP_ENTRY);
    uint32_t i3DSurfTableSize         = deviceParam->max3DSurfaceTableSize *
                                        sizeof(CM_HAL_3DRESOURCE_ENTRY);
    uint32_t samplerTableSize         = deviceParam->maxSamplerTableSize *
                                        sizeof(MHW_SAMPLER_STATE_PARAM);
    uint32_t sampler8x8TableSize      = deviceParam->maxSampler8x8TableSize *
                                        sizeof(CM_HAL_SAMPLER_8X8_ENTRY);
    uint32_t taskStatusTableSize      = deviceParam->maxTasks * sizeof(char);
    uint32_t bt2DIndexTableSize       = deviceParam->max2DSurfaceTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
    uint32_t bt2DUPIndexTableSize     = deviceParam->max2DSurfaceUPTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
    uint32_t bt3DIndexTableSize       = deviceParam->max3DSurfaceTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
    uint32_t btbufferIndexTableSize   = deviceParam->maxBufferTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
    uint32_t samplerIndexTableSize    = deviceParam->maxSamplerTableSize * sizeof(char);
    uint32_t sampler8x8IndexTableSize = deviceParam->maxSampler8x8TableSize * sizeof(char);

    uint32_t size = lookUpTableSize          +
                    i2DSURFTableSize         +
                    bufferTableSize          +
                    i2DSURFUPTableSize       +
                    i3DSurfTableSize         +
                    samplerTableSize         +
                    sampler8x8TableSize      +
                    taskStatusTableSize      +
                    bt2DIndexTableSize       +
                    bt2DUPIndexTableSize     +
                    bt3DIndexTableSize       +
                    btbufferIndexTableSize   +
                    samplerIndexTableSize    +
                    sampler8x8IndexTableSize;

    state->tableMemories = MOS_AllocAndZeroMemory(size);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->tableMemories);

    // Hand out consecutive slices of the block, one per table
    pb = (uint8_t*)state->tableMemories;

    state->surf2DTable          = (PCMLOOKUP_ENTRY)pb;
    pb                         += lookUpTableSize;

    state->umdSurf2DTable       = (PCM_HAL_SURFACE2D_ENTRY)pb;
    pb                         += i2DSURFTableSize;

    state->bufferTable          = (PCM_HAL_BUFFER_ENTRY)pb;
    pb                         += bufferTableSize;

    state->surf2DUPTable        = (PCM_HAL_SURFACE2D_UP_ENTRY)pb;
    pb                         += i2DSURFUPTableSize;

    state->surf3DTable          = (PCM_HAL_3DRESOURCE_ENTRY)pb;
    pb                         += i3DSurfTableSize;

    state->samplerTable         = (PMHW_SAMPLER_STATE_PARAM)pb;
    pb                         += samplerTableSize;

    state->sampler8x8Table      = (PCM_HAL_SAMPLER_8X8_ENTRY)pb;
    pb                         += sampler8x8TableSize;

    state->taskStatusTable      = (char *)pb;
    pb                         += taskStatusTableSize;

    state->bti2DIndexTable      = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
    pb                         += bt2DIndexTableSize;

    state->bti2DUPIndexTable    = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
    pb                         += bt2DUPIndexTableSize;

    state->bti3DIndexTable      = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
    pb                         += bt3DIndexTableSize;

    state->btiBufferIndexTable  = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
    pb                         += btbufferIndexTableSize;

    state->samplerIndexTable    = (char *)pb;
    pb                         += samplerIndexTableSize;

    // Last table: no further advance needed
    state->sampler8x8IndexTable = (char *)pb;

finish:
    // Propagate the real status: an allocation failure must not be reported
    // as success, otherwise callers would go on to use null table pointers.
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Adds a tag to distinguish between same kernel ID
//|             Used for batch buffer re-use when splitting large task into
//|             smaller pieces for EnqueueWithHints
//|             Using bits [48:42] from kernel ID for extra tag:
//|             numTasks is OR-ed in starting at bit 45 and numCurrentTask
//|             starting at bit 42
//| Returns:    Always MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AddKernelIDTag(
    PCM_HAL_KERNEL_PARAM    *pKernels,
    uint32_t                numKernels,
    uint32_t                numTasks,
    uint32_t                numCurrentTask)
{
    // Widen before shifting so the tag bits are not lost in 32-bit arithmetic
    const uint64_t taskCountBits   = static_cast<uint64_t>(numTasks) << 45;
    const uint64_t currentTaskBits = static_cast<uint64_t>(numCurrentTask) << 42;
    const uint64_t tag             = taskCountBits | currentTaskBits;

    for (uint32_t krn = 0; krn < numKernels; ++krn)
    {
        pKernels[krn]->kernelId |= tag;
    }

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Gets the Batch Buffer for rendering. If needed, de-allocate /
//|             allocate the memory for BB
//| Returns:    Result of the operation
//| Note:       Steps, in order:
//|             1. Validate numKernels and the power-of-2 aligned buffer size.
//|             2. (CM_BATCH_BUFFER_REUSE_ENABLE only) reuse an allocated buffer
//|                whose recorded kernel IDs match this task and which is still
//|                marked 'latest'; its refCount is incremented.
//|             3. Otherwise take the first slot that has no OS resource.
//|             4. Otherwise take a non-busy buffer that is already big enough.
//|             5. Otherwise free and re-allocate the first non-busy buffer.
//|             MOS_STATUS_INVALID_PARAMETER is returned when no slot is usable
//|             or when numKernels exceeds CM_MAX_KERNELS_PER_TASK.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetBatchBuffer(
    PCM_HAL_STATE           state,                                              // [in]  Pointer to CM State
    uint32_t                numKernels,                                         // [in]  Number of Kernels
    PCM_HAL_KERNEL_PARAM    *kernels,                                           // [in]  Array for kernel data
    PMHW_BATCH_BUFFER       *batchBufferOut)                                    // [out] Batch Buffer Out
{
    MOS_STATUS              eStatus;
    PMHW_BATCH_BUFFER       batchBuffer = nullptr;
    PRENDERHAL_INTERFACE    renderHal;
    int32_t                 size;
    uint32_t                i;
    uint32_t                j;
    uint32_t                k;
    int32_t                 freeIdx;
    uint64_t                kernelIds[CM_MAX_KERNELS_PER_TASK];
    uint64_t                kernelParamsIds[CM_MAX_KERNELS_PER_TASK];
    CM_HAL_BB_DIRTY_STATUS  bbDirtyStatus;
    PCM_HAL_BB_ARGS         bbcmArgs;

    eStatus         = MOS_STATUS_SUCCESS;
    renderHal       = state->renderHal;
    freeIdx         = CM_INVALID_INDEX;
    bbDirtyStatus   = CM_HAL_BB_CLEAN;

    // kernelIds / kernelParamsIds (and the per-buffer kernelIds array they are
    // compared against) hold CM_MAX_KERNELS_PER_TASK entries; a larger kernel
    // count would write past the end of them.
    if (numKernels > CM_MAX_KERNELS_PER_TASK)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Number of kernels '%d' exceeds the per-task maximum", numKernels);
        goto finish;
    }

    // Align the Batch Buffer size to power of 2
    size = HalCm_GetPow2Aligned(state->taskParam->batchBufferSize);

    MOS_ZeroMemory(&kernelIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
    MOS_ZeroMemory(&kernelParamsIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));

    //Sanity check for batch buffer
    if (size > CM_MAX_BB_SIZE)
    {
        eStatus = MOS_STATUS_EXCEED_MAX_BB_SIZE;
        CM_ASSERTMESSAGE("Batch Buffer Size exeeceds Max '%d'", size);
        goto finish;
    }

    for( i = 0; i < numKernels; ++i )
    {
        // remove upper 16 bits used for kernel binary re-use in GSH
        kernelParamsIds[i] = ((kernels[i])->kernelId << 16 ) >> 16;
    }

#if CM_BATCH_BUFFER_REUSE_ENABLE

    // The task counts as dirty as soon as one kernel's thread space is dirty
    bbDirtyStatus = CM_HAL_BB_CLEAN;
    for (k = 0; k < numKernels; ++k)
    {
        if (kernels[k]->kernelThreadSpaceParam.bbDirtyStatus == CM_HAL_BB_DIRTY)
        {
            bbDirtyStatus = CM_HAL_BB_DIRTY;
            break;
        }
    }

    // Look for an allocated buffer recorded with the same kernel ID list
    for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
    {
        batchBuffer = &state->batchBuffers[i];
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);

        //if (!Mos_ResourceIsNull(&batchBuffer->OsResource) && (!batchBuffer->bBusy))
        if (!Mos_ResourceIsNull(&batchBuffer->OsResource))
        {
            // Zero-padded copy of this task's IDs, compared over the full array
            MOS_FillMemory(kernelIds, sizeof(uint64_t)*CM_MAX_KERNELS_PER_TASK, 0);
            for (j = 0; j < numKernels; j ++)
            {
                kernelIds[j] = kernelParamsIds[j];
            }

            bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
            if (RtlEqualMemory(kernelIds, bbcmArgs->kernelIds, sizeof(uint64_t)*CM_MAX_KERNELS_PER_TASK))
            {
                if( batchBuffer->bBusy && bbDirtyStatus == CM_HAL_BB_DIRTY )
                {
                    // Busy buffer for a dirty task: mark it stale and keep searching
                    bbcmArgs->latest = false;
                }
                else if( bbcmArgs->latest == true )
                {
                    break;
                }
            }
        }
    }

    // Loop ended early: batchBuffer is the reusable match
    if (i < (uint32_t)state->numBatchBuffers)
    {
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);

        bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;

        bbcmArgs->refCount ++;
        batchBuffer->iCurrent   = 0;
        batchBuffer->dwSyncTag  = 0;
        batchBuffer->iRemaining = batchBuffer->iSize;
        *batchBufferOut = batchBuffer;
        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }
#endif

    // First slot that has never been allocated (or has been freed)
    for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
    {
        batchBuffer = &state->batchBuffers[i];
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
        // No holes in the array of batch buffers
        if (Mos_ResourceIsNull(&batchBuffer->OsResource))
        {
            freeIdx = i;
            break;
        }
    }

    if (freeIdx == CM_INVALID_INDEX)
    {
        // Every slot is allocated: look for an idle one
        for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
        {
            batchBuffer = &state->batchBuffers[i];
            CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
            CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
            bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
            if (!batchBuffer->bBusy)
            {
                if (batchBuffer->iSize >= size)
                {
                    // Idle and large enough: take it as is
                    batchBuffer->iCurrent   = 0;
                    batchBuffer->iRemaining = batchBuffer->iSize;
                    batchBuffer->dwSyncTag  = 0;

                    bbcmArgs->refCount = 1;
                    // Separate index so the outer loop counter is not clobbered
                    for (j = 0; j < numKernels; j ++)
                    {
                        bbcmArgs->kernelIds[j] = kernelParamsIds[j];
                    }

                    bbcmArgs->latest = true;

                    *batchBufferOut = batchBuffer;
                    eStatus = MOS_STATUS_SUCCESS;
                    goto finish;
                }

                // Idle but too small: remember the first one for re-allocation
                if (freeIdx == CM_INVALID_INDEX)
                {
                    freeIdx = i;
                }
            }
        }
    }

    if (freeIdx == CM_INVALID_INDEX)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("No batch buffer available");
        goto finish;
    }

    batchBuffer = &state->batchBuffers[freeIdx];
    CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
    bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
    bbcmArgs->refCount = 1;
    for (i = 0; i <numKernels; i ++)
    {
        bbcmArgs->kernelIds[i] = kernelParamsIds[i];
    }

    bbcmArgs->latest = true;

    if (!Mos_ResourceIsNull(&batchBuffer->OsResource))
    {
        // Deallocate Batch Buffer
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnFreeBB(renderHal, batchBuffer));
    }

    // Allocate Batch Buffer
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAllocateBB(renderHal, batchBuffer, size));
    *batchBufferOut = batchBuffer;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Parse the kernels of a task and populate the Task Param structure
//|             (state->taskParam). Also decides whether the media walker stays
//|             enabled for the task, accumulates the media-object batch buffer
//|             size, fills the VFE scoreboard parameters when the platform
//|             interface asks for them, and sizes the surface binding table.
//| Return:     MOS_STATUS_SUCCESS, or MOS_STATUS_INVALID_PARAMETER when a kernel
//|             entry is null/empty, a dependency list exceeds
//|             CM_HAL_MAX_DEPENDENCY_COUNT, or the task has too many threads
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ParseTask(
    PCM_HAL_STATE               state,           // [in] Pointer to HAL CM state
    PCM_HAL_EXEC_TASK_PARAM     execParam)       // [in] Pointer to Exec Task Param
{
    MOS_STATUS                          eStatus;
    PCM_HAL_TASK_PARAM                  taskParam;
    PCM_HAL_KERNEL_PARAM                kernelParam;
    uint32_t                            hdrSize;
    uint32_t                            totalThreads;
    uint32_t                            krn;
    PMHW_VFE_SCOREBOARD                 scoreboardParams;
    bool                                nonstallingScoreboardEnable;
    CM_HAL_DEPENDENCY                   vfeDependencyInfo;
    PCM_HAL_KERNEL_THREADSPACE_PARAM    kernelTSParam;
    uint32_t                            i, j, k;
    uint8_t                             reuseBBUpdateMask;
    bool                                bitIsSet;
    PCM_HAL_MASK_AND_RESET              dependencyMask;
    uint32_t                            uSurfaceNumber;
    uint32_t                            uSurfaceIndex;
    bool                                threadArgExists;

    eStatus                     = MOS_STATUS_SUCCESS;
    totalThreads                = 0;
    taskParam                   = state->taskParam;
    taskParam->batchBufferSize  = 0;
    nonstallingScoreboardEnable = true;
    reuseBBUpdateMask           = 0;
    bitIsSet                    = false;
    threadArgExists             = false;
    hdrSize = state->renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;

    // Copy the per-task thread space settings supplied by the caller
    taskParam->dependencyPattern      = execParam->dependencyPattern;
    taskParam->threadSpaceWidth       = execParam->threadSpaceWidth;
    taskParam->threadSpaceHeight      = execParam->threadSpaceHeight;
    taskParam->walkingPattern         = execParam->walkingPattern;
    taskParam->walkingParamsValid     = execParam->walkingParamsValid;
    taskParam->dependencyVectorsValid = execParam->dependencyVectorsValid;
    if (taskParam->walkingParamsValid)
    {
        taskParam->walkingParams = execParam->walkingParams;
    }
    if (taskParam->dependencyVectorsValid)
    {
        taskParam->dependencyVectors = execParam->dependencyVectors;
    }
    taskParam->kernelDebugEnabled = (uint32_t)execParam->kernelDebugEnabled;
    //GT-PIN
    taskParam->surfEntryInfoArrays    = execParam->surfEntryInfoArrays;
    taskParam->surfacePerBT           = 0;
    taskParam->colorCountMinusOne     = execParam->colorCountMinusOne;
    taskParam->mediaWalkerGroupSelect = execParam->mediaWalkerGroupSelect;
    if (execParam->threadCoordinates)
    {
        taskParam->threadCoordinates = execParam->threadCoordinates;
    }
    taskParam->dependencyMasks      = execParam->dependencyMasks;
    taskParam->syncBitmap           = execParam->syncBitmap;
    taskParam->conditionalEndBitmap = execParam->conditionalEndBitmap;
    MOS_SecureMemcpy(taskParam->conditionalEndInfo, sizeof(taskParam->conditionalEndInfo), execParam->conditionalEndInfo, sizeof(execParam->conditionalEndInfo));
    taskParam->numKernels = execParam->numKernels;
    taskParam->taskConfig = execParam->taskConfig;

    // Start with the walker enabled; any kernel that cannot use it clears the flag below
    state->walkerParams.CmWalkerEnable = true;
    state->renderHal->IsMDFLoad = (taskParam->taskConfig.turboBoostFlag == CM_TURBO_BOOST_ENABLE);

    for (krn = 0; krn < execParam->numKernels; krn++)
    {
        if ((execParam->kernels[krn] == nullptr) ||
            (execParam->kernelSizes[krn] == 0))
        {
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            CM_ASSERTMESSAGE("Invalid Kernel data");
            goto finish;
        }

        kernelParam = (PCM_HAL_KERNEL_PARAM)execParam->kernels[krn];
        PCM_INDIRECT_SURFACE_INFO indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;

        // Track the largest binding table footprint required by any kernel:
        // both the highest index touched by indirect surfaces and the total
        // number of binding table entries the kernel consumes.
        uSurfaceNumber = 0;
        if (kernelParam->indirectDataParam.surfaceCount)
        {
            uSurfaceIndex = 0;
            for (i = 0; i < kernelParam->indirectDataParam.surfaceCount; i++)
            {
                uSurfaceIndex = (indirectSurfaceInfo + i)->bindingTableIndex > uSurfaceIndex ? ((indirectSurfaceInfo + i)->bindingTableIndex + (indirectSurfaceInfo + i)->numBTIPerSurf - 1) : uSurfaceIndex;
                uSurfaceNumber = uSurfaceNumber + (indirectSurfaceInfo + i)->numBTIPerSurf;
            }
            taskParam->surfacePerBT = taskParam->surfacePerBT > uSurfaceIndex ? taskParam->surfacePerBT : uSurfaceIndex;
        }
        uSurfaceNumber += kernelParam->numSurfaces;
        taskParam->surfacePerBT = taskParam->surfacePerBT < uSurfaceNumber ?
                                  uSurfaceNumber : taskParam->surfacePerBT;

        // 26Z must be media object because by default it uses thread dependency mask
        // if there is no thread payload and dependency is not WAVEFRONT26Z, check if walker can be used
        if (kernelParam->payloadSize == 0)
        {
            // per-kernel thread space is available, so check it first
            if ((kernelParam->kernelThreadSpaceParam.threadSpaceWidth != 0) &&
                (kernelParam->kernelThreadSpaceParam.patternType != CM_WAVEFRONT26Z) &&
                (kernelParam->kernelThreadSpaceParam.patternType != CM_WAVEFRONT26ZI) &&
                (kernelParam->kernelThreadSpaceParam.threadCoordinates == nullptr))
            {
                kernelParam->walkerParams.cmWalkerEnable    = true;
                kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
            }
            else if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth == 0)
            {
                // Check per-task thread space setting
                if (state->taskParam->threadCoordinates)
                {
                    if (state->taskParam->threadCoordinates[krn] == nullptr)
                    {
                        kernelParam->walkerParams.cmWalkerEnable    = true;
                        kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
                    }
                }
                else
                {
                    kernelParam->walkerParams.cmWalkerEnable    = true;
                    kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
                }
            }
        }

        // Media walker mode will be disabled if any kernel needs to use media object; mixed working modes are not supported
        state->walkerParams.CmWalkerEnable &= kernelParam->walkerParams.cmWalkerEnable;
        if (!state->walkerParams.CmWalkerEnable)
        {
            taskParam->batchBufferSize +=
                kernelParam->numThreads * (hdrSize + MOS_MAX(kernelParam->payloadSize, 4));
        }
        totalThreads += kernelParam->numThreads;
    }
    taskParam->batchBufferSize += CM_EXTRA_BB_SPACE;

    if (state->cmHalInterface->IsScoreboardParamNeeded())
    {
        scoreboardParams = &state->scoreboardParams;
        scoreboardParams->ScoreboardMask = 0;
        scoreboardParams->ScoreboardType = nonstallingScoreboardEnable;

        // set VFE scoreboarding information from union of kernel dependency vectors
        MOS_ZeroMemory(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY));
        for (krn = 0; krn < execParam->numKernels; krn++)
        {
            kernelParam   = execParam->kernels[krn];
            kernelTSParam = &kernelParam->kernelThreadSpaceParam;

            // calculate union dependency vector of all kernels with dependency
            if (kernelTSParam->dependencyInfo.count || kernelTSParam->dependencyVectorsValid)
            {
                if (vfeDependencyInfo.count == 0)
                {
                    // First kernel with dependencies seeds the union as-is
                    if (kernelTSParam->dependencyInfo.count)
                    {
                        MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyInfo, sizeof(CM_HAL_DEPENDENCY));
                    }
                    else if (kernelTSParam->dependencyVectorsValid)
                    {
                        MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyVectors, sizeof(CM_HAL_DEPENDENCY));
                    }
                    kernelTSParam->globalDependencyMask = (1 << vfeDependencyInfo.count) - 1;
                }
                else
                {
                    uint32_t count = 0;
                    CM_HAL_DEPENDENCY dependencyInfo;

                    count = kernelTSParam->dependencyVectorsValid ?
                            kernelTSParam->dependencyVectors.count :
                            kernelTSParam->dependencyInfo.count;

                    // The copies below are sized by 'count'; reject it before it can
                    // overrun the local delta arrays.
                    // NOTE(review): assumes deltaX/deltaY hold CM_HAL_MAX_DEPENDENCY_COUNT
                    // entries, the same limit enforced on the union below - confirm.
                    if (count > CM_HAL_MAX_DEPENDENCY_COUNT)
                    {
                        eStatus = MOS_STATUS_INVALID_PARAMETER;
                        CM_ASSERTMESSAGE("Kernel dependency count exceeds max dependency count (8)");
                        goto finish;
                    }

                    if (kernelTSParam->dependencyVectorsValid)
                    {
                        MOS_SecureMemcpy(&dependencyInfo.deltaX, sizeof(int32_t) * count, &kernelTSParam->dependencyVectors.deltaX, sizeof(int32_t) * count);
                        MOS_SecureMemcpy(&dependencyInfo.deltaY, sizeof(int32_t) * count, &kernelTSParam->dependencyVectors.deltaY, sizeof(int32_t) * count);
                    }
                    else
                    {
                        MOS_SecureMemcpy(&dependencyInfo.deltaX, sizeof(int32_t) * count, &kernelTSParam->dependencyInfo.deltaX, sizeof(int32_t) * count);
                        MOS_SecureMemcpy(&dependencyInfo.deltaY, sizeof(int32_t) * count, &kernelTSParam->dependencyInfo.deltaY, sizeof(int32_t) * count);
                    }

                    for (j = 0; j < count; ++j)
                    {
                        // Reuse an existing union slot when the same delta is already present
                        for (k = 0; k < vfeDependencyInfo.count; ++k)
                        {
                            if ((dependencyInfo.deltaX[j] == vfeDependencyInfo.deltaX[k]) &&
                                (dependencyInfo.deltaY[j] == vfeDependencyInfo.deltaY[k]))
                            {
                                CM_HAL_SETBIT(kernelTSParam->globalDependencyMask, k);
                                break;
                            }
                        }
                        if (k == vfeDependencyInfo.count)
                        {
                            // New delta: fail before appending if the union is already
                            // full, instead of writing past the arrays and only
                            // detecting the overflow after the loop.
                            if (vfeDependencyInfo.count >= CM_HAL_MAX_DEPENDENCY_COUNT)
                            {
                                eStatus = MOS_STATUS_INVALID_PARAMETER;
                                CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
                                goto finish;
                            }
                            vfeDependencyInfo.deltaX[vfeDependencyInfo.count] = dependencyInfo.deltaX[j];
                            vfeDependencyInfo.deltaY[vfeDependencyInfo.count] = dependencyInfo.deltaY[j];
                            CM_HAL_SETBIT(kernelTSParam->globalDependencyMask, vfeDependencyInfo.count);
                            vfeDependencyInfo.count++;
                        }
                    }
                }
            }
            reuseBBUpdateMask |= kernelTSParam->reuseBBUpdateMask;
        }

        // Still needed for the seeding copy above, which takes the kernel's count verbatim
        if (vfeDependencyInfo.count > CM_HAL_MAX_DEPENDENCY_COUNT)
        {
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
            goto finish;
        }

        scoreboardParams->ScoreboardMask = (uint8_t)vfeDependencyInfo.count;
        for (i = 0; i < scoreboardParams->ScoreboardMask; ++i)
        {
            scoreboardParams->ScoreboardDelta[i].x = vfeDependencyInfo.deltaX[i];
            scoreboardParams->ScoreboardDelta[i].y = vfeDependencyInfo.deltaY[i];
        }

        // If no dependency defined in kernel data, then check per-task thread space setting
        if (scoreboardParams->ScoreboardMask == 0)
        {
            if (taskParam->dependencyVectorsValid)
            {
                scoreboardParams->ScoreboardMask = (uint8_t)taskParam->dependencyVectors.count;
                for (i = 0; i < scoreboardParams->ScoreboardMask; ++i)
                {
                    scoreboardParams->ScoreboardDelta[i].x = taskParam->dependencyVectors.deltaX[i];
                    scoreboardParams->ScoreboardDelta[i].y = taskParam->dependencyVectors.deltaY[i];
                }
            }
            else
            {
                // Predefined patterns; negative deltas are written as 4-bit values (0xF = -1, 0xE = -2, 0xD = -3)
                switch (taskParam->dependencyPattern)
                {
                    case CM_NONE_DEPENDENCY:
                        break;

                    case CM_VERTICAL_WAVE:
                        scoreboardParams->ScoreboardMask = 1;
                        scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[0].y = 0;
                        break;

                    case CM_HORIZONTAL_WAVE:
                        scoreboardParams->ScoreboardMask = 1;
                        scoreboardParams->ScoreboardDelta[0].x = 0;
                        scoreboardParams->ScoreboardDelta[0].y = 0xF; // -1 in uint8_t:4
                        break;

                    case CM_WAVEFRONT:
                        scoreboardParams->ScoreboardMask = 3;
                        scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[0].y = 0;
                        scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[1].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[2].x = 0;
                        scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
                        break;

                    case CM_WAVEFRONT26:
                        scoreboardParams->ScoreboardMask = 4;
                        scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[0].y = 0;
                        scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[1].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[2].x = 0;
                        scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[3].x = 1;
                        scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
                        break;

                    case CM_WAVEFRONT26Z:
                    case CM_WAVEFRONT26ZIG:
                        scoreboardParams->ScoreboardMask = 5;
                        scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[0].y = 1;
                        scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[1].y = 0;
                        scoreboardParams->ScoreboardDelta[2].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[3].x = 0;
                        scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[4].x = 1;
                        scoreboardParams->ScoreboardDelta[4].y = 0xF; // -1 in uint8_t:4
                        break;

                    case CM_WAVEFRONT26ZI:
                        scoreboardParams->ScoreboardMask = 7;
                        scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[0].y = 1;
                        scoreboardParams->ScoreboardDelta[1].x = 0xE; // -2
                        scoreboardParams->ScoreboardDelta[1].y = 0;
                        scoreboardParams->ScoreboardDelta[2].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[2].y = 0;
                        scoreboardParams->ScoreboardDelta[3].x = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[4].x = 0;
                        scoreboardParams->ScoreboardDelta[4].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[5].x = 1;
                        scoreboardParams->ScoreboardDelta[5].y = 0xF; // -1 in uint8_t:4
                        scoreboardParams->ScoreboardDelta[6].x = 1;
                        scoreboardParams->ScoreboardDelta[6].y = 0;
                        break;

                    case CM_WAVEFRONT26X:
                        scoreboardParams->ScoreboardMask = 7;
                        scoreboardParams->ScoreboardDelta[0].x = 0xF;
                        scoreboardParams->ScoreboardDelta[0].y = 3;
                        scoreboardParams->ScoreboardDelta[1].x = 0xF;
                        scoreboardParams->ScoreboardDelta[1].y = 1;
                        scoreboardParams->ScoreboardDelta[2].x = 0xF;
                        scoreboardParams->ScoreboardDelta[2].y = 0xF;
                        scoreboardParams->ScoreboardDelta[3].x = 0;
                        scoreboardParams->ScoreboardDelta[3].y = 0xF;
                        scoreboardParams->ScoreboardDelta[4].x = 0;
                        scoreboardParams->ScoreboardDelta[4].y = 0xE;
                        scoreboardParams->ScoreboardDelta[5].x = 0;
                        scoreboardParams->ScoreboardDelta[5].y = 0xD;
                        scoreboardParams->ScoreboardDelta[6].x = 1;
                        scoreboardParams->ScoreboardDelta[6].y = 0xD;
                        break;

                    default:
                        // Unknown pattern: fall back to no dependency
                        taskParam->dependencyPattern = CM_NONE_DEPENDENCY;
                        break;
                }
            }
        }
    }

    // Set size of surface binding table
    CM_SURFACE_BTI_INFO surfBTIInfo;
    state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
    taskParam->surfacePerBT += surfBTIInfo.normalSurfaceStart;

    // reserve extra entries if kernel debugger is enabled
    if (execParam->kernelDebugEnabled)
    {
        taskParam->surfacePerBT += CM_RESERVED_SURFACE_NUMBER_FOR_KERNEL_DEBUG;
    }

    // If a global surface is used and the current binding table size is smaller than
    // the max index of the reserved surfaces, use the max binding table size
    if ((execParam->globalSurfaceUsed) && (taskParam->surfacePerBT < surfBTIInfo.reservedSurfaceEnd))
    {
        taskParam->surfacePerBT = CM_MAX_STATIC_SURFACE_STATES_PER_BT;
    }

    // Make sure surfacePerBT does not exceed CM_MAX_STATIC_SURFACE_STATES_PER_BT
    taskParam->surfacePerBT = MOS_MIN(CM_MAX_STATIC_SURFACE_STATES_PER_BT, taskParam->surfacePerBT);

    // Fold every thread's reset mask into the batch buffer reuse mask
    if (taskParam->dependencyMasks)
    {
        for (krn = 0; krn < execParam->numKernels; krn++)
        {
            kernelParam    = execParam->kernels[krn];
            dependencyMask = taskParam->dependencyMasks[krn];
            if (dependencyMask)
            {
                for (i = 0; i < kernelParam->numThreads; ++i)
                {
                    reuseBBUpdateMask |= dependencyMask[i].resetMask;
                }
            }
        }
    }

    CM_HAL_CHECKBIT_IS_SET(bitIsSet, reuseBBUpdateMask, CM_NO_BATCH_BUFFER_REUSE_BIT_POS);
    if (bitIsSet || reuseBBUpdateMask == 0)
    {
        taskParam->reuseBBUpdateMask = 0;
    }
    else
    {
        taskParam->reuseBBUpdateMask = 1;
    }

    threadArgExists = HalCm_GetTaskHasThreadArg(execParam->kernels, execParam->numKernels);

    // For media object with thread arg, only support up to CM_MAX_USER_THREADS (512*512) threads
    // otherwise can support up to 262144 media object commands in batch buffer
    if (!state->walkerParams.CmWalkerEnable)
    {
        if (!threadArgExists)
        {
            if (totalThreads > CM_MAX_USER_THREADS_NO_THREADARG)
            {
                eStatus = MOS_STATUS_INVALID_PARAMETER;
                CM_ASSERTMESSAGE(
                    "Total task threads '%d' exceeds max allowed threads '%d'",
                    totalThreads,
                    CM_MAX_USER_THREADS_NO_THREADARG);
                goto finish;
            }
        }
        else
        {
            if (totalThreads > CM_MAX_USER_THREADS)
            {
                eStatus = MOS_STATUS_INVALID_PARAMETER;
                CM_ASSERTMESSAGE(
                    "Total task threads '%d' exceeds max allowed threads '%d'",
                    totalThreads,
                    CM_MAX_USER_THREADS);
                goto finish;
            }
        }
    }

    taskParam->queueOption = execParam->queueOption;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Parse the kernels of a group task and populate the Task Param
//|             structure (state->taskParam), including the size of the surface
//|             binding table
//| Return:     Always MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ParseGroupTask(
    PCM_HAL_STATE                       state,           // [in] Pointer to HAL CM state
    PCM_HAL_EXEC_GROUP_TASK_PARAM       execGroupParam)  // [in] Pointer to Exec Task Param
{
    PCM_HAL_TASK_PARAM task = state->taskParam;

    // Straight copies from the exec parameters
    task->surfEntryInfoArrays  = execGroupParam->surEntryInfoArrays;  //GT-PIN
    task->batchBufferSize      = 0;
    task->kernelDebugEnabled   = (uint32_t)execGroupParam->kernelDebugEnabled;
    task->numKernels           = execGroupParam->numKernels;
    task->syncBitmap           = execGroupParam->syncBitmap;
    task->conditionalEndBitmap = execGroupParam->conditionalEndBitmap;
    MOS_SecureMemcpy(task->conditionalEndInfo, sizeof(task->conditionalEndInfo),
                     execGroupParam->conditionalEndInfo, sizeof(execGroupParam->conditionalEndInfo));
    task->taskConfig = execGroupParam->taskConfig;
    MOS_SecureMemcpy(task->krnExecCfg, sizeof(task->krnExecCfg),
                     execGroupParam->krnExecCfg, sizeof(execGroupParam->krnExecCfg));

    // Grow surfacePerBT to the largest requirement seen across all kernels
    for (uint32_t kernelIdx = 0; kernelIdx < execGroupParam->numKernels; ++kernelIdx)
    {
        PCM_HAL_KERNEL_PARAM      kernel           = execGroupParam->kernels[kernelIdx];
        PCM_INDIRECT_SURFACE_INFO indirectSurfaces = kernel->indirectDataParam.surfaceInfo;
        uint32_t                  surfaceTotal     = 0;

        if (kernel->indirectDataParam.surfaceCount)
        {
            // Highest binding table index referenced by the indirect surfaces
            uint32_t highestIndex = 0;
            for (uint32_t s = 0; s < kernel->indirectDataParam.surfaceCount; ++s)
            {
                if (indirectSurfaces[s].bindingTableIndex > highestIndex)
                {
                    highestIndex = indirectSurfaces[s].bindingTableIndex;
                }
                ++surfaceTotal;
            }
            if (task->surfacePerBT < highestIndex)
            {
                task->surfacePerBT = highestIndex;
            }
        }

        surfaceTotal += kernel->numSurfaces;
        if (task->surfacePerBT < surfaceTotal)
        {
            task->surfacePerBT = surfaceTotal;
        }
    }

    CM_SURFACE_BTI_INFO btiInfo;
    state->cmHalInterface->GetHwSurfaceBTIInfo(&btiInfo);
    task->surfacePerBT += btiInfo.normalSurfaceStart;

    // reserve extra entries if kernel debugger is enabled
    if (execGroupParam->kernelDebugEnabled)
    {
        task->surfacePerBT += CM_RESERVED_SURFACE_NUMBER_FOR_KERNEL_DEBUG;
    }

    // With a global surface in use, a table smaller than the end of the reserved
    // surface range is bumped to the maximum binding table size
    if (execGroupParam->globalSurfaceUsed &&
        (task->surfacePerBT < btiInfo.reservedSurfaceEnd))
    {
        task->surfacePerBT = CM_MAX_STATIC_SURFACE_STATES_PER_BT;
    }

    // Clamp to CM_MAX_STATIC_SURFACE_STATES_PER_BT
    task->surfacePerBT = MOS_MIN(CM_MAX_STATIC_SURFACE_STATES_PER_BT, task->surfacePerBT);

    task->queueOption     = execGroupParam->queueOption;
    task->mosVeHintParams = execGroupParam->mosVeHintParams;

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Parse the kernels of a hints task and populate the Task Param
//|             structure: batch buffer size, batch buffer reuse mask, scoreboard
//|             type and queue option
//| Return:     MOS_STATUS_SUCCESS, or MOS_STATUS_INVALID_PARAMETER when a kernel
//|             entry is null/empty or the task has too many threads
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ParseHintsTask(
    PCM_HAL_STATE                     state,          // [in] Pointer to HAL CM state
    PCM_HAL_EXEC_HINTS_TASK_PARAM     execHintsParam) // [in] Pointer to Exec Hints Task Param
{
    PCM_HAL_TASK_PARAM  task        = state->taskParam;
    PMHW_VFE_SCOREBOARD scoreboard  = &state->scoreboardParams;
    uint32_t            mediaObjHdr = state->renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
    uint32_t            threadTotal = 0;
    uint8_t             reuseBits   = 0;
    bool                nonStalling = true;
    bool                noReuseSet  = false;

    for (uint32_t idx = 0; idx < execHintsParam->numKernels; ++idx)
    {
        if ((execHintsParam->kernels[idx] == nullptr) ||
            (execHintsParam->kernelSizes[idx] == 0))
        {
            CM_ASSERTMESSAGE("Invalid Kernel data");
            return MOS_STATUS_INVALID_PARAMETER;
        }

        PCM_HAL_KERNEL_PARAM kernel = execHintsParam->kernels[idx];

        // A single kernel without the non-stalling flag disables it for the whole task
        if (!(kernel->cmFlags & CM_KERNEL_FLAGS_NONSTALLING_SCOREBOARD))
        {
            nonStalling = false;
        }

        // Media-object mode needs one command (header + payload, at least 4) per thread
        if (!state->walkerParams.CmWalkerEnable)
        {
            task->batchBufferSize +=
                kernel->numThreads * (mediaObjHdr + MOS_MAX(kernel->payloadSize, 4));
        }

        threadTotal += kernel->numThreads;
        reuseBits   |= kernel->kernelThreadSpaceParam.reuseBBUpdateMask;
    }

    // Reuse-update is requested only when some bit is set and the no-reuse bit is not
    CM_HAL_CHECKBIT_IS_SET(noReuseSet, reuseBits, CM_NO_BATCH_BUFFER_REUSE_BIT_POS);
    task->reuseBBUpdateMask = (noReuseSet || reuseBits == 0) ? 0 : 1;

    task->batchBufferSize += CM_EXTRA_BB_SPACE;
    scoreboard->ScoreboardType = nonStalling;

    bool hasThreadArg = HalCm_GetTaskHasThreadArg(execHintsParam->kernels, execHintsParam->numKernels);

    // The thread limit only applies to media-object mode and depends on thread args
    if (!state->walkerParams.CmWalkerEnable)
    {
        if (hasThreadArg)
        {
            if (threadTotal > CM_MAX_USER_THREADS)
            {
                CM_ASSERTMESSAGE(
                    "Total task threads '%d' exceeds max allowed threads '%d'",
                    threadTotal,
                    CM_MAX_USER_THREADS);
                return MOS_STATUS_INVALID_PARAMETER;
            }
        }
        else
        {
            if (threadTotal > CM_MAX_USER_THREADS_NO_THREADARG)
            {
                CM_ASSERTMESSAGE(
                    "Total task threads '%d' exceeds max allowed threads '%d'",
                    threadTotal,
                    CM_MAX_USER_THREADS_NO_THREADARG);
                return MOS_STATUS_INVALID_PARAMETER;
            }
        }
    }

    task->queueOption = execHintsParam->queueOption;
    return MOS_STATUS_SUCCESS;
}
/*
** Returns true only when the kernel entry is non-null and flagged as free;
** a null entry is reported as not free.
** used for combining
*/
bool bIsFree( PRENDERHAL_KRN_ALLOCATION kAlloc )
{
    return (kAlloc != nullptr) &&
           (kAlloc->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE);
}
/*
** local used supporting function
** With a non-null mhwKernelParam: marks the allocation entry as used, records
** the sync tag / access count / parameters and, unless this is a clone entry,
** copies the mov instructions and kernel binary into the ISH buffer.
** With a null mhwKernelParam: resets the allocation entry to the free state.
*/
void CmLoadKernel(PCM_HAL_STATE             state,
                  PRENDERHAL_STATE_HEAP     stateHeap,
                  PRENDERHAL_KRN_ALLOCATION kernelAllocation,
                  uint32_t                  sync,
                  uint32_t                  count,
                  PRENDERHAL_KERNEL_PARAM   parameters,
                  PCM_HAL_KERNEL_PARAM      kernelParam,
                  MHW_KERNEL_PARAM          *mhwKernelParam,
                  bool                      isCloneEntry)
{
    UNUSED(state);

    if (mhwKernelParam == nullptr)
    {
        // No kernel supplied: turn the entry into a free slot
        kernelAllocation->iKID    = -1;
        kernelAllocation->iKUID   = -1;
        kernelAllocation->iKCID   = -1;
        kernelAllocation->dwSync  = 0;
        FrameTrackerTokenFlat_Clear(&kernelAllocation->trackerToken);
        kernelAllocation->dwCount = 0;
        kernelAllocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
        kernelAllocation->pMhwKernelParam = nullptr;
        kernelAllocation->cloneKernelParams.cloneKernelID       = -1;
        kernelAllocation->cloneKernelParams.isClone             = false;
        kernelAllocation->cloneKernelParams.isHeadKernel        = false;
        kernelAllocation->cloneKernelParams.kernelBinaryAllocID = -1;
        kernelAllocation->cloneKernelParams.referenceCount      = 0;
        return;
    }

    // Kernel supplied: claim the entry
    kernelAllocation->iKID    = -1;
    kernelAllocation->iKUID   = mhwKernelParam->iKUID;
    kernelAllocation->iKCID   = mhwKernelParam->iKCID;
    kernelAllocation->dwSync  = sync;
    // NOTE(review): the original comment says "28 bits" but this mask keeps all 32 - confirm intent
    kernelAllocation->dwCount = count & 0xFFFFFFFF;
    kernelAllocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_USED;
    kernelAllocation->Params  = *parameters;
    kernelAllocation->pMhwKernelParam = mhwKernelParam;

    if (isCloneEntry)
    {
        // Clone entries carry no binary of their own; nothing to copy
        return;
    }

    // Mov instructions go first ...
    MOS_SecureMemcpy(stateHeap->pIshBuffer + kernelAllocation->dwOffset,
                     kernelParam->movInsDataSize,
                     kernelParam->movInsData,
                     kernelParam->movInsDataSize);

    // ... followed by the CM kernel binary
    MOS_SecureMemcpy(stateHeap->pIshBuffer + kernelAllocation->dwOffset + kernelParam->movInsDataSize,
                     kernelParam->kernelBinarySize - kernelParam->movInsDataSize,
                     kernelParam->kernelBinary,
                     kernelParam->kernelBinarySize - kernelParam->movInsDataSize);

    // Zero padding after the kernel binary (dummy instructions) to resolve page fault issue
    MOS_ZeroMemory(stateHeap->pIshBuffer + kernelAllocation->dwOffset + kernelParam->kernelBinarySize, CM_KERNEL_BINARY_PADDING_SIZE);
}
/*
** local used supporting function
** Scans the kernel allocation table for the first free entry whose total size
** can hold the kernel (CM_64BYTE for a clone entry, mhwKernelParam->iSize otherwise).
** Returns the allocation ID of that entry, or -1 when none is large enough,
** so the caller can delete more entries.
*/
int32_t CmSearchFreeSlotSize(PCM_HAL_STATE state, MHW_KERNEL_PARAM *mhwKernelParam, bool isCloneEntry)
{
    PRENDERHAL_KRN_ALLOCATION allocations = state->renderHal->pStateHeap->pKernelAllocation;

    // mhwKernelParam is only consulted for non-clone entries
    int32_t requiredSize = CM_64BYTE;
    if (!isCloneEntry)
    {
        requiredSize = mhwKernelParam->iSize;
    }

    for (int32_t id = 0; id < state->kernelNumInGsh; ++id)
    {
        if (allocations[id].dwFlags != RENDERHAL_KERNEL_ALLOCATION_FREE)
        {
            continue;
        }
        if (state->totalKernelSize[id] >= requiredSize)
        {
            // first free slot that is big enough
            return id;
        }
    }

    // not found
    return -1;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Updates the clone entries' head kernel binary allocation IDs.
//|             Every clone whose head ID is greater than shiftPoint has that ID
//|             increased by shiftFactor for CM_SHIFT_LEFT and decreased by
//|             shiftFactor for any other direction.
//|             Function is called after kernel allocations are shifted due to
//|             combining neighboring free entries
//| Return:     None
//*-----------------------------------------------------------------------------
void HalCm_UpdateCloneKernel(PCM_HAL_STATE      state,
                             uint32_t           shiftPoint,
                             CM_SHIFT_DIRECTION shiftDirection,
                             uint32_t           shiftFactor)
{
    PRENDERHAL_KRN_ALLOCATION allocations = state->renderHal->pStateHeap->pKernelAllocation;

    for (int32_t id = 0; id < state->kernelNumInGsh; ++id)
    {
        PRENDERHAL_KRN_ALLOCATION entry = &allocations[id];

        // Only clones that reference a head kernel beyond the shift point are affected
        if (!entry->cloneKernelParams.isClone)
        {
            continue;
        }
        if (entry->cloneKernelParams.kernelBinaryAllocID <= (int32_t)shiftPoint)
        {
            continue;
        }

        if (shiftDirection == CM_SHIFT_LEFT)
        {
            entry->cloneKernelParams.kernelBinaryAllocID += shiftFactor;
        }
        else
        {
            entry->cloneKernelParams.kernelBinaryAllocID -= shiftFactor;
        }
    }
}
/*
** local used supporting function
** We found free slot and load kernel to this slot. There are 3 cases (see code)
*/
int32_t CmAddCurrentKernelToFreeSlot(PCM_HAL_STATE state,
int32_t slot,
PRENDERHAL_KERNEL_PARAM parameters,
PCM_HAL_KERNEL_PARAM kernelParam,
MHW_KERNEL_PARAM *mhwKernelParam,
CM_CLONE_TYPE cloneType,
int32_t headKernelAllocationID)
{
PRENDERHAL_STATE_HEAP stateHeap;
PRENDERHAL_KRN_ALLOCATION kernelAllocation, pKernelAllocationN;
int32_t hr = CM_SUCCESS;
int32_t i;
int32_t totalSize, tmpSize, dwOffset, neededSize;
bool adjust, isCloneEntry, isHeadKernel, isCloneAsHead, adjustHeadKernelID;
uint32_t tag;
stateHeap = state->renderHal->pStateHeap;
kernelAllocation = stateHeap->pKernelAllocation;
adjustHeadKernelID = false;
switch (cloneType)
{
case CM_CLONE_ENTRY:
{
neededSize = CM_64BYTE;
isCloneEntry = true;
isHeadKernel = false;
isCloneAsHead = false;
}
break;
case CM_HEAD_KERNEL:
{
neededSize = mhwKernelParam->iSize;
isHeadKernel = true;
isCloneEntry = false;
isCloneAsHead = false;
}
break;
case CM_CLONE_AS_HEAD_KERNEL:
{
neededSize = mhwKernelParam->iSize;
isHeadKernel = true;
isCloneEntry = false;
isCloneAsHead = true;
}
break;
case CM_NO_CLONE:
{
neededSize = mhwKernelParam->iSize;
isCloneEntry = false;
isHeadKernel = false;
isCloneAsHead = false;
}
break;
default:
{
hr = CM_FAILURE;
goto finish;
}
}
// to check if we have perfect size match
if(stateHeap->pKernelAllocation[slot].iSize == neededSize)
{
adjust = false;
}
else
{
adjust = true;
}
if ((state->kernelNumInGsh < state->cmDeviceParam.maxGshKernelEntries) && adjust)
{
// we have extra entry to add
// add new entry and pump index down below
int32_t lastKernel = state->kernelNumInGsh - 1;
for(i = lastKernel; i>slot; i--)
{
kernelAllocation = &stateHeap->pKernelAllocation[i];
pKernelAllocationN = &stateHeap->pKernelAllocation[i+1];
*pKernelAllocationN = *kernelAllocation;
state->totalKernelSize[i+1] = state->totalKernelSize[i];
}
if (lastKernel > slot)
{
// update the headKernelAllocationID if it was shifted
if (headKernelAllocationID > slot)
{
headKernelAllocationID++;
adjustHeadKernelID = true;
}
}
totalSize = state->totalKernelSize[slot];
tmpSize = neededSize;
dwOffset = stateHeap->pKernelAllocation[slot].dwOffset;
// now add new one
kernelAllocation = &stateHeap->pKernelAllocation[slot];
if(state->cbbEnabled)
{
tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface,
state->osInterface->CurrentGpuContextOrdinal);
}
else
{
tag = stateHeap->dwNextTag;
}
CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
stateHeap->dwAccessCounter++;
kernelAllocation->iSize = tmpSize;
state->totalKernelSize[slot] = MOS_ALIGN_CEIL(tmpSize, 64);
// insert a new slot which is free with rest
tmpSize = MOS_ALIGN_CEIL(tmpSize, 64); // HW required 64 byte align
kernelAllocation = &stateHeap->pKernelAllocation[slot+1];
CmLoadKernel(state, stateHeap, kernelAllocation, 0, 0, parameters, kernelParam, nullptr, isCloneEntry);
kernelAllocation->dwOffset = dwOffset+tmpSize;
kernelAllocation->iSize = 0;
state->totalKernelSize[slot+1] = totalSize - tmpSize;
// added one more entry
state->kernelNumInGsh++;
kernelAllocation = &stateHeap->pKernelAllocation[slot];
if (isCloneEntry)
{
if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
{
// ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
hr = CM_FAILURE;
goto finish;
}
kernelAllocation->cloneKernelParams.dwOffsetForAllocID = dwOffset;
kernelAllocation->dwOffset = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
kernelAllocation->cloneKernelParams.isClone = true;
kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
kernelAllocation->cloneKernelParams.cloneKernelID = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
// update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
}
else
{
kernelAllocation->dwOffset = dwOffset;
if (isHeadKernel)
{
kernelAllocation->cloneKernelParams.isHeadKernel = true;
if (isCloneAsHead)
{
kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
}
}
}
if (lastKernel > slot)
{
HalCm_UpdateCloneKernel(state, slot, CM_SHIFT_LEFT, 1);
if (isCloneEntry && adjustHeadKernelID)
{
// if clone entry and already adjusted head kernel ID, then adjusted again in HalCm_UpdateCloneKernel, need to do only once
kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID - 1;
}
}
}
else if (state->kernelNumInGsh < state->cmDeviceParam.maxGshKernelEntries)
{
// no need to create a new entry since we have the same size
kernelAllocation = &stateHeap->pKernelAllocation[slot];
if(state->cbbEnabled)
{
tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface,
state->osInterface->CurrentGpuContextOrdinal);
}
else
{
tag = stateHeap->dwNextTag;
}
CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
stateHeap->dwAccessCounter++;
// no change for kernelAllocation->dwOffset
kernelAllocation->iSize = neededSize;
state->totalKernelSize[slot] = MOS_ALIGN_CEIL(mhwKernelParam->iSize, 64);
if (isCloneEntry)
{
if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
{
// ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
hr = CM_FAILURE;
goto finish;
}
kernelAllocation->cloneKernelParams.dwOffsetForAllocID = kernelAllocation->dwOffset;
kernelAllocation->dwOffset = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
kernelAllocation->cloneKernelParams.isClone = true;
kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
kernelAllocation->cloneKernelParams.cloneKernelID = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
// update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
}
else if (isHeadKernel)
{
kernelAllocation->cloneKernelParams.isHeadKernel = true;
if (isCloneAsHead)
{
kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
}
}
}
else
{
// all slots are used, but we have one free which is big enough
// we may have fragmentation, but code is the same as above case
kernelAllocation = &stateHeap->pKernelAllocation[slot];
if(state->cbbEnabled)
{
tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface, state->osInterface->CurrentGpuContextOrdinal);
}
else
{
tag = stateHeap->dwNextTag;
}
CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
stateHeap->dwAccessCounter++;
// kernelAllocation->iTotalSize is not changed, but we have smaller actual size
// no change for kernelAllocation->dwOffset
kernelAllocation->iSize = neededSize;
if (isCloneEntry)
{
if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
{
// ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
hr = CM_FAILURE;
goto finish;
}
kernelAllocation->cloneKernelParams.dwOffsetForAllocID = kernelAllocation->dwOffset;
kernelAllocation->dwOffset = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
kernelAllocation->cloneKernelParams.isClone = true;
kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
kernelAllocation->cloneKernelParams.cloneKernelID = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
// update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
}
else if (isHeadKernel)
{
kernelAllocation->cloneKernelParams.isHeadKernel = true;
if (isCloneAsHead)
{
kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
}
}
}
finish:
return hr;
}
/*----------------------------------------------------------------------------
| Name      : HalCm_UnloadKernel ( Replace RenderHal_UnloadKernel)
|
| Purpose   : Marks one kernel allocation entry as free and resets its
|             bookkeeping. dwOffset/iSize are kept so the slot can be reused.
|
| Arguments : [in] state            - CM HAL state; state->renderHal is used
|             [in] kernelAllocation - entry to release
|
| Returns   : CM_SUCCESS once the entry has been reset.
|             CM_FAILURE when the entry is already free, when a clone points
|             at an entry that is not a head kernel or whose reference count
|             is not positive, or when a head kernel still has references.
|             Null pointers and a failing HalCm_SyncKernel leave through the
|             CM_CHK_* macros with whatever status those macros store in hr.
\---------------------------------------------------------------------------*/
int32_t HalCm_UnloadKernel(
    PCM_HAL_STATE               state,
    PRENDERHAL_KRN_ALLOCATION   kernelAllocation)
{
    PRENDERHAL_INTERFACE    renderHal = state->renderHal;
    PRENDERHAL_STATE_HEAP   stateHeap;
    int32_t                 hr;

    // Argument validation (the macros jump to 'finish' on a null pointer)
    CM_CHK_NULL_GOTOFINISH_CMERROR(renderHal);
    CM_CHK_NULL_GOTOFINISH_CMERROR(renderHal->pStateHeap);
    CM_CHK_NULL_GOTOFINISH_CMERROR(kernelAllocation);

    stateHeap = renderHal->pStateHeap;
    hr        = CM_FAILURE;

    // Nothing to unload: report failure, exactly like the RenderHal version
    if (kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE)
    {
        goto finish;
    }

    // Synchronise on the entry's tag before touching it
    CM_CHK_CMSTATUS_GOTOFINISH(HalCm_SyncKernel(state, kernelAllocation->dwSync));

    // The MHW parameter block (if any) no longer refers to a loaded kernel
    if (kernelAllocation->pMhwKernelParam)
    {
        kernelAllocation->pMhwKernelParam->bLoaded = 0;
    }

    if (kernelAllocation->cloneKernelParams.isClone)
    {
        // A clone borrows the binary of its head entry; drop one reference from that head
        PRENDERHAL_KRN_ALLOCATION headEntry =
            &stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID];

        if (!headEntry->cloneKernelParams.isHeadKernel ||
            headEntry->cloneKernelParams.referenceCount <= 0)
        {
            // ERROR: the referenced entry is not a head kernel, or it has no references left to drop
            hr = CM_FAILURE;
            goto finish;
        }

        headEntry->cloneKernelParams.referenceCount -= 1;

        // The clone's dwOffset was redirected to the head's binary; restore this entry's own offset
        kernelAllocation->dwOffset = kernelAllocation->cloneKernelParams.dwOffsetForAllocID;
    }
    else if (kernelAllocation->cloneKernelParams.isHeadKernel &&
             kernelAllocation->cloneKernelParams.referenceCount != 0)
    {
        // ERROR: clones must be released before the head kernel they reference
        hr = CM_FAILURE;
        goto finish;
    }

    // Reset identification and usage tracking (offset/size may be used for reallocation)
    kernelAllocation->iKID            = -1;
    kernelAllocation->iKUID           = -1;
    kernelAllocation->iKCID           = -1;
    kernelAllocation->dwSync          = 0;
    kernelAllocation->dwCount         = 0;
    kernelAllocation->pMhwKernelParam = nullptr;
    FrameTrackerTokenFlat_Clear(&kernelAllocation->trackerToken);
    kernelAllocation->dwFlags         = RENDERHAL_KERNEL_ALLOCATION_FREE;

    // Reset clone bookkeeping
    kernelAllocation->cloneKernelParams.isClone             = false;
    kernelAllocation->cloneKernelParams.isHeadKernel        = false;
    kernelAllocation->cloneKernelParams.cloneKernelID       = -1;
    kernelAllocation->cloneKernelParams.kernelBinaryAllocID = -1;
    kernelAllocation->cloneKernelParams.referenceCount      = 0;

    hr = CM_SUCCESS;

finish:
    return hr;
}
/*----------------------------------------------------------------------------
| Name      : HalCm_TouchKernel ( Replace RenderHal_TouchKernel)
|
| Purpose   : Refreshes the usage counter and sync tag of a kernel allocation
|             entry; for a clone, the head entry it references is refreshed too.
|
| Arguments : [in] state              - CM HAL state
|             [in] kernelAllocationID - index into the kernel allocation table
|
| Returns   : CM_SUCCESS on success.
|             CM_FAILURE when the state heap or allocation table is missing,
|             the index is outside [0, StateHeapSettings.iKernelCount), or a
|             clone's head entry has a non-positive reference count.
\---------------------------------------------------------------------------*/
int32_t HalCm_TouchKernel(
    PCM_HAL_STATE       state,
    int32_t             kernelAllocationID)
{
    PRENDERHAL_INTERFACE    renderHal   = state->renderHal;
    PMOS_INTERFACE          osInterface = state->osInterface;
    PRENDERHAL_STATE_HEAP   stateHeap   = (renderHal) ? renderHal->pStateHeap : nullptr;

    const bool requestIsValid = (stateHeap != nullptr) &&
                                (stateHeap->pKernelAllocation != nullptr) &&
                                (kernelAllocationID >= 0) &&
                                (kernelAllocationID < renderHal->StateHeapSettings.iKernelCount);
    if (!requestIsValid)
    {
        return CM_FAILURE;
    }

    PRENDERHAL_KRN_ALLOCATION touched = &(stateHeap->pKernelAllocation[kernelAllocationID]);

    // Only entries that are neither free nor locked get their usage counter refreshed
    const bool tracksUsage = (touched->dwFlags != RENDERHAL_KERNEL_ALLOCATION_FREE) &&
                             (touched->dwFlags != RENDERHAL_KERNEL_ALLOCATION_LOCKED);
    if (tracksUsage)
    {
        touched->dwCount = stateHeap->dwAccessCounter++;
    }

    // Sync tag used for deallocation control: the GPU status tag of the current
    // context when cbbEnabled is set, otherwise the heap's next tag
    uint32_t syncTag;
    if (state->cbbEnabled)
    {
        syncTag = osInterface->pfnGetGpuStatusTag(osInterface, osInterface->CurrentGpuContextOrdinal);
    }
    else
    {
        syncTag = stateHeap->dwNextTag;
    }
    touched->dwSync = syncTag;

    if (!touched->cloneKernelParams.isClone)
    {
        return CM_SUCCESS;
    }

    // A clone keeps its head entry alive: give the head the same tag and a newer
    // access count than the clone
    PRENDERHAL_KRN_ALLOCATION headEntry =
        &(stateHeap->pKernelAllocation[touched->cloneKernelParams.kernelBinaryAllocID]);
    if (headEntry->cloneKernelParams.referenceCount <= 0)
    {
        // ERROR: the head claims to have no clones
        return CM_FAILURE;
    }
    headEntry->dwSync  = syncTag;
    headEntry->dwCount = stateHeap->dwAccessCounter++;

    return CM_SUCCESS;
}
/*
** Supporting function
** Evicts the least recently used kernel entry to make room in the kernel heap.
**
** The victim is the in-use, non-locked entry with the largest distance between
** the heap's access counter and the entry's own dwCount. After unloading it, the
** freed slot is merged with a free previous and/or next slot when possible, and
** the table (pKernelAllocation / totalKernelSize / kernelNumInGsh) is compacted.
**
** Returns CM_SUCCESS when an entry was evicted, CM_FAILURE when no candidate
** exists, when the candidate is a head kernel that still has references, or when
** HalCm_UnloadKernel fails.
*/
int32_t CmDeleteOldestKernel(PCM_HAL_STATE state, MHW_KERNEL_PARAM *mhwKernelParam)
{
    UNUSED(mhwKernelParam);

    PRENDERHAL_INTERFACE    renderHal = state->renderHal;
    PRENDERHAL_STATE_HEAP   stateHeap = renderHal->pStateHeap;

    // ---- Step 1: find the entry that has gone unused for the longest time ----
    int32_t  victim      = -1;
    uint32_t greatestAge = 0;
    for (int32_t slot = 0; slot < state->kernelNumInGsh; slot++)
    {
        PRENDERHAL_KRN_ALLOCATION candidate = &stateHeap->pKernelAllocation[slot];

        // Free entries have nothing to evict; locked entries must not be evicted automatically
        if (candidate->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE ||
            candidate->dwFlags == RENDERHAL_KERNEL_ALLOCATION_LOCKED)
        {
            continue;
        }

        // Age is measured in access-counter ticks; an age of 0 is never selected
        const uint32_t age = (uint32_t)(stateHeap->dwAccessCounter - candidate->dwCount);
        if (age > greatestAge)
        {
            victim      = slot;
            greatestAge = age;
        }
    }

    if (victim < 0)
    {
        CM_ASSERTMESSAGE("Failed to delete any slot from GSH. It is impossible.");
        return CM_FAILURE;
    }

    if (stateHeap->pKernelAllocation[victim].cloneKernelParams.isHeadKernel &&
        (stateHeap->pKernelAllocation[victim].cloneKernelParams.referenceCount != 0))
    {
        // ERROR: a head kernel was selected although clones still reference it
        return CM_FAILURE;
    }

    // ---- Step 2: release the entry ----
    if (HalCm_UnloadKernel(state, &stateHeap->pKernelAllocation[victim]) != CM_SUCCESS)
    {
        CM_ASSERTMESSAGE("Failed to load kernel - no space available in GSH.");
        return CM_FAILURE;
    }

    // ---- Step 3: merge the freed slot with free neighbours ----
    PRENDERHAL_KRN_ALLOCATION prevSlot  = (victim == 0) ? nullptr : &stateHeap->pKernelAllocation[victim - 1];
    PRENDERHAL_KRN_ALLOCATION freedSlot = &stateHeap->pKernelAllocation[victim];
    // NOTE(review): the upper neighbour is bounded by maxGshKernelEntries rather than
    // kernelNumInGsh, so it can designate a slot past the live range - confirm intended.
    PRENDERHAL_KRN_ALLOCATION nextSlot  = (victim == state->cmDeviceParam.maxGshKernelEntries - 1)
                                              ? nullptr
                                              : &stateHeap->pKernelAllocation[victim + 1];

    const bool    prevIsFree       = bIsFree(prevSlot);
    const bool    nextIsFree       = bIsFree(nextSlot);
    const int32_t cloneUpdateStart = (victim == 0) ? 0 : victim - 1;

    if (prevIsFree)
    {
        // The previous slot absorbs the freed slot, plus the next slot when that one is free too
        const int32_t removedSlots = nextIsFree ? 2 : 1;

        stateHeap->pKernelAllocation[victim - 1].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
        if (nextIsFree)
        {
            state->totalKernelSize[victim - 1] += state->totalKernelSize[victim] + state->totalKernelSize[victim + 1];
        }
        else
        {
            state->totalKernelSize[victim - 1] += state->totalKernelSize[victim];
        }
        stateHeap->pKernelAllocation[victim - 1].iSize = 0;
        // dwOffset of the previous slot does not change

        // Close the gap left by the absorbed slot(s)
        for (int32_t src = victim + removedSlots; src < state->kernelNumInGsh; src++)
        {
            stateHeap->pKernelAllocation[src - removedSlots] = stateHeap->pKernelAllocation[src];
            state->totalKernelSize[src - removedSlots]       = state->totalKernelSize[src];
        }
        state->kernelNumInGsh -= removedSlots;

        HalCm_UpdateCloneKernel(state, cloneUpdateStart, CM_SHIFT_RIGHT, removedSlots);
    }
    else
    {
        // The previous slot is in use (or does not exist): the freed slot stays where it is
        freedSlot->dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
        if (nextIsFree)
        {
            // absorb the next free slot
            state->totalKernelSize[victim] += state->totalKernelSize[victim + 1];
        }
        freedSlot->iSize = 0;

        if (prevSlot)
        {
            // Take over the space the previous slot holds beyond its 64-aligned kernel size,
            // moving the start of the free slot back by that amount
            int32_t alignedSize = MOS_ALIGN_CEIL(prevSlot->iSize, 64);
            int32_t shiftOffset = state->totalKernelSize[victim - 1] - alignedSize;

            state->totalKernelSize[victim - 1] -= shiftOffset;
            // iSize of the previous slot does not change
            state->totalKernelSize[victim]     += shiftOffset;
            freedSlot->dwOffset                -= shiftOffset;
        }

        if (nextIsFree)
        {
            // Remove the absorbed next slot from the table.
            // NOTE(review): the last iteration reads element [kernelNumInGsh]; confirm both
            // arrays always have at least one element past the live range.
            for (int32_t dst = victim + 1; dst < state->kernelNumInGsh; dst++)
            {
                stateHeap->pKernelAllocation[dst] = stateHeap->pKernelAllocation[dst + 1];
                state->totalKernelSize[dst]       = state->totalKernelSize[dst + 1];
            }
            state->kernelNumInGsh -= 1;

            HalCm_UpdateCloneKernel(state, cloneUpdateStart, CM_SHIFT_RIGHT, 1);
        }
        // without a merge the number of table entries is unchanged
    }

    return CM_SUCCESS;
}
/*----------------------------------------------------------------------------
| Name      : HalCm_LoadKernel ( Replace RenderHal_LoadKernel)
|
| Purpose   : Makes sure a kernel has an entry in the kernel allocation table,
|             evicting older kernels when no free slot is large enough.
|
| Arguments : [in]  state            - CM HAL state
|             [in]  kernelParam      - kernel to load (binary, id, clone info)
|             [in]  samplerCount     - stored in the RenderHal kernel params
|             [out] kernelAllocation - entry holding the kernel; only
|                                      meaningful when CM_SUCCESS is returned
|
| Returns   : CM_SUCCESS when the kernel was found or loaded.
|             CM_FAILURE on invalid parameters, when touching an already
|             loaded kernel fails, or when nothing can be evicted.
|             Otherwise the status of HalCm_InsertCloneKernel /
|             CmAddCurrentKernelToFreeSlot.
|
| Comments  : Algorithm for kernels that are neither clones nor cloned:
|               1) look for a free slot big enough for the kernel
|               2) if one exists, place the kernel there
|               3) otherwise delete the oldest entry and go back to 1)
\---------------------------------------------------------------------------*/
int32_t HalCm_LoadKernel(
    PCM_HAL_STATE             state,
    PCM_HAL_KERNEL_PARAM      kernelParam,
    int32_t                   samplerCount,
    PRENDERHAL_KRN_ALLOCATION &kernelAllocation)
{
    PRENDERHAL_INTERFACE      renderHal      = state->renderHal;
    PRENDERHAL_STATE_HEAP     stateHeap      = (renderHal) ? renderHal->pStateHeap : nullptr;
    PMHW_KERNEL_PARAM         mhwKernelParam = &(state->kernelParamsMhw);
    PRENDERHAL_KERNEL_PARAM   parameters     = &(state->kernelParamsRenderHal.Params);
    int32_t                   hr             = CM_SUCCESS;
    int32_t                   kernelAllocationID;     // Kernel allocation ID in GSH
    int32_t                   kernelCacheID;          // Kernel cache ID
    int32_t                   kernelUniqueID;         // Kernel unique ID
    int32_t                   freeSlot;

    // Validate parameters (kernelParam is checked before it is dereferenced)
    if (stateHeap == nullptr ||
        stateHeap->bIshLocked == false ||
        stateHeap->pKernelAllocation == nullptr ||
        kernelParam == nullptr ||
        kernelParam->kernelBinarySize == 0 ||
        state->kernelNumInGsh > state->cmDeviceParam.maxGshKernelEntries)
    {
        CM_ASSERTMESSAGE("Failed to load kernel - invalid parameters.");
        return CM_FAILURE;
    }

    // Fill the shared RenderHal / MHW parameter blocks for this kernel
    parameters->Sampler_Count = samplerCount;
    mhwKernelParam->iKUID     = static_cast<int>( (kernelParam->kernelId >> 32) );
    mhwKernelParam->iKCID     = -1;
    mhwKernelParam->pBinary   = kernelParam->kernelBinary;
    mhwKernelParam->iSize     = kernelParam->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;

    kernelUniqueID = mhwKernelParam->iKUID;
    kernelCacheID  = mhwKernelParam->iKCID;

    // Check if the kernel is already loaded
    kernelAllocation = stateHeap->pKernelAllocation;
    for (kernelAllocationID = 0;
         kernelAllocationID < state->kernelNumInGsh;
         kernelAllocationID++, kernelAllocation++)
    {
        if (kernelAllocation->iKUID == kernelUniqueID &&
            kernelAllocation->iKCID == kernelCacheID)
        {
            // Found a match: refresh its usage; kernelAllocation already points at the entry
            hr = HalCm_TouchKernel(state, kernelAllocationID);
            if (hr == CM_FAILURE)
            {
                goto finish;
            }

            mhwKernelParam->bLoaded = 1;
            goto finish;
        }
    }

    // Clones and kernels that have clones are placed by the dedicated routine
    if (kernelParam->clonedKernelParam.isClonedKernel || kernelParam->clonedKernelParam.hasClones)
    {
        hr = HalCm_InsertCloneKernel(state, kernelParam, kernelAllocation);
        goto finish;
    }

    // Search for a free slot, evicting the oldest kernel until one is big enough
    for (;;)
    {
        freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, false);
        if (freeSlot >= 0)
        {
            // The add function also updates the table bookkeeping
            hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, parameters, kernelParam, mhwKernelParam, CM_NO_CLONE, -1);
            break;
        }

        if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
        {
            return CM_FAILURE;
        }
    }

    // Flag the kernel as loaded only when it really was placed, matching HalCm_InsertCloneKernel
    if (hr == CM_SUCCESS)
    {
        mhwKernelParam->bLoaded = 1;
    }
    kernelAllocation = &stateHeap->pKernelAllocation[freeSlot];  // Record kernel allocation

finish:
    return hr;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Loads cloned kernel entries and kernels with clones into a free slot.
//|             If a head kernel of the same clone family is already in the table,
//|             the kernel is added as a clone entry referencing that head;
//|             otherwise it is added as the head kernel itself.
//|             Older kernels are evicted until a suitable free slot exists.
//| Params:     state            [in]  CM HAL state (uses state->kernelParamsMhw and
//|                                    state->kernelParamsRenderHal as filled by the caller)
//|             kernelParam      [in]  kernel being loaded
//|             kernelAllocation [out] on success, the entry the kernel was placed in
//| Return:     Result of the operation
//*-----------------------------------------------------------------------------
int32_t HalCm_InsertCloneKernel(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_PARAM        kernelParam,
    PRENDERHAL_KRN_ALLOCATION   &kernelAllocation)
{
    PMOS_INTERFACE          osInterface    = state->osInterface;
    PMHW_KERNEL_PARAM       mhwKernelParam = &(state->kernelParamsMhw);
    PRENDERHAL_STATE_HEAP   stateHeap      = state->renderHal->pStateHeap;
    int32_t                 hr             = CM_SUCCESS;
    int32_t                 freeSlot       = -1;
    int32_t                 headID         = 0;     // allocation ID of the head entry being examined
    uint32_t                syncTag        = 0;

    // ---- Case 1: a head kernel of the same family is already loaded ----
    kernelAllocation = state->renderHal->pStateHeap->pKernelAllocation;
    for (headID = 0; headID < state->kernelNumInGsh; headID++, kernelAllocation++)
    {
        if (!kernelAllocation->cloneKernelParams.isHeadKernel)
        {
            continue;
        }

        const bool sameFamily =
            // the original kernel this one was cloned from is loaded as head
            (kernelAllocation->iKUID == kernelParam->clonedKernelParam.kernelID) ||
            // another clone of the same original kernel is serving as the head
            (kernelAllocation->cloneKernelParams.cloneKernelID == kernelParam->clonedKernelParam.kernelID) ||
            // a clone is serving as the head and this is its original kernel
            (kernelAllocation->cloneKernelParams.cloneKernelID == static_cast<int>(kernelParam->kernelId >> 32));
        if (!sameFamily)
        {
            continue;
        }

        // Found the head: insert a clone entry that references it
        for (;;)
        {
            // Refresh the head's sync tag and access count first so that the head is not
            // the entry picked for eviction below. CmAddCurrentKernelToFreeSlot bumps the
            // head's count again after inserting the clone, so the clone is evicted first.
            if (state->cbbEnabled)
            {
                syncTag = osInterface->pfnGetGpuStatusTag(osInterface, osInterface->CurrentGpuContextOrdinal);
            }
            else
            {
                syncTag = state->renderHal->pStateHeap->dwNextTag;
            }
            kernelAllocation->dwSync  = syncTag;
            kernelAllocation->dwCount = state->renderHal->pStateHeap->dwAccessCounter++;

            freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, true);
            if (freeSlot >= 0)
            {
                hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
                         kernelParam, &(state->kernelParamsMhw), CM_CLONE_ENTRY, headID);
                goto finish;
            }

            // NOTE(review): eviction can compact the allocation table; confirm that
            // kernelAllocation / headID still designate the head entry afterwards.
            if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
            {
                return CM_FAILURE;
            }
        }
    }

    // ---- Case 2: no head found, this kernel becomes the head ----
    for (;;)
    {
        freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, false);
        if (freeSlot < 0)
        {
            if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
            {
                return CM_FAILURE;
            }
            continue;
        }

        if (kernelParam->clonedKernelParam.isClonedKernel)
        {
            // a clone arrived before its original: it serves as the head
            hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
                     kernelParam, &(state->kernelParamsMhw), CM_CLONE_AS_HEAD_KERNEL, -1);
        }
        else
        {
            hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
                     kernelParam, &(state->kernelParamsMhw), CM_HEAD_KERNEL, -1);
        }
        break;
    }

finish:
    if (hr != CM_SUCCESS)
    {
        return hr;
    }

    // Only a successfully placed kernel is flagged as loaded and reported to the caller
    mhwKernelParam->bLoaded = 1;
    kernelAllocation        = &stateHeap->pKernelAllocation[freeSlot];
    return hr;
}
//!
//! \brief    Get offset to sampler state
//! \details  Cm customized version of the RenderHal function: the sampler
//!           offset is built from the Sampler3D offset of the current media
//!           state's dynamic state plus the per-kernel offsets kept in
//!           state->taskParam. For 3D samplers the indirect state offset of
//!           samplerParam is filled in as well.
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE structure
//! \param    PRENDERHAL_INTERFACE renderHal
//!           [in] Pointer to RenderHal Interface
//! \param    int mediaID
//!           [in] Media ID associated with sampler; indexes the per-kernel offset tables
//! \param    unsigned int samplerOffset
//!           [in] sampler offset from the base of current kernel's sampler heap
//! \param    unsigned int samplerBTI
//!           [in] sampler BTI
//! \param    PMHW_SAMPLER_STATE_PARAM samplerParam
//!           [in/out] sampler parameters; Unorm.IndirectStateOffset is written
//!           when SamplerType is MHW_SAMPLER_TYPE_3D
//! \param    uint32_t *pdwSamplerOffset
//!           [out] optional; receives the computed sampler state offset
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS
//!
MOS_STATUS HalCm_GetSamplerOffset(
    PCM_HAL_STATE               state,
    PRENDERHAL_INTERFACE        renderHal,
    int                         mediaID,
    unsigned int                samplerOffset,
    unsigned int                samplerBTI,
    PMHW_SAMPLER_STATE_PARAM    samplerParam,
    uint32_t                    *pdwSamplerOffset)
{
    PRENDERHAL_DYNAMIC_STATE dynamicState = renderHal->pStateHeap->pCurMediaState->pDynamicState;

    // Sampler3D base + this kernel's sampler heap offset + offset inside that heap
    const unsigned int samplerStateOffset = dynamicState->Sampler3D.dwOffset +
                                            state->taskParam->samplerOffsetsByKernel[mediaID] +
                                            samplerOffset;
    if (pdwSamplerOffset != nullptr)
    {
        *pdwSamplerOffset = samplerStateOffset;
    }

    // Only 3D samplers carry an indirect state offset
    if (samplerParam->SamplerType != MHW_SAMPLER_TYPE_3D)
    {
        return MOS_STATUS_SUCCESS;
    }

    // Indirect states are laid out per BTI and aligned to (1 << MHW_SAMPLER_INDIRECT_SHIFT)
    samplerParam->Unorm.IndirectStateOffset = MOS_ALIGN_CEIL( dynamicState->Sampler3D.dwOffset +
                                                              state->taskParam->samplerIndirectOffsetsByKernel[mediaID] +
                                                              samplerBTI * renderHal->pHwSizes->dwSizeSamplerIndirectState,
                                                              1 << MHW_SAMPLER_INDIRECT_SHIFT);

    return MOS_STATUS_SUCCESS;
}
//!
//! \brief    Setup Interface Descriptor
//! \details  Set interface descriptor, (overriding RenderHal function),
//!           (Cm customized version of the RenderHal function which set
//!           dwSamplerOffset and dwSamplerCount by MDF owned parameters).
//!           On success the dynamic state's MediaID.iCurrent is incremented.
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE structure; state->taskParam supplies
//!           the per-kernel sampler offsets and counts
//! \param    PRENDERHAL_INTERFACE renderHal
//!           [in] Pointer to HW interface
//! \param    PRENDERHAL_MEDIA_STATE mediaState
//!           [in] Pointer to media state
//! \param    PRENDERHAL_KRN_ALLOCATION kernelAllocation
//!           [in] Pointer to kernel allocation
//! \param    PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams
//!           [in] Pointer to interface descriptor parameters
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS, or the status set by the MHW_RENDERHAL_CHK_*
//!           macros when a pointer is null or AddInterfaceDescriptorData fails
//!
MOS_STATUS HalCm_SetupInterfaceDescriptor(
    PCM_HAL_STATE                          state,
    PRENDERHAL_INTERFACE                   renderHal,
    PRENDERHAL_MEDIA_STATE                 mediaState,
    PRENDERHAL_KRN_ALLOCATION              kernelAllocation,
    PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams)
{
    MOS_STATUS               eStatus = MOS_STATUS_SUCCESS;
    // Value-initialised so that any member not assigned below is not left indeterminate
    MHW_ID_ENTRY_PARAMS      params  = {};
    PRENDERHAL_STATE_HEAP    stateHeap;
    PRENDERHAL_DYNAMIC_STATE dynamicState;
    unsigned long            mediaStateOffset;

    //-----------------------------------------
    // Every pointer dereferenced below is validated first
    MHW_RENDERHAL_CHK_NULL(state);
    MHW_RENDERHAL_CHK_NULL(state->taskParam);
    MHW_RENDERHAL_CHK_NULL(renderHal);
    MHW_RENDERHAL_CHK_NULL(renderHal->pMhwStateHeap);
    MHW_RENDERHAL_CHK_NULL(renderHal->pStateHeap);
    MHW_RENDERHAL_CHK_NULL(mediaState);
    MHW_RENDERHAL_CHK_NULL(mediaState->pDynamicState);
    MHW_RENDERHAL_CHK_NULL(kernelAllocation);
    MHW_RENDERHAL_CHK_NULL(interfaceDescriptorParams);
    //-----------------------------------------

    // Get states, params
    stateHeap        = renderHal->pStateHeap;
    dynamicState     = mediaState->pDynamicState;
    mediaStateOffset = dynamicState->memoryBlock.GetOffset();

    params.dwMediaIdOffset      = mediaStateOffset + dynamicState->MediaID.dwOffset;
    params.iMediaId             = interfaceDescriptorParams->iMediaID;
    params.dwKernelOffset       = kernelAllocation->dwOffset;

    // Sampler location and count come from the CM task, not from RenderHal
    params.dwSamplerOffset      = mediaStateOffset + dynamicState->Sampler3D.dwOffset + state->taskParam->samplerOffsetsByKernel[params.iMediaId];
    // The count is expressed in groups of 4 samplers (rounded up) and capped at 4
    params.dwSamplerCount       = ( state->taskParam->samplerCountsByKernel[params.iMediaId] + 3 ) / 4;
    params.dwSamplerCount       = (params.dwSamplerCount > 4) ? 4 : params.dwSamplerCount;

    params.dwBindingTableOffset = interfaceDescriptorParams->iBindingTableID * stateHeap->iBindingTableSize;
    params.iCurbeOffset         = interfaceDescriptorParams->iCurbeOffset;
    params.iCurbeLength         = interfaceDescriptorParams->iCurbeLength;

    params.bBarrierEnable                = interfaceDescriptorParams->blBarrierEnable;
    params.bGlobalBarrierEnable          = interfaceDescriptorParams->blGlobalBarrierEnable;    //It's only applied for BDW+
    params.dwNumberofThreadsInGPGPUGroup = interfaceDescriptorParams->iNumberThreadsInGroup;
    params.dwSharedLocalMemorySize       = renderHal->pfnEncodeSLMSize(renderHal, interfaceDescriptorParams->iSLMSize);
    params.iCrsThdConDataRdLn            = interfaceDescriptorParams->iCrsThrdConstDataLn;
    params.memoryBlock                   = &dynamicState->memoryBlock;

    MHW_RENDERHAL_CHK_STATUS(renderHal->pMhwStateHeap->AddInterfaceDescriptorData(&params));

    // One more media ID entry is now in use
    dynamicState->MediaID.iCurrent++;

finish:
    return eStatus;
}
/*----------------------------------------------------------------------------
| Name      : HalCm_AllocateMediaID  replace old RenderHal_AllocateMediaID
|             The kernel is not touched here; that is handled at load time.
|
| Purpose   : Allocates and sets up an Interface Descriptor for the Media Pipeline
|
| Arguments : [in] state            - Pointer to CM HAL state
|             [in] kernelParam      - Pointer to Kernel parameters
|             [in] kernelAllocation - Pointer to Kernel allocation
|             [in] bindingTableID   - Binding table ID
|             [in] curbeOffset      - Curbe offset (from CURBE base)
|
| Returns   : Media Interface descriptor ID
|             -1 if the media state, kernel allocation or curbe allocation is invalid
|             the negative value returned by pfnGetMediaID if no Interface
|             Descriptor entry is available
|
| Comments  : Kernel must be preloaded
|             Curbe must be pre-allocated (offset aligned to 32 bytes)
|             The status of the interface descriptor setup call is not checked
\---------------------------------------------------------------------------*/
//!
//! \brief    Allocate and set up a media interface descriptor for a kernel
//! \param    PCM_HAL_STATE state
//! \param    PCM_HAL_KERNEL_PARAM kernelParam
//! \param    PRENDERHAL_KRN_ALLOCATION kernelAllocation
//! \param    int32_t bindingTableID
//! \param    int32_t curbeOffset
//! \return   int32_t
//!
int32_t HalCm_AllocateMediaID(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_PARAM        kernelParam,
    PRENDERHAL_KRN_ALLOCATION   kernelAllocation,
    int32_t                    bindingTableID,
    int32_t                    curbeOffset)
{
    PRENDERHAL_INTERFACE    renderHal     = state->renderHal;
    PRENDERHAL_MEDIA_STATE  curMediaState = renderHal->pStateHeap->pCurMediaState;

    // A current media state is required; with DSH enabled its dynamic state is required too
    if (curMediaState == nullptr ||
        (state->dshEnabled && (curMediaState->pDynamicState == nullptr)))
    {
        CM_ASSERTMESSAGE("Invalid Media State.");
        return -1;
    }

    // The kernel must already be loaded into the kernel heap
    if (!kernelAllocation ||
        kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE ||
        kernelAllocation->iSize == 0)
    {
        CM_ASSERTMESSAGE("Error: Invalid Kernel Allocation.");
        return -1;
    }

    // Curbe space allocated so far bounds the kernel's curbe range
    const int32_t curbeSize      = kernelParam->curbeSizePerThread;
    const int32_t curbeAllocated = state->dshEnabled ? curMediaState->pDynamicState->Curbe.iCurrent
                                                     : curMediaState->iCurbeOffset;
    if (curbeSize <= 0)
    {
        // Curbe is not used by the kernel
        curbeOffset = 0;
    }
    else if (curbeOffset < 0 ||                                 // Not allocated
             (curbeOffset & 0x1F) != 0 ||                       // Invalid alignment
             (curbeOffset + curbeSize) > curbeAllocated)        // Invalid size
    {
        CM_ASSERTMESSAGE("Error: Invalid Curbe Allocation.");
        return -1;
    }

    // Obtain a media ID for this kernel from RenderHal
    const int32_t interfaceDescriptor = renderHal->pfnGetMediaID(renderHal, curMediaState, kernelAllocation);
    if (interfaceDescriptor < 0)
    {
        CM_ASSERTMESSAGE("Error: No Interface Descriptor available.");
        return interfaceDescriptor;
    }

    // CURBE size and offset setting, by pipeline flavour:
    //   Media w/o group : only per-thread CURBE is used; no barriers, no SLM
    //   Media w/ group  : per-thread CURBE is zero and all CURBE is treated as cross-thread
    //   GPGPU           : both per-thread and cross-thread CURBE are set
    const bool mediaWithoutGroup = (!kernelParam->gpgpuWalkerParams.gpgpuEnabled) &&
                                   (kernelParam->kernelThreadSpaceParam.groupSelect == CM_MW_GROUP_NONE) &&
                                   (state->taskParam->mediaWalkerGroupSelect == CM_MW_GROUP_NONE);
    const bool mediaWithGroup    = (!kernelParam->gpgpuWalkerParams.gpgpuEnabled) && !mediaWithoutGroup;

    RENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams;
    interfaceDescriptorParams.iMediaID        = interfaceDescriptor;
    interfaceDescriptorParams.iBindingTableID = bindingTableID;
    interfaceDescriptorParams.iCurbeOffset    = curbeOffset;

    interfaceDescriptorParams.iCurbeLength        = mediaWithGroup ? 0 : kernelParam->curbeSizePerThread;
    interfaceDescriptorParams.iCrsThrdConstDataLn = mediaWithGroup ? kernelParam->curbeSizePerThread
                                                                   : kernelParam->crossThreadConstDataLen;

    // This field should not be set to 0 even if the barrier is disabled, since an accurate
    // value is needed for proper pre-emption.
    interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1;

    if (mediaWithoutGroup)
    {
        interfaceDescriptorParams.blBarrierEnable       = false;
        interfaceDescriptorParams.blGlobalBarrierEnable = false;
        interfaceDescriptorParams.iSLMSize              = 0;
    }
    else
    {
        interfaceDescriptorParams.blBarrierEnable       = (kernelParam->barrierMode != CM_NO_BARRIER) ? true : false;
        interfaceDescriptorParams.blGlobalBarrierEnable = (kernelParam->barrierMode == CM_GLOBAL_BARRIER) ? true : false;
        interfaceDescriptorParams.iSLMSize              = kernelParam->slmSize;
    }

    // Write the descriptor: CM's own routine with the new sampler heap, RenderHal's otherwise
    if (state->useNewSamplerHeap == true)
    {
        HalCm_SetupInterfaceDescriptor(state, renderHal, curMediaState, kernelAllocation, &interfaceDescriptorParams);
    }
    else
    {
        // Setup Media ID entry - this call could be HW dependent
        renderHal->pfnSetupInterfaceDescriptor(
            renderHal,
            curMediaState,
            kernelAllocation,
            &interfaceDescriptorParams);
    }

    return interfaceDescriptor;
}
//!
//! \brief    Reports whether a 2D surface is treated as a render target
//! \details  Looks up the read-sync flag recorded for the surface on the
//!           current GPU context; the surface counts as a render target
//!           exactly when that flag is not set.
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE structure
//! \param    uint32_t index
//!           [in] Index into state->umdSurf2DTable
//! \return   bool
//!           false if the read-sync flag is set, true otherwise
//!
bool isRenderTarget(PCM_HAL_STATE state, uint32_t index)
{
    const bool readSync = state->umdSurf2DTable[index].readSyncs[state->osInterface->CurrentGpuContextOrdinal];
    return !readSync;
}
int32_t HalCm_DSH_LoadKernelArray(
PCM_HAL_STATE state,
PCM_HAL_KERNEL_PARAM *kernelArray,
int32_t kernelCount,
PRENDERHAL_KRN_ALLOCATION *krnAllocation)
{
PRENDERHAL_INTERFACE renderHal;
PCM_HAL_KERNEL_PARAM kernel;
PMHW_STATE_HEAP_MEMORY_BLOCK memoryBlock; // Kernel memory block
int32_t totalSize; // Total size
uint32_t blockSize[CM_MAX_KERNELS_PER_TASK]; // Size of kernels to load
int32_t blockCount; // Number of kernels to load
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
int32_t hr = CM_FAILURE;
renderHal = state->renderHal;
state->criticalSectionDSH->Acquire();
do
{
blockCount = 0;
totalSize = 0;
// Obtain list of kernels already loaded, discard kernels loaded in older heaps.
// Calculate total size of kernels to be loaded, and get size of largest kernel.
for (int i = 0; i < kernelCount; i++)
{
// Find out if kernel is already allocated and loaded in ISH
kernel = kernelArray[i];
krnAllocation[i] = (PRENDERHAL_KRN_ALLOCATION)renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((kernel->kernelId >> 32)), -1);
// Kernel is allocated - check if kernel is in current ISH
if (krnAllocation[i])
{
// Check if kernel is loaded
memoryBlock = krnAllocation[i]->pMemoryBlock;
if (memoryBlock)
{
// Kernel needs to be reloaded in current heap
if (memoryBlock->pStateHeap != renderHal->pMhwStateHeap->GetISHPointer() || state->forceKernelReload) //pInstructionStateHeaps
{
renderHal->pMhwStateHeap->FreeDynamicBlockDyn(MHW_ISH_TYPE, memoryBlock);
krnAllocation[i]->pMemoryBlock = nullptr;
}
else
{
// Increment kernel usage count, used in kernel caching architecture
state->dshKernelCacheHit++;
krnAllocation[i]->dwCount++;
// Lock kernel to avoid removal while loading other kernels
krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_LOCKED;
}
}
else if (krnAllocation[i]->dwFlags == RENDERHAL_KERNEL_ALLOCATION_REMOVED)
{
// This is a kernel that was unloaded and now needs to be reloaded
// Track how many times this "cache miss" happens to determine if the
// ISH is under pressure and needs to be expanded
state->dshKernelCacheMiss++;
}
}
else
{
// Assign kernel allocation for this kernel
krnAllocation[i] = renderHal->pfnAllocateDynamicKernel(renderHal, static_cast<int>((kernel->kernelId >> 32)), -1);
CM_CHK_NULL_GOTOFINISH_MOSERROR(krnAllocation[i]);
}
// Kernel is not loaded -> add to list of kernels to be loaded
if (krnAllocation[i]->pMemoryBlock == nullptr &&
krnAllocation[i]->dwFlags != RENDERHAL_KERNEL_ALLOCATION_LOADING)
{
// Increment amount of data that needs to be loaded in ISH (kernel already registered but unloaded)
blockSize[blockCount++] = kernel->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
totalSize += kernel->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
// Flag this kernel as loading - one single kernel instance is needed, not multiple!
// If the same kernel is used multiple times, avoid multiple reservations/loads
krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_LOADING;
}
}
// Use Hit/Miss ratio to ignore eventual cache misses
// This code prevents ISH reallocation in case of eventual cache misses
while (state->dshKernelCacheHit >= HAL_CM_KERNEL_CACHE_HIT_TO_MISS_RATIO)
{
if (state->dshKernelCacheMiss > 0) state->dshKernelCacheMiss--;
state->dshKernelCacheHit -= HAL_CM_KERNEL_CACHE_HIT_TO_MISS_RATIO;
}
// Grow the kernel heap if too many kernels are being reloaded or there isn't enough room to load all kernels
if (state->dshKernelCacheMiss > HAL_CM_KERNEL_CACHE_MISS_THRESHOLD ||
renderHal->pfnRefreshDynamicKernels(renderHal, totalSize, blockSize, blockCount) != MOS_STATUS_SUCCESS)
{
renderHal->pfnExpandKernelStateHeap(renderHal, (uint32_t)totalSize);
state->dshKernelCacheHit = 0;
state->dshKernelCacheMiss = 0;
continue;
}
// blockSize/blockCount define a list of blocks that must be loaded in current ISH for the
// kernels not yet present. Pre-existing kernels are marked as bStatic to avoid being unloaded here
if (blockCount > 0)
{
// Allocate array of kernels
MHW_STATE_HEAP_DYNAMIC_ALLOC_PARAMS params;
params.piSizes = (int32_t*)blockSize;
params.iCount = blockCount;
params.dwAlignment = RENDERHAL_KERNEL_BLOCK_ALIGN;
params.bHeapAffinity = true; // heap affinity - load all kernels in the same heap
params.pHeapAffinity = renderHal->pMhwStateHeap->GetISHPointer(); // Select the active instruction heap
params.dwScratchSpace = 0;
params.bZeroAssignedMem = true;
params.bStatic = true;
params.bGrow = false;
// Try to allocate array of blocks; if it fails, we may need to clear some space or grow the heap!
memoryBlock = renderHal->pMhwStateHeap->AllocateDynamicBlockDyn(MHW_ISH_TYPE, &params);
if (!memoryBlock)
{
// Reset flags
for (int i = 0; i < kernelCount; i++)
{
if (krnAllocation[i] && krnAllocation[i]->dwFlags == RENDERHAL_KERNEL_ALLOCATION_LOADING)
{
krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_STALE;
}
}
if (renderHal->pfnRefreshDynamicKernels(renderHal, totalSize, blockSize, blockCount) != MOS_STATUS_SUCCESS)
{
renderHal->pfnExpandKernelStateHeap(renderHal, (uint32_t)totalSize);
}
continue;
}
// All blocks are allocated in ISH
// Setup kernel allocations, load kernel binaries
for (int32_t i = 0; i < kernelCount; i++)
{
// Load kernels in ISH
if (!krnAllocation[i]->pMemoryBlock)
{
PCM_HAL_KERNEL_PARAM kernelParam = kernelArray[i];
PRENDERHAL_KRN_ALLOCATION allocation = krnAllocation[i];
if (memoryBlock)
{
allocation->iKID = -1;
allocation->iKUID = static_cast<int>((kernelArray[i]->kernelId >> 32));
allocation->iKCID = -1;
FrameTrackerTokenFlat_SetProducer(&allocation->trackerToken, &renderHal->trackerProducer);
FrameTrackerTokenFlat_Merge(&allocation->trackerToken,
renderHal->currentTrackerIndex,
renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex));
allocation->dwOffset = memoryBlock->dwDataOffset;
allocation->iSize = kernelArray[i]->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
allocation->dwCount = 0;
allocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_USED;
allocation->Params = state->kernelParamsRenderHal.Params;
allocation->pMhwKernelParam = &state->kernelParamsMhw;
allocation->pMemoryBlock = memoryBlock;
// Copy kernel data
// Copy MovInstruction First
if (allocation->pMemoryBlock &&
allocation->pMemoryBlock->dwDataSize >= kernelParam->kernelBinarySize)
{
MOS_SecureMemcpy(allocation->pMemoryBlock->pDataPtr,
kernelParam->movInsDataSize,
kernelParam->movInsData,
kernelParam->movInsDataSize);
// Copy Cm Kernel Binary
MOS_SecureMemcpy(allocation->pMemoryBlock->pDataPtr + kernelParam->movInsDataSize,
kernelParam->kernelBinarySize - kernelParam->movInsDataSize,
kernelParam->kernelBinary,
kernelParam->kernelBinarySize - kernelParam->movInsDataSize);
// Padding bytes dummy instructions after kernel binary to resolve page fault issue
MOS_ZeroMemory(allocation->pMemoryBlock->pDataPtr + kernelParam->kernelBinarySize, CM_KERNEL_BINARY_PADDING_SIZE);
}
// Get next memory block returned as part of the array
memoryBlock = memoryBlock->pNext;
}
}
}
}
// Kernel load was successful, or nothing else to load -
// Quit the kernel load loop
hr = CM_SUCCESS;
eStatus = MOS_STATUS_SUCCESS;
break;
} while (1);
finish:
if (eStatus == MOS_STATUS_SUCCESS)
{
for (int32_t i = 0; i < kernelCount; i++)
{
renderHal->pfnTouchDynamicKernel(renderHal, krnAllocation[i]);
}
}
state->criticalSectionDSH->Release();
return hr;
}
//*-----------------------------------------------------------------------------
//| Purpose: Compute the dynamic media state limits (media IDs, curbe, spill and
//|          sampler counts) required by a set of kernels
//| Params : state          [in]  CM HAL state
//|          params         [out] zeroed first, then filled with the computed limits
//|          numKernels     [in]  number of entries in kernels / piCurbeOffsets
//|          kernels        [in]  kernel parameters, one entry per kernel
//|          piCurbeOffsets [in]  curbe start offset of each kernel
//| Notes  : When the new sampler heap is used, the per-kernel sampler offsets and
//|          counts in state->taskParam are updated as a side effect; otherwise
//|          state->samplerIndexTable is reset to CM_INVALID_INDEX.
//| Returns: Always MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_DSH_GetDynamicStateConfiguration(
    PCM_HAL_STATE                         state,
    PRENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params,
    uint32_t                              numKernels,
    PCM_HAL_KERNEL_PARAM                  *kernels,
    uint32_t                              *piCurbeOffsets)
{
    PCM_HAL_KERNEL_PARAM      cmKernel;
    PRENDERHAL_INTERFACE      renderHal = state->renderHal;
    PRENDERHAL_KRN_ALLOCATION krnAllocation;

    MOS_ZeroMemory(params, sizeof(RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS));

    params->iMaxMediaIDs = numKernels;

    for (uint32_t i = 0; i < numKernels; i++)
    {
        cmKernel = kernels[i];

        // get max curbe size
        int32_t curbeSize = MOS_ALIGN_CEIL(cmKernel->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
        int32_t curbeOffset = piCurbeOffsets[i] + curbeSize;
        params->iMaxCurbeOffset = MOS_MAX(params->iMaxCurbeOffset, curbeOffset);
        params->iMaxCurbeSize += curbeSize;

        // get max spill size
        params->iMaxSpillSize = MOS_MAX(params->iMaxSpillSize, (int32_t)cmKernel->spillSize);

        // check if kernel already used - increase Max Media ID to allow BB reuse logic
        krnAllocation = renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((cmKernel->kernelId >> 32)), -1);
        if (krnAllocation)
        {
            params->iMaxMediaIDs = MOS_MAX(params->iMaxMediaIDs, krnAllocation->iKID + 1);
        }
    }

    if (state->useNewSamplerHeap == true)
    {
        // Update offset to the base of first kernel and update count
        // for 3D sampler, update indirect state information
        unsigned int heapOffset = 0;
        unsigned int sampler3DCount = 0;
        MHW_SAMPLER_STATE_PARAM samplerParamMhw = {};
        SamplerParam samplerParam = {};
        samplerParamMhw.SamplerType = MHW_SAMPLER_TYPE_3D;
        state->cmHalInterface->GetSamplerParamInfoForSamplerType(&samplerParamMhw, samplerParam);

        for (unsigned int i = 0; i < numKernels; i++)
        {
            cmKernel = kernels[i];
            std::list<SamplerParam> *sampler_heap = cmKernel->samplerHeap;
            std::list<SamplerParam>::iterator iter;

            heapOffset = MOS_ALIGN_CEIL(heapOffset, MHW_SAMPLER_STATE_ALIGN);
            state->taskParam->samplerOffsetsByKernel[i] = heapOffset;
            state->taskParam->samplerCountsByKernel[i] = sampler_heap->size();

            if (sampler_heap->size() > 0)
            {
                // The last list entry determines how far this kernel's sampler states extend
                heapOffset = heapOffset + sampler_heap->back().heapOffset + sampler_heap->back().size;

                // 3D sampler needs indirect sampler heap, so calculates the required size
                // and offset for indirect sampler heap.
                unsigned int max3DCount = 0;
                for (iter = sampler_heap->begin(); iter != sampler_heap->end(); ++iter)
                {
                    if (iter->elementType == samplerParam.elementType)
                    {
                        if (iter->userDefinedBti == true)
                        {
                            max3DCount = iter->bti + 1;
                        }
                        else
                        {
                            max3DCount += 1;
                        }
                    }
                }
                heapOffset = MOS_ALIGN_CEIL(heapOffset, MHW_SAMPLER_STATE_ALIGN);
                state->taskParam->samplerIndirectOffsetsByKernel[i] = heapOffset;
                heapOffset += max3DCount * state->renderHal->pHwSizes->dwSizeSamplerIndirectState;
                sampler3DCount += max3DCount;
            }
        }

        // Temporary solution for DSH sampler heap assignment:
        // Adjust sampler space for DSH, because the DSH use sampler count to
        // allocate the space. However the mechanism is not correct. The sampler
        // heap size is actually calculated by the maximum offset of the largest
        // sampler type.
        // So the offset of largest element plus the size of all of the largest
        // element samplers should be equal to the maximum size. However we cannot
        // do this because of the DSH's mechanism.
        // To resolve this, we first let DSH allocate enough 3D samplers
        // (because 3D samplers has indirect state), then just convert the rest of
        // the heap to AVS. Here we only care about the size, not the correct
        // number because we are going to calculate the offset by ourself.
        // Since DSH allocation has some alignments inside, the actually size of the
        // heap should be slightly larger, which should be OK.
        samplerParamMhw.SamplerType = MHW_SAMPLER_TYPE_AVS;
        state->cmHalInterface->GetSamplerParamInfoForSamplerType(&samplerParamMhw, samplerParam);

        // Both values are per-kernel averages (rounded up). With no kernels there is
        // nothing to average and the divisions below would divide by zero, so the
        // zero-initialized values are kept in that case.
        if (numKernels > 0)
        {
            params->iMaxSamplerIndex3D = (sampler3DCount + numKernels - 1) / numKernels;
            params->iMaxSamplerIndexAVS = ((heapOffset - sampler3DCount * (state->renderHal->pHwSizes->dwSizeSamplerState + state->renderHal->pHwSizes->dwSizeSamplerIndirectState)) + samplerParam.btiMultiplier * numKernels - 1) / (samplerParam.btiMultiplier * numKernels);
        }
    }
    else
    {
        // Get total sampler count

        // Initialize pointers to samplers and reset sampler index table
        MOS_FillMemory(state->samplerIndexTable, state->cmDeviceParam.maxSamplerTableSize, CM_INVALID_INDEX);

        params->iMaxSamplerIndex3D = CM_MAX_3D_SAMPLER_SIZE;
        params->iMaxSamplerIndexAVS = CM_MAX_AVS_SAMPLER_SIZE;
        params->iMaxSamplerIndexConv = 0;
        params->iMaxSamplerIndexMisc = 0;
        params->iMax8x8Tables = CM_MAX_AVS_SAMPLER_SIZE;
    }

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Unregister the dynamic kernel allocation that matches a kernel ID
//| Params : state    [in] CM HAL state
//|          kernelId [in] 64-bit kernel ID; its upper 32 bits are used for the lookup
//| Returns: Always MOS_STATUS_SUCCESS, whether or not an allocation was found
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_DSH_UnregisterKernel(
    PCM_HAL_STATE               state,
    uint64_t                    kernelId)
{
    PRENDERHAL_INTERFACE renderHal = state->renderHal;
    const int kernelUniqueId = static_cast<int>((kernelId >> 32));

    PRENDERHAL_KRN_ALLOCATION allocation = renderHal->pfnSearchDynamicKernel(renderHal, kernelUniqueId, -1);
    if (!allocation)
    {
        // Nothing registered under this ID - nothing to do
        return MOS_STATUS_SUCCESS;
    }

    // The unregister call itself is serialized with the DSH critical section
    state->criticalSectionDSH->Acquire();
    renderHal->pfnUnregisterKernel(renderHal, allocation);
    state->criticalSectionDSH->Release();

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup Sampler State for one sampler kernel argument
//| Params : state       [in]  CM HAL state (checked for null)
//|          kernelParam [in]  kernel parameters; its samplerHeap list is searched
//|                            when the new sampler heap is used
//|          argParam    [in]  kernel argument holding the sampler table index
//|          indexParam  [in]  index bookkeeping; samplerIndexCount is range-checked
//|          mediaID     [in]  media ID forwarded to the sampler offset lookup
//|          threadIndex [in]  selects the per-thread value inside argParam
//|          buffer      [out] optional; receives the sampler index at
//|                            argParam->payloadOffset when not null
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupSamplerState(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_PARAM        kernelParam,
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    PCM_HAL_INDEX_PARAM         indexParam,
    int32_t                     mediaID,
    uint32_t                    threadIndex,
    uint8_t                     *buffer)
{
    MOS_STATUS                  eStatus;
    PRENDERHAL_INTERFACE        renderHal;
    PMHW_SAMPLER_STATE_PARAM    samplerParam;
    uint8_t                     *src;
    uint8_t                     *dst;
    uint32_t                    index;
    uint32_t                    samplerIndex = 0;
    void                        *sampler = nullptr;
    uint32_t                    samplerOffset = 0;

    eStatus = MOS_STATUS_SUCCESS;

    CM_CHK_NULL_GOTOFINISH_MOSERROR(state);

    renderHal    = state->renderHal;

    if (indexParam->samplerIndexCount >= (uint32_t)renderHal->StateHeapSettings.iSamplers)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Exceeded Max samplers '%d'",
            indexParam->samplerIndexCount);
        goto finish;
    }

    // Get the Index to sampler array from the kernel data
    //----------------------------------
    CM_ASSERT(argParam->unitSize == sizeof(index));
    //----------------------------------

    src    = argParam->firstValue + (threadIndex * argParam->unitSize);
    index  = *((uint32_t*)src);

    // check to see if the data present for the sampler in the array
    if (index >= state->cmDeviceParam.maxSamplerTableSize ||
        !state->samplerTable[index].bInUse)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid Sampler array index '%d'", index);
        goto finish;
    }

    // Setup samplers
    samplerParam = &state->samplerTable[index];

    if (state->useNewSamplerHeap == true)
    {
        // Look up the regular-BTI heap entry that was assigned to this sampler
        std::list<SamplerParam>::iterator iter;
        for (iter = kernelParam->samplerHeap->begin(); iter != kernelParam->samplerHeap->end(); ++iter)
        {
            if ((iter->samplerTableIndex == index)&&(iter->regularBti == true))
            {
                break;
            }
        }
        if (iter != kernelParam->samplerHeap->end())
        {
            samplerIndex = iter->bti;
        }
        else
        {
            // There must be incorrect internal logic
            CM_ASSERTMESSAGE( "BTI calculation error in cm_hal\n");
            return MOS_STATUS_UNKNOWN;
        }
        HalCm_GetSamplerOffset(state, renderHal, mediaID, iter->heapOffset, iter->bti, samplerParam, &samplerOffset);
    }
    else
    {
        // Check to see if sampler is already assigned
        samplerIndex = state->samplerIndexTable[index];
        if ((int)samplerIndex == CM_INVALID_INDEX)
        {
            // Step used while searching for a free slot and for advancing the
            // per-element-type base afterwards. 64-element samplers advance by 2,
            // every other supported type by 1. 0 means unsupported type.
            unsigned int slotStride = 0;

            switch (state->samplerTable[index].ElementType)
            {
                case MHW_Sampler2Elements:
                case MHW_Sampler4Elements:
                case MHW_Sampler8Elements:
                case MHW_Sampler128Elements:
                    slotStride = 1;
                    break;

                case MHW_Sampler64Elements:
                    slotStride = 2;
                    break;

                default:
                    CM_ASSERTMESSAGE("Invalid sampler type '%d'.", state->samplerTable[index].SamplerType);
                    break;
            }

            if (slotStride != 0)
            {
                const auto elementType = state->samplerTable[index].ElementType;
                unsigned int slot = state->samplerStatistics.samplerIndexBase[elementType];

                // Walk forward from the current base until an unassigned slot is found.
                // The step must be the fixed stride: the former 64-element code used
                // "index += index + 2", which roughly doubled the index on every
                // iteration instead of stepping by 2.
                while (state->samplerIndexTable[slot] != CM_INVALID_INDEX)
                {
                    slot += slotStride;
                }
                samplerIndex = slot;
                state->samplerStatistics.samplerIndexBase[elementType] = (slot + slotStride);
            }
        }

        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnGetSamplerOffsetAndPtr(
            renderHal,
            mediaID,
            samplerIndex,
            samplerParam,
            &samplerOffset,
            &sampler));
    }

    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwStateHeap->AddSamplerStateData(
        samplerOffset,
        &(renderHal->pStateHeap->pCurMediaState->pDynamicState->memoryBlock),
        samplerParam));

    // Remember the assignment so later arguments using the same sampler reuse it
    state->samplerIndexTable[index] = (unsigned char)samplerIndex;

    // Update the Batch Buffer
    if (buffer)
    {
        dst = buffer + argParam->payloadOffset;
        *((uint32_t*)dst) = samplerIndex;
    }

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup Sampler State for a sampler whose binding table index is
//|          taken from a sampler BTI entry instead of being assigned here
//| Params : state           [in] CM HAL state
//|          kernelParam     [in] kernel parameters; its samplerHeap list is
//|                               searched when the new sampler heap is used
//|          samplerBTIEntry [in] array of (sampler table index, BTI) pairs
//|          samplerCount    [in] index of the entry of samplerBTIEntry to set up
//|          mediaID         [in] media ID forwarded to the sampler offset lookup
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupSamplerStateWithBTIndex(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_PARAM        kernelParam,
    PCM_HAL_SAMPLER_BTI_ENTRY   samplerBTIEntry,
    uint32_t                    samplerCount,
    int32_t                     mediaID )
{
    MOS_STATUS                  eStatus      = MOS_STATUS_SUCCESS;
    PRENDERHAL_INTERFACE        renderHal    = state->renderHal;
    PMHW_SAMPLER_STATE_PARAM    tableEntry;
    uint32_t                    tableIndex;
    uint32_t                    bti;
    void                        *samplerPtr  = nullptr;
    uint32_t                    stateOffset  = 0;
    const bool                  newHeap      = (state->useNewSamplerHeap == true);

    // The sampler count limit only applies to the legacy sampler heap
    if (!newHeap &&
        samplerCount >= (uint32_t)renderHal->StateHeapSettings.iSamplers)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Exceeded Max samplers '%d'",
            samplerCount);
        goto finish;
    }

    tableIndex = samplerBTIEntry[ samplerCount ].samplerIndex;

    // The entry must refer to a sampler table slot that exists and is in use
    if ( tableIndex >= state->cmDeviceParam.maxSamplerTableSize ||
         !state->samplerTable[ tableIndex ].bInUse )
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid Sampler array index '%d'", tableIndex );
        goto finish;
    }

    bti        = samplerBTIEntry[ samplerCount ].samplerBTI;
    tableEntry = &state->samplerTable[ tableIndex ];

    if (newHeap)
    {
        // Find the user-defined-BTI heap entry recorded for this (sampler, BTI) pair
        std::list<SamplerParam>::iterator entry = kernelParam->samplerHeap->begin();
        while (entry != kernelParam->samplerHeap->end())
        {
            if ((entry->samplerTableIndex == tableIndex) && (entry->bti == bti) && (entry->userDefinedBti == true))
            {
                break;
            }
            ++entry;
        }

        if (entry == kernelParam->samplerHeap->end())
        {
            // There must be incorrect internal logic
            CM_ASSERTMESSAGE("BTI calculation error in cm_hal\n");
            return MOS_STATUS_UNKNOWN;
        }

        HalCm_GetSamplerOffset(state, renderHal, mediaID, entry->heapOffset, entry->bti, tableEntry, &stateOffset);
    }
    else
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnGetSamplerOffsetAndPtr(renderHal, mediaID, bti, tableEntry, &stateOffset, &samplerPtr));
    }

    // Write the sampler state into the dynamic state block of the current media state
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwStateHeap->AddSamplerStateData(
        stateOffset,
        &(renderHal->pStateHeap->pCurMediaState->pDynamicState->memoryBlock),
        tableEntry));

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup Buffer surface State for one buffer kernel argument
//| Params : state         [in]  CM HAL state
//|          argParam      [in]  kernel argument holding the buffer table index
//|          indexParam    [in]  index bookkeeping used to pick free binding entries
//|          bindingTable  [in]  binding table the surface is bound into
//|          globalSurface [in]  negative for a regular surface; otherwise the
//|                              global surface slot, offset by reservedSurfaceStart
//|          threadIndex   [in]  selects the per-thread value inside argParam
//|          buffer        [out] optional; receives the binding table index at
//|                              argParam->payloadOffset when not null
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupBufferSurfaceState(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    PCM_HAL_INDEX_PARAM         indexParam,
    int32_t                     bindingTable,
    int16_t                     globalSurface,
    uint32_t                    threadIndex,
    uint8_t                     *buffer)
{
    MOS_STATUS                      eStatus;
    RENDERHAL_SURFACE               surface;
    PMOS_SURFACE                    mosSurface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_INTERFACE            renderHal;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntry;
    uint8_t                         *src;
    uint8_t                         *dst;
    uint32_t                        index;
    uint32_t                        btIndex;
    uint16_t                        memObjCtl;
    uint32_t                        offsetSrc;
    PRENDERHAL_STATE_HEAP           stateHeap;
    CM_SURFACE_BTI_INFO             surfBTIInfo;

    eStatus              = MOS_STATUS_UNKNOWN;
    renderHal    = state->renderHal;
    //GT-PIN
    PCM_HAL_TASK_PARAM     taskParam = state->taskParam;

    // Get the Index to Buffer array from the kernel data
    CM_ASSERT(argParam->unitSize == sizeof(index));

    //Init surfBTIInfo
    state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);

    src      = argParam->firstValue + (threadIndex * argParam->unitSize);
    index    = *((uint32_t*)src) & CM_SURFACE_MASK;
    if (index == CM_NULL_SURFACE)
    {
        if (buffer)
        {
            dst = buffer + argParam->payloadOffset;
            *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    // check to see if index is valid
    // This must happen before bufferTable[index] is touched for any other
    // purpose, otherwise an out-of-range index reads past the table.
    if (index >= state->cmDeviceParam.maxBufferTableSize ||
        (state->bufferTable[index].size == 0))
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid Buffer surface array index '%d'", index);
        goto finish;
    }

    memObjCtl = state->bufferTable[index].memObjCtl;
    if (!memObjCtl)
    {
        memObjCtl = CM_DEFAULT_CACHE_TYPE;
    }

    // Check to see if buffer is already assigned
    btIndex = state->btiBufferIndexTable[index].BTI.regularSurfIndex;
    if (btIndex == ( unsigned char )CM_INVALID_INDEX || argParam->aliasCreated == true)
    {
        // Not bound yet (or an alias needs its own state): build and bind a new surface state
        if (globalSurface < 0)
        {
            btIndex = HalCm_GetFreeBindingIndex(state, indexParam, 1);
        }
        else
        {
            btIndex = globalSurface + surfBTIInfo.reservedSurfaceStart; //CM_BINDING_START_INDEX_OF_GLOBAL_SURFACE(state);
            if ( btIndex >=  (surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER) ) {
                eStatus = MOS_STATUS_INVALID_PARAMETER;
                CM_ASSERTMESSAGE("Exceeded Max Global Surfaces '%d'", btIndex);
                goto finish;
            }
        }
        // Get Details of Buffer surface and fill the surface
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACEBUFFER, index, 0));

        MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));

        // override the buffer offset and size if alias is used
        mosSurface = &(surface.OsSurface);
        if (state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateSize)
        {
            mosSurface->dwWidth = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateSize;
            mosSurface->dwOffset = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateOffset;
            surface.rcSrc.right = mosSurface->dwWidth;
            surface.rcDst.right = mosSurface->dwWidth;
        }
        // override the mocs value if it is set
        if (state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateMOCS)
        {
            memObjCtl = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateMOCS;
        }

        //Cache configurations
        state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

        // Set the bRenderTarget by default
        surfaceParam.bRenderTarget = true;

        // Setup Buffer surface
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupBufferSurfaceState(
                renderHal,
                &surface,
                &surfaceParam,
                &surfaceEntry));

        // Bind the surface State
        CM_ASSERT(((int32_t)btIndex) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
                        renderHal,
                        bindingTable,
                        btIndex,
                        surfaceEntry));

        if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
            (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
        {
            //GT-Pin
            uint32_t dummy = 0;
            CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
                    state,
                    indexParam,
                    btIndex,
                    surface.OsSurface,
                    globalSurface,
                    nullptr,
                    dummy,
                    surfaceParam,
                    CM_ARGUMENT_SURFACEBUFFER));
        }

        // Update index to table
        state->btiBufferIndexTable[ index ].BTI.regularSurfIndex = btIndex;
        state->btiBufferIndexTable[ index ].nPlaneNumber = 1;

        stateHeap = renderHal->pStateHeap;
        offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                            ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
                            ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
                            ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry

        state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
    }
    else
    {
        // Already bound once: reuse the recorded entry if it lies inside the
        // current binding table, otherwise copy it into this binding table.
        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                                     ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
                                     ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table

        uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );

        // Position of the recorded entry relative to the start of this binding table
        int nEntryIndex = (int) ((uint32_t*)( state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);

        if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
        {
            uint32_t surfaceEntries = state->btiBufferIndexTable[ index ].nPlaneNumber;
            if ( globalSurface < 0 )
            {
                btIndex = HalCm_GetFreeBindingIndex( state, indexParam, surfaceEntries );
            }
            else
            {
                btIndex = globalSurface + surfBTIInfo.reservedSurfaceStart;
                if ( btIndex >= (surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER ) )
                {
                    eStatus = MOS_STATUS_INVALID_PARAMETER;
                    CM_ASSERTMESSAGE( "Exceeded Max Global Surfaces '%d'", btIndex );
                    goto finish;
                }
            }

            // Bind the surface State
            CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);

            // Get Offset to Current Binding Table
            uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry

            uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * surfaceEntries, state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * surfaceEntries );

            // Update index to table
            state->btiBufferIndexTable[ index ].BTI.regularSurfIndex = btIndex;
            state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
        }
    }

    // Update the Batch Buffer
    if (buffer)
    {
        dst = buffer + argParam->payloadOffset;
        *((uint32_t*)dst) = btIndex;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 3D surface State for one 3D surface kernel argument
//| Params : state        [in]  CM HAL state
//|          argParam     [in]  kernel argument holding the 3D surface table index
//|          indexParam   [in]  index bookkeeping used to pick free binding entries
//|          bindingTable [in]  binding table the surface is bound into
//|          threadIndex  [in]  selects the per-thread value inside argParam
//|          buffer       [out] optional; receives the binding table index at
//|                             argParam->payloadOffset when not null
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup3DSurfaceState(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    PCM_HAL_INDEX_PARAM         indexParam,
    int32_t                     bindingTable,
    uint32_t                    threadIndex,
    uint8_t                     *buffer)
{
    MOS_STATUS                      eStatus;
    PRENDERHAL_INTERFACE            renderHal;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
    RENDERHAL_GET_SURFACE_INFO      info;
    uint8_t                         *src;
    uint8_t                         *dst;
    int32_t                         nSurfaceEntries;
    uint32_t                        index;
    uint32_t                        btIndex;
    uint16_t                        memObjCtl;
    uint32_t                        i;
    uint32_t                        offsetSrc;
    PRENDERHAL_STATE_HEAP           stateHeap;
    CM_SURFACE_BTI_INFO             surfBTIInfo;

    eStatus              = MOS_STATUS_UNKNOWN;
    renderHal    = state->renderHal;
    //GT-PIN
    PCM_HAL_TASK_PARAM     taskParam = state->taskParam;

    state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);

    // Get the Index to 3dsurface array from the kernel data
    CM_ASSERT(argParam->unitSize == sizeof(index));
    src      = argParam->firstValue + (threadIndex * argParam->unitSize);
    index    = *((uint32_t*)src) & CM_SURFACE_MASK;
    if (index == CM_NULL_SURFACE)
    {
        if (buffer)
        {
            dst = buffer + argParam->payloadOffset;
            *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    // check to see if the data present for the 3d surface in the array
    // This must happen before surf3DTable[index] is touched for any other
    // purpose, otherwise an out-of-range index reads past the table.
    if ((index >= state->cmDeviceParam.max3DSurfaceTableSize)            ||
        Mos_ResourceIsNull(&state->surf3DTable[index].osResource))
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid 3D surface array index '%d'", index);
        goto finish;
    }

    memObjCtl = state->surf3DTable[index].memObjCtl;
    if (!memObjCtl)
    {
        memObjCtl = CM_DEFAULT_CACHE_TYPE;
    }

    // Check to see if surface is already assigned
    btIndex = state->bti3DIndexTable[index].BTI.regularSurfIndex;
    if ( btIndex == ( unsigned char )CM_INVALID_INDEX )
    {
        // Not bound yet: build the surface state entries and bind each of them
        uint32_t tempPlaneIndex = 0;
        nSurfaceEntries = 0;

        // Get Details of 3D surface and fill the surface
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE3D, index, 0));

        // Setup 3D surface
        MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
        surfaceParam.Type       = renderHal->SurfaceTypeDefault;
        surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
        surfaceParam.bRenderTarget = true;

        //Cache configurations
        state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
                    renderHal,
                    &surface,
                    &surfaceParam,
                    &nSurfaceEntries,
                    surfaceEntries,
                    nullptr));

        MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));

        CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
            state->osInterface,
            &info,
            &surface.OsSurface));

        // Reserve consecutive binding table entries, one per surface state entry
        btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
        for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
        {
            *(surfaceEntries[i]->pSurface) = surface.OsSurface;

            // Bind the surface State
            CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
                            renderHal,
                            bindingTable,
                            btIndex + i,
                            surfaceEntries[i]));

            if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
                (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
            {
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
                        state,
                        indexParam,
                        btIndex + i,
                        surface.OsSurface,
                        0,
                        surfaceEntries[i],
                        tempPlaneIndex,
                        surfaceParam,
                        CM_ARGUMENT_SURFACE3D));
            }
        }
        // Update index to table
        state->bti3DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
        state->bti3DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;

        stateHeap = renderHal->pStateHeap;
        offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                            ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
                            ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
                            ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry

        state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
    }
    else
    {
        // Already bound once: reuse the recorded entry if it lies inside the
        // current binding table, otherwise copy it into this binding table.
        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                                     ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
                                     ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table

        uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );

        // Position of the recorded entry relative to the start of this binding table
        int nEntryIndex = (int)((uint32_t*)( state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);

        if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
        {
            nSurfaceEntries = state->bti3DIndexTable[ index ].nPlaneNumber;
            btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );

            // Bind the surface State
            CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);

            // Get Offset to Current Binding Table
            uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry

            uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );

            // Update index to table
            state->bti3DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
            state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
        }
    }

    // Update the Batch Buffer
    if (buffer)
    {
        dst = buffer + argParam->payloadOffset;
        *((uint32_t*)dst) = btIndex;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
/*----------------------------------------------------------------------------
| Purpose : Sets the vertical stride fields of the surface state parameters
|           according to the frame type:
|             CM_FRAME        -> bVertStride = 0, bVertStrideOffs = 0
|             CM_TOP_FIELD    -> bVertStride = 1, bVertStrideOffs = 0
|             CM_BOTTOM_FIELD -> bVertStride = 1, bVertStrideOffs = 1
| Returns : MOS_STATUS_SUCCESS for the frame types above; MOS_STATUS_UNKNOWN
|           for any other frame type, in which case params is left untouched
\---------------------------------------------------------------------------*/
MOS_STATUS HalCm_HwSetSurfaceProperty(
    PCM_HAL_STATE                   state,
    CM_FRAME_TYPE                   frameType,
    PRENDERHAL_SURFACE_STATE_PARAMS params)
{
    const bool isFrame       = (frameType == CM_FRAME);
    const bool isTopField    = (frameType == CM_TOP_FIELD);
    const bool isBottomField = (frameType == CM_BOTTOM_FIELD);

    if (!isFrame && !isTopField && !isBottomField)
    {
        // Unsupported frame type: report it without modifying params
        return MOS_STATUS_UNKNOWN;
    }

    // Only field (non-frame) access uses a vertical stride;
    // the stride offset is set for the bottom field only.
    params->bVertStride     = isFrame ? 0 : 1;
    params->bVertStrideOffs = isBottomField ? 1 : 0;

    return MOS_STATUS_SUCCESS;
}
// A special treatment of NV12 format. Offset of the UV plane in an NV12 surface is adjusted, so
// this plane can be accessed as a separate R8G8 surface in kernels.
//
// surfaceStateParam: surface state parameters of the alias; only its format is read.
// mosSurface:        surface to update in place (Format always, dwOffset for the NV12 case).
//
// Returns true when the surface offset was redirected to the UV plane, i.e. the alias format
// is R8G8UN and the surface is NV12. In every other case only the format is overridden with
// the alias format and false is returned.
static bool UpdateSurfaceAliasPlaneOffset(
    CM_HAL_SURFACE2D_SURFACE_STATE_PARAM *surfaceStateParam,
    MOS_SURFACE *mosSurface)
{
    if (Format_R8G8UN != surfaceStateParam->format
        || Format_NV12 != mosSurface->Format)
    {
        mosSurface->Format
                = static_cast<MOS_FORMAT>(surfaceStateParam->format);
        return false;  // No need to update offset.
    }

    // R8G8UN alias of an NV12 surface: point the surface at its UV plane.
    mosSurface->dwOffset = mosSurface->UPlaneOffset.iSurfaceOffset;
    mosSurface->Format = Format_R8G8UN;
    return true;  // Offset was updated.
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D surface State
//|          Reads the 2D surface index from the kernel argument and either
//|          reuses the binding-table entries already recorded for that surface
//|          in bti2DIndexTable, or sets up and binds new surface states. The
//|          binding-table index is written to the payload buffer when one is
//|          supplied.
//| Params:  state           [in]  CM HAL state
//|          argParam        [in]  kernel argument holding the surface index
//|          indexParam      [in]  forwarded to HalCm_GetFreeBindingIndex and
//|                                HalCm_GetSurfaceDetails
//|          bindingTable    [in]  binding table to bind into
//|          threadIndex     [in]  selects the per-thread value in argParam
//|          pixelPitch      [in]  true: sampler bookkeeping is used and the
//|                                rotation flag is applied; false: regular
//|                                bookkeeping with width expressed in dwords
//|          buffer          [out] payload buffer, may be nullptr
//|          multipleBinding [in]  unused
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceStateBasic(
    PCM_HAL_STATE              state,
    PCM_HAL_KERNEL_ARG_PARAM   argParam,
    PCM_HAL_INDEX_PARAM        indexParam,
    int32_t                    bindingTable,
    uint32_t                   threadIndex,
    bool                       pixelPitch,
    uint8_t                    *buffer,
    bool                       multipleBinding )
{
    MOS_STATUS                      eStatus;
    RENDERHAL_SURFACE               renderHalSurface;
    PMOS_SURFACE                    surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_INTERFACE            renderHal;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[ MHW_MAX_SURFACE_PLANES ];
    uint8_t                         *src;
    uint8_t                         *dst;
    int32_t                         nSurfaceEntries = 0;
    uint32_t                        index;
    uint32_t                        btIndex;
    uint16_t                        memObjCtl;
    uint32_t                        i;
    uint32_t                        tempPlaneIndex = 0;
    uint32_t                        offsetSrc;
    PRENDERHAL_STATE_HEAP           stateHeap;
    PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;

    UNUSED(multipleBinding);

    eStatus   = MOS_STATUS_UNKNOWN;
    renderHal = state->renderHal;
    MOS_ZeroMemory(&renderHalSurface, sizeof(renderHalSurface));
    surface   = &renderHalSurface.OsSurface;

    //GT-PIN
    PCM_HAL_TASK_PARAM taskParam = state->taskParam;

    // Get the Index to 2dsurface array from the kernel data
    CM_ASSERT( argParam->unitSize == sizeof( index ) );
    src   = argParam->firstValue + ( threadIndex * argParam->unitSize );
    index = *( ( uint32_t *)src ) & CM_SURFACE_MASK;
    if ( index == CM_NULL_SURFACE )
    {
        // Null surface: report the dedicated null binding index and succeed.
        if ( buffer )
        {
            dst = buffer + argParam->payloadOffset;
            *( ( uint32_t *)dst ) = CM_NULL_SURFACE_BINDING_INDEX;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    // check to see if the data present for the 2d surface in the array
    if ( index >= state->cmDeviceParam.max2DSurfaceTableSize ||
         Mos_ResourceIsNull( &state->umdSurf2DTable[ index ].osResource ) )
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid 2D surface array index '%d'", index );
        goto finish;
    }

    // Only touch the table entry once the index is known to be in range.
    memObjCtl = state->umdSurf2DTable[index].memObjCtl;
    if ( !memObjCtl )
    {
        memObjCtl = CM_DEFAULT_CACHE_TYPE;
    }

    // Check to see if surface is already assigned
    unsigned char nBTIRegularSurf, nBTISamplerSurf;
    nBTIRegularSurf = state->bti2DIndexTable[ index ].BTI.regularSurfIndex;
    nBTISamplerSurf = state->bti2DIndexTable[ index ].BTI.samplerSurfIndex;

    if (((!pixelPitch && (nBTIRegularSurf != (unsigned char)CM_INVALID_INDEX)) || (pixelPitch && (nBTISamplerSurf != (unsigned char)CM_INVALID_INDEX))) && argParam->aliasCreated == false )
    {
        if ( pixelPitch )
        {
            btIndex = nBTISamplerSurf;
        }
        else
        {
            btIndex = nBTIRegularSurf;
        }

        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                                        ( stateHeap->iBindingTableOffset ) +                                  // Moves the pointer to Base of Array of Binding Tables
                                        ( bindingTable * stateHeap->iBindingTableSize );                      // Moves the pointer to a Particular Binding Table

        uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );

        // Position of the recorded entry relative to the current binding table.
        int nEntryIndex = 0;

        if ( pixelPitch )
        {
            nEntryIndex = (int)((uint32_t*)( state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition ) - currentBTStart);
        }
        else
        {
            nEntryIndex = (int)((uint32_t*)( state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
        }

        // The recorded entry is not inside the current binding table: take a
        // fresh slot in this table and copy the recorded entries into it.
        if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
        {
            nSurfaceEntries = state->bti2DIndexTable[ index ].nPlaneNumber;
            btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );

            // Get Offset to Current Binding Table
            uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );             // Move the pointer to correct entry

            uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
            if ( pixelPitch )
            {
                MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
            }
            else
            {
                MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
            }

            // update index to table
            if ( pixelPitch )
            {
                state->bti2DIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
                state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = bindingTableEntry;
            }
            else
            {
                state->bti2DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
                state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
            }
        }

        // Update the Batch Buffer
        if ( buffer )
        {
            dst = buffer + argParam->payloadOffset;
            *( ( uint32_t *)dst ) = btIndex;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &renderHalSurface, CM_ARGUMENT_SURFACE2D, index, pixelPitch ) );

    // Setup 2D surface
    MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
    surfaceParam.Type       = renderHal->SurfaceTypeDefault;
    surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    surfaceParam.bVertStride     = 0;
    surfaceParam.bVertStrideOffs = 0;
    if (!pixelPitch) {
        surfaceParam.bWidthInDword_UV = true;
        surfaceParam.bWidthInDword_Y  = true;
    }

    surfaceParam.bRenderTarget = isRenderTarget(state, index);

    // Apply the per-alias overrides; zero-valued fields leave the surface untouched.
    surfStateParam = &(state->umdSurf2DTable[index].surfaceStateParam[argParam->aliasIndex / state->surfaceArraySize]);
    if (surfStateParam->width)
    {
        surface->dwWidth = surfStateParam->width;
    }
    if (surfStateParam->height)
    {
        surface->dwHeight = surfStateParam->height;
    }
    if (surfStateParam->depth)
    {
        surface->dwDepth = surfStateParam->depth;
    }
    if (surfStateParam->pitch)
    {
        surface->dwPitch = surfStateParam->pitch;
    }
    if (surfStateParam->format)
    {
        UpdateSurfaceAliasPlaneOffset(surfStateParam, surface);
    }
    if (surfStateParam->surfaceXOffset)
    {
        surface->YPlaneOffset.iXOffset = surfStateParam->surfaceXOffset;
        if (surface->Format == Format_NV12)
        {
            surface->UPlaneOffset.iXOffset += surfStateParam->surfaceXOffset;
        }
    }
    if (surfStateParam->surfaceYOffset)
    {
        surface->YPlaneOffset.iYOffset = surfStateParam->surfaceYOffset;
        if (surface->Format == Format_NV12)
        {
            // The UV plane offset advances by half of the Y offset.
            surface->UPlaneOffset.iYOffset += surfStateParam->surfaceYOffset/2;
        }
    }
    if (surfStateParam->memoryObjectControl)
    {
        memObjCtl = surfStateParam->memoryObjectControl;
    }

    if (pixelPitch)
    {
        renderHalSurface.Rotation = state->umdSurf2DTable[index].rotationFlag;
    }

    //Cache configurations
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

    // interlace setting
    HalCm_HwSetSurfaceProperty(state,
        state->umdSurf2DTable[index].frameType,
        &surfaceParam);

    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
                  renderHal,
                  &renderHalSurface,
                  &surfaceParam,
                  &nSurfaceEntries,
                  surfaceEntries,
                  nullptr));

    // Never index past the local surfaceEntries array.
    nSurfaceEntries = MOS_MIN( nSurfaceEntries, MHW_MAX_SURFACE_PLANES );

    btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
    for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
    {
        // Bind the surface State
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
                        renderHal,
                        bindingTable,
                        btIndex + i,
                        surfaceEntries[i]));

        if ((taskParam->surfEntryInfoArrays.kernelNum !=0) &&
            (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
        {
            //GT-Pin
            CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
                     state,
                     indexParam,
                     btIndex + i,
                     *surface,
                     0,
                     surfaceEntries[i],
                     tempPlaneIndex,
                     surfaceParam,
                     CM_ARGUMENT_SURFACE2D));
        }

        // Restore the table's width/height on the surface referenced by the entry.
        surfaceEntries[i]->pSurface->dwWidth  = state->umdSurf2DTable[index].width;
        surfaceEntries[i]->pSurface->dwHeight = state->umdSurf2DTable[index].height;
    }

    // only update the reuse table for non-aliased surface
    if ( argParam->aliasCreated == false )
    {
        state->bti2DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
        // Get Offset to Current Binding Table
        stateHeap = renderHal->pStateHeap;
        offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                    ( stateHeap->iBindingTableOffset ) +                                  // Moves the pointer to Base of Array of Binding Tables
                    ( bindingTable * stateHeap->iBindingTableSize ) +                     // Moves the pointer to a Particular Binding Table
                    ( btIndex * sizeof( uint32_t ) );                                     // Move the pointer to correct entry

        if ( pixelPitch )
        {
            state->bti2DIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
            state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
        }
        else
        {
            state->bti2DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
            state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
        }
    }

    // Update the Batch Buffer
    if (buffer)
    {
        dst = buffer + argParam->payloadOffset;
        *((uint32_t*)dst) = btIndex;
    }

    // reset surface height and width
    surface->dwWidth  = state->umdSurf2DTable[index].width;
    surface->dwHeight = state->umdSurf2DTable[index].height;

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D surface State for regular (non-sampler) access.
//|          Decompresses the surface first when the platform interface
//|          reports the decompress flag, then delegates to
//|          HalCm_Setup2DSurfaceStateBasic with pixelPitch disabled.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceState(
    PCM_HAL_STATE              state,
    PCM_HAL_KERNEL_ARG_PARAM   argParam,
    PCM_HAL_INDEX_PARAM        indexParam,
    int32_t                    bindingTable,
    uint32_t                   threadIndex,
    uint8_t                    *buffer)
{
    MOS_STATUS eStatus;

    // Binding surface based at the unit of dword
    const bool usePixelPitch      = false;
    const bool useMultipleBinding = false;

    if (state->cmHalInterface->GetDecompressFlag())
    {
        HalCm_DecompressSurface(state, argParam, threadIndex);
    }

    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateBasic(
        state,
        argParam,
        indexParam,
        bindingTable,
        threadIndex,
        usePixelPitch,
        buffer,
        useMultipleBinding));

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D surface State for sampler access.
//|          Delegates to HalCm_Setup2DSurfaceStateBasic with pixelPitch
//|          enabled.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceSamplerState(
    PCM_HAL_STATE              state,
    PCM_HAL_KERNEL_ARG_PARAM   argParam,
    PCM_HAL_INDEX_PARAM        indexParam,
    int32_t                    bindingTable,
    uint32_t                   threadIndex,
    uint8_t                    *buffer)
{
    MOS_STATUS eStatus;

    // Binding surface based at the unit of pixel
    const bool usePixelPitch      = true;
    const bool useMultipleBinding = false;

    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateBasic(
        state,
        argParam,
        indexParam,
        bindingTable,
        threadIndex,
        usePixelPitch,
        buffer,
        useMultipleBinding));

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D UP surface State
//|          Reads the 2D UP surface index from the kernel argument and either
//|          sets up and binds new surface states (when no binding index is
//|          recorded in bti2DUPIndexTable) or reuses the recorded entries.
//|          The binding-table index is written to the payload buffer when one
//|          is supplied.
//| Params:  state        [in]  CM HAL state
//|          argParam     [in]  kernel argument holding the surface index
//|          indexParam   [in]  forwarded to HalCm_GetFreeBindingIndex and
//|                             HalCm_GetSurfaceDetails
//|          bindingTable [in]  binding table to bind into
//|          threadIndex  [in]  selects the per-thread value in argParam
//|          buffer       [out] payload buffer, may be nullptr
//|          pixelPitch   [in]  true: sampler bookkeeping; false: regular
//|                             bookkeeping with width expressed in dwords
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceUPStateBasic(
    PCM_HAL_STATE              state,
    PCM_HAL_KERNEL_ARG_PARAM   argParam,
    PCM_HAL_INDEX_PARAM        indexParam,
    int32_t                    bindingTable,
    uint32_t                   threadIndex,
    uint8_t                    *buffer,
    bool                       pixelPitch)
{
    MOS_STATUS                      eStatus;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_INTERFACE            renderHal;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
    uint8_t                         *src;
    uint8_t                         *dst;
    int32_t                         nSurfaceEntries = 0;
    uint32_t                        index;
    uint32_t                        btIndex;
    uint16_t                        memObjCtl;
    uint32_t                        i;
    uint32_t                        offsetSrc;
    PRENDERHAL_STATE_HEAP           stateHeap;

    eStatus   = MOS_STATUS_UNKNOWN;
    renderHal = state->renderHal;

    //GT-PIN
    PCM_HAL_TASK_PARAM taskParam = state->taskParam;

    // Get the Index to the 2D UP surface array from the kernel data
    CM_ASSERT(argParam->unitSize == sizeof(index));
    src   = argParam->firstValue + (threadIndex * argParam->unitSize);
    index = *((uint32_t*)src) & CM_SURFACE_MASK;
    if (index == CM_NULL_SURFACE)
    {
        // Null surface: report the dedicated null binding index and succeed.
        if (buffer)
        {
            dst = buffer + argParam->payloadOffset;
            *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    // check to see if the data present for the 2D UP surface in the array
    if (index >= state->cmDeviceParam.max2DSurfaceUPTableSize ||
        (state->surf2DUPTable[index].width == 0))
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid 2D SurfaceUP array index '%d'", index);
        goto finish;
    }

    // Only touch the table entry once the index is known to be in range.
    memObjCtl = state->surf2DUPTable[index].memObjCtl;
    if (!memObjCtl)
    {
        memObjCtl = CM_DEFAULT_CACHE_TYPE;
    }

    // Check to see if surface is already assigned
    if ( pixelPitch )
    {
        btIndex = state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex;
    }
    else
    {
        btIndex = state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex;
    }

    if ( btIndex == ( unsigned char )CM_INVALID_INDEX )
    {
        uint32_t tempPlaneIndex = 0;

        // Get Details of 2DUP surface and fill the surface
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE2D_UP, index, pixelPitch));

        // Setup 2D surface
        MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
        surfaceParam.Type       = renderHal->SurfaceTypeDefault;
        surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;

        if (!pixelPitch) {
            surfaceParam.bWidthInDword_UV = true;
            surfaceParam.bWidthInDword_Y  = true;
        }

        surfaceParam.bRenderTarget = true;

        //Cache configurations
        state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

        // interlace setting
        // NOTE(review): the frame type is read from umdSurf2DTable (the regular
        // 2D surface table) although index was validated against the 2D UP
        // table only - confirm this lookup is intended.
        HalCm_HwSetSurfaceProperty(state,
            state->umdSurf2DTable[index].frameType,
            &surfaceParam);

        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
                    renderHal,
                    &surface,
                    &surfaceParam,
                    &nSurfaceEntries,
                    surfaceEntries,
                    nullptr));

        //GT-PIN
        btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
        for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
        {
            // Bind the surface State
            CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
                            renderHal,
                            bindingTable,
                            btIndex + i,
                            surfaceEntries[i]));

            //GT-Pin
            if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
                (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
            {
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
                         state,
                         indexParam,
                         btIndex + i,
                         surface.OsSurface,
                         0,
                         surfaceEntries[i],
                         tempPlaneIndex,
                         surfaceParam,
                         CM_ARGUMENT_SURFACE2D_UP));
            }
        }

        // Record the new binding so later calls can reuse it.
        state->bti2DUPIndexTable[ index ].nPlaneNumber = nSurfaceEntries;

        stateHeap = renderHal->pStateHeap;
        offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                    ( stateHeap->iBindingTableOffset ) +                                  // Moves the pointer to Base of Array of Binding Tables
                    ( bindingTable * stateHeap->iBindingTableSize ) +                     // Moves the pointer to a Particular Binding Table
                    ( btIndex * sizeof( uint32_t ) );                                     // Move the pointer to correct entry

        if ( pixelPitch )
        {
            state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
            state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
        }
        else
        {
            state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex = btIndex;
            state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
        }
    }
    else
    {
        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                                        ( stateHeap->iBindingTableOffset ) +                                  // Moves the pointer to Base of Array of Binding Tables
                                        ( bindingTable * stateHeap->iBindingTableSize );                      // Moves the pointer to a Particular Binding Table

        uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );

        // Position of the recorded entry relative to the current binding table.
        int nEntryIndex = 0;

        if ( pixelPitch )
        {
            nEntryIndex = (int) ((uint32_t*)( state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition ) - currentBTStart);
        }
        else
        {
            nEntryIndex = (int) ((uint32_t*)( state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
        }

        // The recorded entry is not inside the current binding table: take a
        // fresh slot in this table and copy the recorded entries into it.
        if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
        {
            uint32_t tmpSurfaceEntries = state->bti2DUPIndexTable[ index ].nPlaneNumber;

            btIndex = HalCm_GetFreeBindingIndex( state, indexParam, tmpSurfaceEntries );

            // Get Offset to Current Binding Table
            uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );             // Move the pointer to correct entry

            uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
            if ( pixelPitch )
            {
                MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
            }
            else
            {
                MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
            }

            // update index to table
            if ( pixelPitch )
            {
                state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
                state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = bindingTableEntry;
            }
            else
            {
                state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex = btIndex;
                state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
            }
        }
    }

    // Update the Batch Buffer
    if (buffer)
    {
        dst = buffer + argParam->payloadOffset;
        *((uint32_t*)dst) = btIndex;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D UP surface State for regular (non-sampler) access.
//|          Delegates to HalCm_Setup2DSurfaceUPStateBasic with pixelPitch
//|          disabled.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceUPState(
    PCM_HAL_STATE              state,
    PCM_HAL_KERNEL_ARG_PARAM   argParam,
    PCM_HAL_INDEX_PARAM        indexParam,
    int32_t                    bindingTable,
    uint32_t                   threadIndex,
    uint8_t                    *buffer)
{
    MOS_STATUS eStatus;

    // Binding surface based at the unit of dword
    const bool usePixelPitch = false;

    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateBasic(
        state,
        argParam,
        indexParam,
        bindingTable,
        threadIndex,
        buffer,
        usePixelPitch));

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D UP surface State for sampler access.
//|          Delegates to HalCm_Setup2DSurfaceUPStateBasic with pixelPitch
//|          enabled.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceUPSamplerState(
    PCM_HAL_STATE              state,
    PCM_HAL_KERNEL_ARG_PARAM   argParam,
    PCM_HAL_INDEX_PARAM        indexParam,
    int32_t                    bindingTable,
    uint32_t                   threadIndex,
    uint8_t                    *buffer)
{
    MOS_STATUS eStatus;

    // Binding surface based at the unit of pixel
    const bool usePixelPitch = true;

    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateBasic(
        state,
        argParam,
        indexParam,
        bindingTable,
        threadIndex,
        buffer,
        usePixelPitch));

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup the surface state of one VME surface and bind it at the
//|          given binding-table index. When both surfaceStateWidth and
//|          surfaceStateHeight are non-zero they replace the surface
//|          dimensions, provided they do not exceed the original ones.
//|          On success the binding index is recorded as vmeSurfIndex in
//|          bti2DIndexTable[surfIndex].
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupSpecificVmeSurfaceState(
    PCM_HAL_STATE                     state,
    PCM_HAL_INDEX_PARAM               indexParam,
    int32_t                           bindingTable,
    uint32_t                          surfIndex,
    uint32_t                          btIndex,
    uint16_t                          memObjCtl,
    uint32_t                          surfaceStateWidth,
    uint32_t                          surfaceStateHeight)
{
    MOS_STATUS                      eStatus     = MOS_STATUS_UNKNOWN;
    PRENDERHAL_INTERFACE            renderHal   = state->renderHal;
    PCM_HAL_TASK_PARAM              taskParam   = state->taskParam;
    int32_t                         entryCount  = 0;
    uint32_t                        planeIndex  = 0;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];

    // Get Details of VME surface and fill the surface
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_VME_STATE, surfIndex, 0));

    // Setup 2D surface
    MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
    surfaceParam.Type             = renderHal->SurfaceTypeAdvanced;
    surfaceParam.bRenderTarget    = true;
    surfaceParam.bWidthInDword_Y  = false;
    surfaceParam.bWidthInDword_UV = false;
    surfaceParam.Boundary         = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    surfaceParam.bVmeUse          = true;

    // Overwrite the width and height if specified
    if (surfaceStateWidth && surfaceStateHeight)
    {
        if (surfaceStateWidth > surface.OsSurface.dwWidth ||
            surfaceStateHeight > surface.OsSurface.dwHeight)
        {
            CM_ASSERTMESSAGE("Error: VME surface state's resolution is larger than the original surface.");
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            goto finish;
        }
        surface.OsSurface.dwWidth  = surfaceStateWidth;
        surface.OsSurface.dwHeight = surfaceStateHeight;
    }

    //Cache configurations
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
                    renderHal,
                    &surface,
                    &surfaceParam,
                    &entryCount,
                    surfaceEntries,
                    nullptr));

    CM_ASSERT(entryCount == 1);

    // Bind the surface State (only the first entry is used)
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
                        renderHal,
                        bindingTable,
                        btIndex,
                        surfaceEntries[0]));

    // Collect surface details only when the task carries surface entry info arrays.
    if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
        (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
                state,
                indexParam,
                btIndex,
                surface.OsSurface,
                0,
                surfaceEntries[0],
                planeIndex,
                surfaceParam,
                CM_ARGUMENT_SURFACE2D));
    }

    state->bti2DIndexTable[ surfIndex ].BTI.vmeSurfIndex = btIndex;

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup VME surface State
//|          Reads the current surface and the forward/backward reference
//|          surfaces from the VME argument, reserves (2 * pairs + 1)
//|          consecutive binding-table entries and sets up the current surface
//|          followed by interleaved forward/backward reference surfaces.
//|          The first binding index is written to the payload buffer when one
//|          is supplied.
//| Params:  state        [in]  CM HAL state
//|          argParam     [in]  kernel argument holding a CM_HAL_VME_ARG_VALUE
//|          indexParam   [in]  forwarded to HalCm_GetFreeBindingIndex and
//|                             HalCm_SetupSpecificVmeSurfaceState
//|          bindingTable [in]  binding table to bind into
//|          threadIndex  [in]  asserted to be 0
//|          buffer       [out] payload buffer, may be nullptr
//| Returns: Result of the operation; MOS_STATUS_INVALID_PARAMETER when the
//|          current surface index is invalid or when the number of reference
//|          surfaces does not fit the local index arrays
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupVmeSurfaceState(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    PCM_HAL_INDEX_PARAM         indexParam,
    int32_t                     bindingTable,
    uint32_t                    threadIndex,
    uint8_t                     *buffer)
{
    MOS_STATUS                  eStatus;
    PRENDERHAL_INTERFACE        renderHal;
    PCM_HAL_VME_ARG_VALUE       vmeSrc;
    uint8_t                     *dst;
    uint32_t                    index[CM_MAX_VME_BINDING_INDEX_1];
    uint16_t                    memObjCtl[CM_MAX_VME_BINDING_INDEX_1];
    uint32_t                    fwSurfCount = 0;
    uint32_t                    bwSurfCount = 0;
    uint32_t                    surfPairNum;
    uint32_t                    idx;
    uint32_t                    curBTIndex;
    uint32_t                    btIndex;
    uint32_t                    surfaceStateWidth = 0;
    uint32_t                    surfaceStateHeight = 0;
    uint32_t                    *fPtr = nullptr;
    uint32_t                    *bPtr = nullptr;
    uint32_t                    *refSurfaces = nullptr;

    eStatus   = MOS_STATUS_UNKNOWN;
    renderHal = state->renderHal;
    btIndex   = 0;

    MOS_ZeroMemory(memObjCtl, CM_MAX_VME_BINDING_INDEX_1*sizeof(uint16_t));
    MOS_ZeroMemory(index, CM_MAX_VME_BINDING_INDEX_1*sizeof(uint32_t));

    CM_ASSERT(argParam->unitSize <= sizeof(uint32_t)*(CM_MAX_VME_BINDING_INDEX_1 + 2));
    CM_ASSERT(threadIndex == 0); // VME surface is not allowed in thread arg

    vmeSrc = (PCM_HAL_VME_ARG_VALUE)argParam->firstValue;
    fwSurfCount = vmeSrc->fwRefNum;
    bwSurfCount = vmeSrc->bwRefNum;
    refSurfaces = findRefInVmeArg(vmeSrc);

    index[0] = vmeSrc->curSurface & CM_SURFACE_MASK;
    // check to see if index[0] is valid
    if (index[0] == CM_NULL_SURFACE)
    {
        if (buffer)
        {
            dst = buffer + argParam->payloadOffset;
            *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    if (index[0] >= state->cmDeviceParam.max2DSurfaceTableSize ||
        Mos_ResourceIsNull(&state->umdSurf2DTable[index[0]].osResource))
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid 2D surface array index '%d'", index[0]);
        goto finish;
    }

    // index[] and memObjCtl[] hold the current surface in slot 0 plus one slot
    // per reference surface; reject counts that would overflow them. The check
    // is written so that the sum itself cannot wrap.
    if (fwSurfCount >= CM_MAX_VME_BINDING_INDEX_1 ||
        bwSurfCount >= CM_MAX_VME_BINDING_INDEX_1 - fwSurfCount)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Error: Too many VME reference surfaces.");
        goto finish;
    }

    memObjCtl[0] = state->umdSurf2DTable[index[0]].memObjCtl;
    if (!memObjCtl[0])
    {
        memObjCtl[0] = CM_DEFAULT_CACHE_TYPE;
    }
    for (idx = 0; idx < (fwSurfCount + bwSurfCount); idx++)
    {
        index[idx + 1] = refSurfaces[idx] & CM_SURFACE_MASK;
        // Read the table only for in-range indices; memObjCtl[] was zeroed
        // above, so an out-of-range reference falls back to the default.
        if (index[idx + 1] < state->cmDeviceParam.max2DSurfaceTableSize)
        {
            memObjCtl[idx + 1] = state->umdSurf2DTable[index[idx + 1]].memObjCtl;
        }
        if (!memObjCtl[idx + 1])
        {
            memObjCtl[idx + 1] = CM_DEFAULT_CACHE_TYPE;
        }
    }

    surfaceStateWidth  = vmeSrc->surfStateParam.surfaceStateWidth;
    surfaceStateHeight = vmeSrc->surfStateParam.surfaceStateHeight;

    // Forward references follow the current surface; backward references
    // follow the forward ones.
    fPtr = index + 1;
    bPtr = index + 1 + fwSurfCount;

    //Max surface pair number
    surfPairNum = fwSurfCount > bwSurfCount ? fwSurfCount : bwSurfCount;

    btIndex = curBTIndex = HalCm_GetFreeBindingIndex(state, indexParam, surfPairNum*2 + 1);

    // NOTE(review): the status returned by HalCm_SetupSpecificVmeSurfaceState
    // is not checked for any of the calls below - confirm this is intended.
    HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, index[0], curBTIndex, memObjCtl[0], surfaceStateWidth, surfaceStateHeight);
    curBTIndex++;

    //Setup surface states interleavely for backward and forward surfaces pairs.
    for (idx = 0; idx < surfPairNum; idx++)
    {
        if (idx < fwSurfCount)
        {
            HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, fPtr[idx], curBTIndex, memObjCtl[idx + 1], surfaceStateWidth, surfaceStateHeight);
        }
        // The slot is consumed even when there is no surface for it, so that
        // forward and backward entries keep alternating.
        curBTIndex++;

        if (idx < bwSurfCount)
        {
            HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, bPtr[idx], curBTIndex, memObjCtl[idx+ 1 + fwSurfCount], surfaceStateWidth, surfaceStateHeight);
        }
        curBTIndex++;
    }

    // Update the Batch Buffer
    if (buffer)
    {
        dst = buffer + argParam->payloadOffset;
        *((uint32_t*)dst) = btIndex;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
// Copies the non-zero fields of the alias surface-state parameters selected by
// argParam->aliasIndex / state->surfaceArraySize onto the given MOS surface.
// Fields that are zero in the alias parameters leave the surface unchanged.
// Always returns true.
static bool
UpdateMosSurfaceFromAliasState(CM_HAL_STATE *state,
                               CM_HAL_KERNEL_ARG_PARAM *argParam,
                               uint32_t surface_index,
                               MOS_SURFACE *surface)
{
    uint32_t aliasSlot = argParam->aliasIndex/state->surfaceArraySize;
    const CM_HAL_SURFACE2D_SURFACE_STATE_PARAM *alias
        = &state->umdSurf2DTable[surface_index].surfaceStateParam[aliasSlot];

    // Dimensions
    if (alias->width)
    {
        surface->dwWidth = alias->width;
    }
    if (alias->height)
    {
        surface->dwHeight = alias->height;
    }
    if (alias->depth)
    {
        surface->dwDepth = alias->depth;
    }
    if (alias->pitch)
    {
        surface->dwPitch = alias->pitch;
    }

    // Format
    if (alias->format)
    {
        surface->Format = static_cast<MOS_FORMAT>(alias->format);
    }

    // Offsets
    if (alias->surfaceXOffset)
    {
        surface->YPlaneOffset.iXOffset = alias->surfaceXOffset;
    }
    if (alias->surfaceYOffset)
    {
        surface->YPlaneOffset.iYOffset = alias->surfaceYOffset;
    }
    if (alias->surfaceOffset)
    {
        surface->dwOffset = alias->surfaceOffset;
    }

    return true;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup Sampler8x8 surface State
//|          Reads the 2D surface index from the kernel argument and either
//|          sets up and binds a new surface state (when no sampler8x8 binding
//|          index is recorded or an alias was created) or reuses the recorded
//|          binding-table entries. The recorded sampler8x8 binding index is
//|          written to the payload buffer when one is supplied.
//| Params:  state        [in]  CM HAL state
//|          argParam     [in]  kernel argument holding the surface index
//|          indexParam   [in]  forwarded to HalCm_GetFreeBindingIndex and
//|                             HalCm_GetSurfaceDetails
//|          bindingTable [in]  binding table to bind into
//|          threadIndex  [in]  selects the per-thread value in argParam
//|          buffer       [out] payload buffer, may be nullptr
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupSampler8x8SurfaceState(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    PCM_HAL_INDEX_PARAM         indexParam,
    int32_t                     bindingTable,
    uint32_t                    threadIndex,
    uint8_t                     *buffer)
{
    MOS_STATUS                      eStatus;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_INTERFACE            renderHal;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
    uint8_t                         *src;
    uint8_t                         *dst;
    int32_t                         nSurfaceEntries;
    uint32_t                        index;
    uint16_t                        memObjCtl;
    int32_t                         i;
    uint32_t                        btIndex;
    uint32_t                        tempPlaneIndex = 0;
    uint32_t                        offsetSrc;
    PRENDERHAL_STATE_HEAP           stateHeap;

    eStatus   = MOS_STATUS_UNKNOWN;
    renderHal = state->renderHal;

    PCM_HAL_TASK_PARAM taskParam = state->taskParam;

    nSurfaceEntries = 0;

    CM_ASSERT(argParam->unitSize == sizeof(uint32_t));

    src   = argParam->firstValue + (threadIndex * argParam->unitSize);
    index = *((uint32_t*)src) & CM_SURFACE_MASK;
    if (index == CM_NULL_SURFACE)
    {
        // Null surface: report the dedicated null binding index and succeed.
        if (buffer)
        {
            dst = buffer + argParam->payloadOffset;
            *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
        }

        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    // check to see if index is valid
    if (index >= state->cmDeviceParam.max2DSurfaceTableSize ||
        Mos_ResourceIsNull(&state->umdSurf2DTable[index].osResource))
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid 2D surface array index '%d'", index);
        goto finish;
    }

    // Only touch the table entry once the index is known to be in range.
    memObjCtl = state->umdSurf2DTable[index].memObjCtl;
    if (!memObjCtl)
    {
        memObjCtl = CM_DEFAULT_CACHE_TYPE;
    }

    // Enabled for the duration of this call only; cleared again at 'finish'.
    renderHal->bEnableP010SinglePass = state->cmHalInterface->IsP010SinglePassSupported();

    btIndex = state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex;
    if (btIndex == ( unsigned char )CM_INVALID_INDEX || argParam->aliasCreated)
    {
        // Get Details of Sampler8x8 surface and fill the surface
        CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, argParam->kind, index, 0 ) );

        // Setup surface
        MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
        surfaceParam.Type             = renderHal->SurfaceTypeAdvanced;
        surfaceParam.bRenderTarget    = true;
        surfaceParam.bWidthInDword_Y  = false;
        surfaceParam.bWidthInDword_UV = false;
        surfaceParam.Boundary         = RENDERHAL_SS_BOUNDARY_ORIGINAL;
        surfaceParam.bVASurface       = ( argParam->kind == CM_ARGUMENT_SURFACE_SAMPLER8X8_VA ) ? 1 : 0;
        surfaceParam.AddressControl   = argParam->nCustomValue;

        UpdateMosSurfaceFromAliasState(state, argParam, index,
                                       &surface.OsSurface);

        //Set memory object control
        state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

        surface.Rotation     = state->umdSurf2DTable[index].rotationFlag;
        surface.ChromaSiting = state->umdSurf2DTable[index].chromaSiting;
        nSurfaceEntries = 0;

        // interlace setting
        HalCm_HwSetSurfaceProperty(state,
            state->umdSurf2DTable[index].frameType,
            &surfaceParam);

        CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupSurfaceState(
            renderHal,
            &surface,
            &surfaceParam,
            &nSurfaceEntries,
            surfaceEntries,
            nullptr ) );

        CM_ASSERT( nSurfaceEntries == 1 );

        btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );

        for ( i = 0; i < nSurfaceEntries; i++ )
        {
            // Bind the surface State
            CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
                renderHal,
                bindingTable,
                btIndex + i,
                surfaceEntries[ i ] ) );

            if ( ( taskParam->surfEntryInfoArrays.kernelNum != 0 ) &&
                 ( taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr ) )
            {
                CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceDetails(
                    state,
                    indexParam,
                    btIndex + i,
                    surface.OsSurface,
                    0,
                    surfaceEntries[ i ],
                    tempPlaneIndex,
                    surfaceParam,
                    CM_ARGUMENT_SURFACE2D ) );
            }
        }

        // Record the new binding so later calls can reuse it.
        stateHeap = renderHal->pStateHeap;
        offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                    ( stateHeap->iBindingTableOffset ) +                                  // Moves the pointer to Base of Array of Binding Tables
                    ( bindingTable * stateHeap->iBindingTableSize ) +                     // Moves the pointer to a Particular Binding Table
                    ( btIndex * sizeof( uint32_t ) );                                     // Move the pointer to correct entry

        state->bti2DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
        state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
        state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex = btIndex;
    }
    else
    {
        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
                                        ( stateHeap->iBindingTableOffset ) +                                  // Moves the pointer to Base of Array of Binding Tables
                                        ( bindingTable * stateHeap->iBindingTableSize );                      // Moves the pointer to a Particular Binding Table

        uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );

        // Position of the recorded entry relative to the current binding table.
        int nEntryIndex = 0;

        nEntryIndex = ( int )( ( uint32_t *)( state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition ) - currentBTStart );

        // The recorded entry is not inside the current binding table: take a
        // fresh slot in this table and copy the recorded entries into it.
        if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
        {
            uint32_t tmpSurfaceEntries = state->bti2DIndexTable[ index ].nPlaneNumber;

            btIndex = HalCm_GetFreeBindingIndex( state, indexParam, tmpSurfaceEntries );

            // Get Offset to Current Binding Table
            uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );             // Move the pointer to correct entry

            uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );

            // update index to table
            state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex = btIndex;
            state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition = bindingTableEntry;
        }
    }

    // Update the Batch Buffer
    if ( buffer )
    {
        dst = buffer + argParam->payloadOffset;
        *( ( uint32_t *)dst ) = state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    renderHal->bEnableP010SinglePass = false;
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup State Buffer surface State
//|          Takes the buffer index from the first value of the kernel
//|          argument, reserves one binding-table entry, sets up a buffer
//|          surface state for it and binds it. The binding index is written
//|          to the payload buffer when one is supplied.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupStateBufferSurfaceState(
    PCM_HAL_STATE               state,
    PCM_HAL_KERNEL_ARG_PARAM    argParam,
    PCM_HAL_INDEX_PARAM         indexParam,
    int32_t                     bindingTable,
    uint32_t                    threadIndex,
    uint8_t                     *buffer )
{
    MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
    PRENDERHAL_INTERFACE            renderHal = state->renderHal;
    RENDERHAL_SURFACE_STATE_PARAMS  stateParams;
    RENDERHAL_SURFACE               bufferSurface;
    PRENDERHAL_SURFACE_STATE_ENTRY  stateEntry;
    CM_SURFACE_BTI_INFO             btiInfo;

    state->cmHalInterface->GetHwSurfaceBTIInfo( &btiInfo );

    // Buffer index comes from the first argument value.
    uint32_t bufferIndex = reinterpret_cast< uint32_t *>( argParam->firstValue )[ 0 ] & CM_SURFACE_MASK;
    uint16_t cacheControl = state->bufferTable[ bufferIndex ].memObjCtl;

    // The binding index is reserved before the surface is looked up.
    uint32_t bindingIndex = HalCm_GetFreeBindingIndex( state, indexParam, 1 );

    MOS_ZeroMemory( &bufferSurface, sizeof( bufferSurface ) );

    // Get Details of the state buffer surface and fill the surface
    CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &bufferSurface, argParam->kind, bufferIndex, 0 ) );

    MOS_ZeroMemory( &stateParams, sizeof( stateParams ) );

    // Set the bRenderTarget by default
    stateParams.bRenderTarget = true;

    //Cache configurations default
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl( cacheControl, &stateParams );

    // Setup Buffer surface
    CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupBufferSurfaceState(
        renderHal,
        &bufferSurface,
        &stateParams,
        &stateEntry ) );

    // Bind the surface State
    CM_ASSERT( ( ( int32_t )bindingIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + btiInfo.normalSurfaceStart );
    CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
        renderHal,
        bindingTable,
        bindingIndex,
        stateEntry ) );

    // Update the Batch Buffer
    if ( buffer )
    {
        *( ( uint32_t *)( buffer + argParam->payloadOffset ) ) = bindingIndex;
    }

finish:
    return eStatus;
}
//------------------------------------------------------------------------------
//| Purpose: Get usr defined threadcount / threadgroup
//|          Computed as numHWThreadsPerEU multiplied by numMaxEUsPerPool when
//|          that value is non-zero, otherwise by numEUsPerSubSlice. The output
//|          is left untouched when the platform info query fails.
//| Returns: Result of the operation
//------------------------------------------------------------------------------
MOS_STATUS HalCm_GetMaxThreadCountPerThreadGroup(
    PCM_HAL_STATE                   state,                     // [in] Pointer to CM State
    uint32_t                        *threadsPerThreadGroup)    // [out] Pointer to threadsPerThreadGroup
{
    MOS_STATUS       eStatus = MOS_STATUS_SUCCESS;
    CM_PLATFORM_INFO info;

    MOS_ZeroMemory(&info, sizeof(CM_PLATFORM_INFO));
    CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetPlatformInfo( state, &info, false) );

    // Prefer the per-pool EU count; fall back to the per-subslice count.
    *threadsPerThreadGroup = (info.numHWThreadsPerEU) *
        (info.numMaxEUsPerPool ? (info.numMaxEUsPerPool) : (info.numEUsPerSubSlice));

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Decodes hints to get number and size of kernel groups.
//|             Kernel 0 always belongs to group 0. For every following kernel
//|             the least significant bit of hintsBits is examined: when it is
//|             set, that kernel opens a new group; hintsBits is then shifted
//|             right by one.
//| Params:     hintsBits           [in]     group-boundary bits
//|             numKernels          [in]     number of kernels, must be >= 1
//|             numKernelsPerGroup  [in/out] per-group kernel counters; they are
//|                                          incremented, so the caller must
//|                                          pass them zero-initialised
//|             numKernelGroups     [in/out] incremented once per new group
//|             remapKernelToGroup  [out]    group of kernel k for k >= 1
//|                                          (entry 0 is not written)
//|             remapGroupToKernel  [out]    first kernel of each group
//| Returns:    MOS_STATUS_INVALID_PARAMETER when numKernels is 0 or an output
//|             pointer is null, otherwise MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetNumKernelsPerGroup(
    uint8_t  hintsBits,
    uint32_t numKernels,
    uint32_t *numKernelsPerGroup,
    uint32_t *numKernelGroups,
    uint32_t *remapKernelToGroup,
    uint32_t *remapGroupToKernel
    )
{
    uint32_t currGrp = 0;
    uint32_t krn     = 0;

    // With numKernels == 0 the former bound "numKernels - 1" wrapped around to
    // UINT_MAX and the loop wrote far past the end of every output array.
    if( numKernels == 0 || !numKernelsPerGroup || !numKernelGroups ||
        !remapKernelToGroup || !remapGroupToKernel )
    {
        CM_ASSERTMESSAGE("Invalid kernel count or output pointer for EnqueueWithHints");
        return MOS_STATUS_INVALID_PARAMETER;
    }

    // first group at least has one kernel
    numKernelsPerGroup[currGrp]++;
    remapGroupToKernel[currGrp] = 0;

    // Kernels 1 .. numKernels-1: one hint bit per kernel decides whether it
    // starts a new group.
    for( krn = 1; krn < numKernels; ++krn )
    {
        if( (hintsBits & CM_HINTS_LEASTBIT_MASK) == CM_HINTS_LEASTBIT_MASK )
        {
            currGrp++;
            *numKernelGroups = *numKernelGroups + 1;
            remapGroupToKernel[currGrp] = krn;
        }
        numKernelsPerGroup[currGrp]++;
        hintsBits >>= 1;
        remapKernelToGroup[krn] = currGrp;
    }

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Gets information about max parallelism graphs
//|             numThreadsOnSides based on formula to sum 1 to n: (n(n+1))/2
//| Params:     maximum          [in]  maximum parallelism of the pattern; must
//|                                    be non-zero for CM_WAVEFRONT and
//|                                    CM_WAVEFRONT26 because it is a divisor
//|             numThreads       [in]  total number of threads of the kernel
//|             width, height    [in]  thread space dimensions
//|             graphInfo        [out] receives maxParallelism, numMaxRepeat
//|                                    and numSteps
//|             pattern          [in]  dependency pattern
//|             noDependencyCase [in]  when true, CM_NONE_DEPENDENCY is treated
//|                                    as width*height steps of parallelism 1
//| Returns:    MOS_STATUS_INVALID_PARAMETER for a null graphInfo, an
//|             unsupported pattern, or a zero maximum with a wavefront
//|             pattern; otherwise MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetParallelGraphInfo(
    uint32_t                       maximum,
    uint32_t                       numThreads,
    uint32_t                       width,
    uint32_t                       height,
    PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
    CM_DEPENDENCY_PATTERN          pattern,
    bool                           noDependencyCase)
{
    MOS_STATUS eStatus           = MOS_STATUS_SUCCESS;
    uint32_t   numThreadsOnSides = 0;
    uint32_t   numMaxRepeat      = 0;
    uint32_t   numSteps          = 0;

    if( !graphInfo )
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Null graph info for EnqueueWithHints");
        goto finish;
    }

    // Both wavefront formulas divide by 'maximum' and compute (maximum - 1);
    // a zero value would divide by zero and wrap the unsigned subtraction.
    if( (pattern == CM_WAVEFRONT || pattern == CM_WAVEFRONT26) && maximum == 0 )
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Zero max parallelism for wavefront pattern in EnqueueWithHints");
        goto finish;
    }

    switch( pattern )
    {
    case CM_NONE_DEPENDENCY:
        if (noDependencyCase)
        {
            maximum      = 1;
            numMaxRepeat = width * height;
            numSteps     = width * height;
        }
        // do nothing will depend on other kernels
        break;

    case CM_VERTICAL_WAVE:
        numMaxRepeat = width;
        numSteps     = width;
        break;

    case CM_HORIZONTAL_WAVE:
        numMaxRepeat = height;
        numSteps     = height;
        break;

    case CM_WAVEFRONT:
        // ramp-up and ramp-down together hold (maximum-1)*maximum threads
        numThreadsOnSides = ( maximum - 1 ) * maximum;
        numMaxRepeat      = (numThreads - numThreadsOnSides ) / maximum;
        numSteps          = ( maximum - 1) * 2 + numMaxRepeat;
        break;

    case CM_WAVEFRONT26:
        // each ramp level is counted twice for this pattern
        numThreadsOnSides = ( maximum - 1 ) * maximum * 2;
        numMaxRepeat      = (numThreads - numThreadsOnSides ) / maximum;
        numSteps          = ( (maximum - 1) * 2 ) * 2 + numMaxRepeat;
        break;

    case CM_WAVEFRONT26Z:
        // do nothing already set outside of this function
        // NOTE(review): the stores below still run for this pattern and write
        // numMaxRepeat = 0 and numSteps = 0 into graphInfo; the caller visible
        // in this file does not invoke this function for CM_WAVEFRONT26Z.
        break;

    default:
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
        goto finish;
    }

    graphInfo->maxParallelism = maximum;
    graphInfo->numMaxRepeat   = numMaxRepeat;
    graphInfo->numSteps       = numSteps;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Fills dispatchFreq with the number of threads to dispatch at
//|             each step, derived from the parallelism graph of the pattern:
//|               - horizontal / vertical wave: maxParallelism at every step
//|               - wavefront:   1, 2, .., max-1, then max repeated numMaxRepeat
//|                              times, then max-1, .., down until numSteps
//|                              entries are written
//|               - wavefront26: same shape, every ramp level written twice
//|               - none / wavefront26Z: dispatchFreq is left untouched
//| Returns:    MOS_STATUS_INVALID_PARAMETER for any other pattern, otherwise
//|             MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetDispatchPattern(
    CM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
    CM_DEPENDENCY_PATTERN         pattern,
    uint32_t                      *dispatchFreq
    )
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
    uint32_t   step    = 0;   // step counter (1-based in the wavefront case)
    uint32_t   slot    = 0;   // write position for the wavefront26 case
    uint32_t   level   = 0;   // current ramp height
    uint32_t   repeat  = 0;   // plateau counter

    if( pattern == CM_HORIZONTAL_WAVE || pattern == CM_VERTICAL_WAVE )
    {
        // constant parallelism on every step
        while( step < graphInfo.numSteps )
        {
            dispatchFreq[step] = graphInfo.maxParallelism;
            ++step;
        }
    }
    else if( pattern == CM_WAVEFRONT )
    {
        // ramp up: 1 .. maxParallelism-1
        step = 1;
        while( step < graphInfo.maxParallelism )
        {
            dispatchFreq[step - 1] = step;
            ++step;
        }

        // plateau at maxParallelism
        for( repeat = 0; repeat < graphInfo.numMaxRepeat; ++repeat, ++step )
        {
            dispatchFreq[step - 1] = graphInfo.maxParallelism;
        }

        // ramp down until numSteps entries exist
        level = graphInfo.maxParallelism - 1;
        while( step <= graphInfo.numSteps )
        {
            dispatchFreq[step - 1] = level;
            ++step;
            --level;
        }
    }
    else if( pattern == CM_WAVEFRONT26 )
    {
        // ramp up, two steps per level
        level = 1;
        slot  = 0;
        while( level < graphInfo.maxParallelism )
        {
            dispatchFreq[slot]     = level;
            dispatchFreq[slot + 1] = level;
            ++level;
            slot += 2;
        }

        // plateau at maxParallelism
        for( repeat = 0; repeat < graphInfo.numMaxRepeat; ++repeat, ++slot )
        {
            dispatchFreq[slot] = graphInfo.maxParallelism;
        }

        // ramp down, two steps per level
        level = graphInfo.maxParallelism - 1;
        while( slot < graphInfo.numSteps )
        {
            dispatchFreq[slot]     = level;
            dispatchFreq[slot + 1] = level;
            slot += 2;
            --level;
        }
    }
    else if( pattern != CM_NONE_DEPENDENCY && pattern != CM_WAVEFRONT26Z )
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Sums numSteps of the kernels of each group (kernels are taken
//|             from graphInfo consecutively, group after group), tracks the
//|             smallest non-zero group total in *minSteps, and sets each
//|             group's freqDispatch to ceil(numStepsInGrp / *minSteps).
//|             A group whose total is zero does not get numStepsInGrp written
//|             and does not influence *minSteps.
//| Returns:    Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetKernelGrpFreqDispatch(
    PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
    PCM_HAL_KERNEL_GROUP_INFO      groupInfo,
    uint32_t                       numKernelGroups,
    uint32_t                       *minSteps)
{
    MOS_STATUS eStatus      = MOS_STATUS_SUCCESS;
    uint32_t   grp          = 0;
    uint32_t   member       = 0;
    uint32_t   nextKernel   = 0;   // running index into graphInfo
    uint32_t   stepsInGroup = 0;

    // Pass 1: per-group step totals and the global minimum.
    for( grp = 0; grp < numKernelGroups; ++grp )
    {
        stepsInGroup = 0;
        for( member = 0; member < groupInfo[grp].numKernelsInGroup; ++member )
        {
            stepsInGroup += graphInfo[nextKernel].numSteps;
            ++nextKernel;
        }

        if( stepsInGroup != 0 )
        {
            if( stepsInGroup < *minSteps )
            {
                *minSteps = stepsInGroup;
            }
            groupInfo[grp].numStepsInGrp = stepsInGroup;
        }
    }

    // Pass 2: dispatch frequency relative to the smallest group, rounded up.
    for( grp = 0; grp < numKernelGroups; ++grp )
    {
        double ratio = groupInfo[grp].numStepsInGrp / (double)*minSteps;
        groupInfo[grp].freqDispatch = (uint32_t)ceil( ratio );
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Sets dispatch pattern for kernel with no dependency based on
//|             the minimum number of steps calculated from kernels with dependency.
//|             Every one of the minSteps entries receives numThreads / minSteps
//|             threads; the remainder (numThreads % minSteps) is added to the
//|             first entry so that the entries sum to numThreads.
//| Params:     numThreads   [in]  total threads of the kernel
//|             minSteps     [in]  number of entries to fill, must be non-zero
//|             dispatchFreq [out] array of at least minSteps entries
//| Returns:    MOS_STATUS_INVALID_PARAMETER when minSteps is 0 or dispatchFreq
//|             is null, otherwise MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetNoDependKernelDispatchPattern(
    uint32_t numThreads,
    uint32_t minSteps,
    uint32_t *dispatchFreq)
{
    uint32_t step        = 0;
    uint32_t numEachStep = 0;
    uint32_t leftover    = 0;

    // minSteps is a divisor below; reject zero instead of faulting.
    if( minSteps == 0 || !dispatchFreq )
    {
        CM_ASSERTMESSAGE("Invalid step count or dispatch buffer for EnqueueWithHints");
        return MOS_STATUS_INVALID_PARAMETER;
    }

    numEachStep = numThreads / minSteps;
    leftover    = numThreads % minSteps;

    for( step = 0; step < minSteps; ++step )
    {
        dispatchFreq[step] = numEachStep;
    }

    // dispatch more at beginning
    // The previous loop reset its index to 0 on every pass, so all leftover
    // threads ended up in entry 0; that result is kept, computed in one step.
    // NOTE(review): if the intent was to spread the leftover over the first
    // 'leftover' entries, this is the place to change it.
    dispatchFreq[0] += leftover;

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Finishes setting up HW states for one kernel of a task.
//|          Stores mediaID as the interface descriptor offset in the GPGPU
//|          walker params, the media walker params or, on the MEDIA_OBJECT
//|          path, in the media object command parameters. On the MEDIA_OBJECT
//|          path it also sets up sampler/surface states per argument and
//|          thread and (when the batch buffer refCount is not > 1) adds one
//|          media object command per thread to batchBuffer.
//|          Afterwards it sets up the kernel's global surfaces, stores the
//|          sampler count in krnAllocation and, when kernel debug is enabled,
//|          sets up the SIP surface.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_FinishStatesForKernel(
PCM_HAL_STATE state, // [in] Pointer to CM State
PRENDERHAL_MEDIA_STATE mediaState, // not referenced in this function
PMHW_BATCH_BUFFER batchBuffer, // [in] Pointer to Batch Buffer
int32_t taskId, // [in] Task ID
PCM_HAL_KERNEL_PARAM kernelParam, // [in] Parameters of the kernel being finished
int32_t kernelIndex, // [in] Index of the kernel inside the task
PCM_HAL_INDEX_PARAM indexParam, // [in/out] Index bookkeeping handed to the state setup helpers
int32_t bindingTable, // [in] Binding table handed to the surface setup helpers
int32_t mediaID, // [in] Interface descriptor ID of the kernel
PRENDERHAL_KRN_ALLOCATION krnAllocation // [out] Receives the sampler count
)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
PCM_HAL_TASK_PARAM taskParam = state->taskParam;
PRENDERHAL_INTERFACE renderHal = state->renderHal;
PCM_HAL_WALKER_PARAMS mediaWalkerParams = &kernelParam->walkerParams;
PCM_GPGPU_WALKER_PARAMS perKernelGpGpuWalkerParams = &kernelParam->gpgpuWalkerParams;
PCM_HAL_SCOREBOARD threadCoordinates = nullptr;
PCM_HAL_MASK_AND_RESET dependencyMask = nullptr;
bool enableThreadSpace = false;
bool enableKernelThreadSpace = false;
PCM_HAL_SCOREBOARD kernelThreadCoordinates = nullptr;
UNUSED(taskId);
MHW_MEDIA_OBJECT_PARAMS mediaObjectParam;
PCM_HAL_KERNEL_ARG_PARAM argParam;
MHW_PIPE_CONTROL_PARAMS pipeControlParam;
uint32_t i;
uint32_t hdrSize;
uint32_t aIndex;
uint32_t tIndex;
uint32_t index;
//GT-PIN
taskParam->curKernelIndex = kernelIndex;
CmSafeMemSet(&mediaObjectParam, 0, sizeof(MHW_MEDIA_OBJECT_PARAMS));
// Three mutually exclusive dispatch paths: GPGPU walker, media walker, or
// explicit MEDIA_OBJECT commands (the large else branch below).
if (perKernelGpGpuWalkerParams->gpgpuEnabled)
{
// GPGPU_WALKER, just update ID here. other fields are already filled.
perKernelGpGpuWalkerParams->interfaceDescriptorOffset = mediaID;// mediaObjectParam.dwInterfaceDescriptorOffset;
}
else if (mediaWalkerParams->cmWalkerEnable)
{
// Media walker, just update ID here. other fields are already filled.
mediaWalkerParams->interfaceDescriptorOffset = mediaID;
}
else
{
// MEDIA_OBJECT
mediaObjectParam.dwInterfaceDescriptorOffset = mediaID;
hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
// With indirect data present no inline data is sent; otherwise the inline
// size is the kernel payload size, but at least 4.
if (kernelParam->indirectDataParam.indirectDataSize)
{
mediaObjectParam.dwInlineDataSize = 0;
}
else
{
mediaObjectParam.dwInlineDataSize = MOS_MAX(kernelParam->payloadSize, 4);
}
// Task-level thread coordinates take precedence; the kernel's own thread
// space coordinates are only considered when the task has no table at all.
if (taskParam->threadCoordinates)
{
threadCoordinates = taskParam->threadCoordinates[kernelIndex];
if (threadCoordinates)
{
enableThreadSpace = true;
}
}
else if (kernelParam->kernelThreadSpaceParam.threadCoordinates)
{
kernelThreadCoordinates = kernelParam->kernelThreadSpaceParam.threadCoordinates;
if (kernelThreadCoordinates)
{
enableKernelThreadSpace = true;
}
}
if (taskParam->dependencyMasks)
{
dependencyMask = taskParam->dependencyMasks[kernelIndex];
}
CM_CHK_NULL_GOTOFINISH_MOSERROR( batchBuffer );
// Staging buffer for the inline payload of one media object command; it is
// not cleared here.
uint8_t inlineData[CM_MAX_THREAD_PAYLOAD_SIZE];
uint8_t *cmdInline = inlineData;
// Size of one media object command: header plus inline data.
uint32_t cmdSize = mediaObjectParam.dwInlineDataSize + hdrSize;
// Setup states for arguments and threads
// refCount > 1: the helpers are called with a null payload buffer and no
// media object command is added; only the batch buffer cursor moves.
// Presumably the commands already in the batch buffer are being reused —
// TODO confirm.
if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount > 1)
{
uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
{
argParam = &kernelParam->argParams[aIndex];
// Non-per-thread arguments of CURBE kernels are skipped here.
if ((kernelParam->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParam->perThread)
{
continue;
}
for (tIndex = 0; tIndex < kernelParam->numThreads; tIndex++)
{
// index is tIndex for per-thread arguments and 0 otherwise.
index = tIndex * argParam->perThread;
//-----------------------------------------------------
CM_ASSERT(argParam->payloadOffset < kernelParam->payloadSize);
//-----------------------------------------------------
switch(argParam->kind)
{
case CM_ARGUMENT_GENERAL:
break;
case CM_ARGUMENT_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
state, kernelParam, argParam, indexParam, mediaID, index, nullptr));
break;
case CM_ARGUMENT_SURFACEBUFFER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
state, argParam, indexParam, bindingTable, -1, index, nullptr));
break;
case CM_ARGUMENT_SURFACE2D_UP:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
state, argParam, indexParam, bindingTable, index, nullptr));
break;
case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
state, argParam, indexParam, bindingTable, index, nullptr));
break;
case CM_ARGUMENT_SURFACE2D_SAMPLER:
// NOTE(review): passes 0 here while the non-reuse path below passes
// 'index' to the same helper — confirm the difference is intended.
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
state, argParam, indexParam, bindingTable, 0, nullptr));
break;
case CM_ARGUMENT_SURFACE2D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
state, argParam, indexParam, bindingTable, index, nullptr));
break;
case CM_ARGUMENT_SURFACE3D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
state, argParam, indexParam, bindingTable, index, nullptr));
break;
case CM_ARGUMENT_SURFACE_VME:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
state, argParam, indexParam, bindingTable, 0, nullptr));
break;
case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
state, argParam, indexParam, bindingTable, 0, nullptr));
break;
default:
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Argument kind '%d' is not supported", argParam->kind);
goto finish;
}
}
// NOTE(review): this runs after the thread loop, so tIndex equals
// kernelParam->numThreads here and dependencyMask[tIndex] addresses the
// entry after the last thread. The cursor update below also happens once
// per processed argument rather than once per thread. Both look like they
// may have been meant to sit in a per-thread loop — confirm before changing.
if( dependencyMask )
{
if( dependencyMask[tIndex].resetMask == CM_RESET_DEPENDENCY_MASK )
{
// Overwrites one byte of the command at bBuffer with the new mask.
MOS_SecureMemcpy(bBuffer + (CM_SCOREBOARD_MASK_POS_IN_MEDIA_OBJECT_CMD*sizeof(uint32_t)),
sizeof(uint8_t), &dependencyMask[tIndex].mask, sizeof(uint8_t));
}
}
batchBuffer->iCurrent += cmdSize;
bBuffer += cmdSize;
}
}
else
{
//Insert synchronization if needed (PIPE_CONTROL)
// 1. synchronization is set
// 2. the next kernel has dependency pattern
if((kernelIndex > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (kernelIndex-1))) || (kernelParam->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
{
pipeControlParam = g_cRenderHal_InitPipeControlParams;
pipeControlParam.presDest = nullptr;
pipeControlParam.dwFlushMode = MHW_FLUSH_CUSTOM; // Use custom flags
pipeControlParam.dwPostSyncOp = MHW_FLUSH_NOWRITE;
pipeControlParam.bDisableCSStall = false;
pipeControlParam.bTlbInvalidate = false;
pipeControlParam.bFlushRenderTargetCache = true;
pipeControlParam.bInvalidateTextureCache = true;
// NOTE(review): the return value of AddPipeControl is not checked.
renderHal->pMhwMiInterface->AddPipeControl(nullptr, batchBuffer, &pipeControlParam);
}
// bBuffer is not read again in this branch.
uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
// One media object command per thread.
for (tIndex = 0; tIndex < kernelParam->numThreads; tIndex++)
{
if (enableThreadSpace)
{
// Scoreboard values come from the task-level coordinates of this thread.
mediaObjectParam.VfeScoreboard.ScoreboardEnable = (state->scoreboardParams.ScoreboardMask==0) ? 0:1;
mediaObjectParam.VfeScoreboard.Value[0] = threadCoordinates[tIndex].x;
mediaObjectParam.VfeScoreboard.Value[1] = threadCoordinates[tIndex].y;
mediaObjectParam.VfeScoreboard.ScoreboardColor = threadCoordinates[tIndex].color;
mediaObjectParam.dwSliceDestinationSelect = threadCoordinates[tIndex].sliceSelect;
mediaObjectParam.dwHalfSliceDestinationSelect = threadCoordinates[tIndex].subSliceSelect;
if( !dependencyMask )
{
// Without a per-thread mask, set the low ScoreboardMask bits
// (state->scoreboardParams.ScoreboardMask is used as a bit count here).
mediaObjectParam.VfeScoreboard.ScoreboardMask = (1 << state->scoreboardParams.ScoreboardMask)-1;
}
else
{
mediaObjectParam.VfeScoreboard.ScoreboardMask = dependencyMask[tIndex].mask;
}
}
else if (enableKernelThreadSpace)
{
// Same as above, using the kernel's own thread space coordinates.
mediaObjectParam.VfeScoreboard.ScoreboardEnable = (state->scoreboardParams.ScoreboardMask == 0) ? 0 : 1;
mediaObjectParam.VfeScoreboard.Value[0] = kernelThreadCoordinates[tIndex].x;
mediaObjectParam.VfeScoreboard.Value[1] = kernelThreadCoordinates[tIndex].y;
mediaObjectParam.VfeScoreboard.ScoreboardColor = kernelThreadCoordinates[tIndex].color;
mediaObjectParam.dwSliceDestinationSelect = kernelThreadCoordinates[tIndex].sliceSelect;
mediaObjectParam.dwHalfSliceDestinationSelect = kernelThreadCoordinates[tIndex].subSliceSelect;
if (!dependencyMask)
{
mediaObjectParam.VfeScoreboard.ScoreboardMask = (1 << state->scoreboardParams.ScoreboardMask) - 1;
}
else
{
mediaObjectParam.VfeScoreboard.ScoreboardMask = dependencyMask[tIndex].mask;
}
}
else
{
// No coordinate table: x = tIndex % width, y = tIndex / width.
// NOTE(review): taskParam->threadSpaceWidth is used as a divisor without
// a visible zero check in this function.
mediaObjectParam.VfeScoreboard.Value[0] = tIndex % taskParam->threadSpaceWidth;
mediaObjectParam.VfeScoreboard.Value[1] = tIndex / taskParam->threadSpaceWidth;
}
// Fill the inline payload and set up states for every argument of this thread.
for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
{
argParam = &kernelParam->argParams[aIndex];
index = tIndex * argParam->perThread;
if ((kernelParam->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParam->perThread)
{
continue;
}
//-----------------------------------------------------
CM_ASSERT(argParam->payloadOffset < kernelParam->payloadSize);
//-----------------------------------------------------
switch(argParam->kind)
{
case CM_ARGUMENT_GENERAL:
// Copy this thread's value (unitSize bytes) into the inline payload.
// NOTE(review): apart from the CM_ASSERT above, no bound against
// CM_MAX_THREAD_PAYLOAD_SIZE is visible here.
MOS_SecureMemcpy(
cmdInline + argParam->payloadOffset,
argParam->unitSize,
argParam->firstValue + index * argParam->unitSize,
argParam->unitSize);
break;
case CM_ARGUMENT_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
state, kernelParam, argParam, indexParam, mediaID, index, cmdInline));
break;
case CM_ARGUMENT_SURFACEBUFFER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
state, argParam, indexParam, bindingTable, -1, index, cmdInline));
break;
case CM_ARGUMENT_SURFACE2D_UP:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
state, argParam, indexParam, bindingTable, index, cmdInline));
break;
case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
state, argParam, indexParam, bindingTable, index, cmdInline));
break;
case CM_ARGUMENT_SURFACE2D_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
state, argParam, indexParam, bindingTable, index, cmdInline));
break;
case CM_ARGUMENT_SURFACE2D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
state, argParam, indexParam, bindingTable, index, cmdInline));
break;
case CM_ARGUMENT_SURFACE3D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
state, argParam, indexParam, bindingTable, index, cmdInline));
break;
case CM_ARGUMENT_SURFACE_VME:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
state, argParam, indexParam, bindingTable, 0, cmdInline));
break;
case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
state, argParam, indexParam, bindingTable, 0, cmdInline));
break;
default:
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Argument kind '%d' is not supported", argParam->kind);
goto finish;
}
}
// cmdInline points at inlineData, so the payload assembled above is what
// gets attached to the command.
mediaObjectParam.pInlineData = inlineData;
// NOTE(review): the return value of AddMediaObject is not checked.
state->renderHal->pMhwRenderInterface->AddMediaObject(nullptr, batchBuffer, &mediaObjectParam);
}
}
}
// Global surfaces: every slot that is not CM_NULL_SURFACE is set up as a
// buffer surface through a temporary argument descriptor pointing at the
// stored surface handle. The slot number is passed as the fifth argument
// (the per-argument calls above pass -1) and no payload buffer is supplied.
for (i = 0; i < CM_MAX_GLOBAL_SURFACE_NUMBER; i++) {
if ((kernelParam->globalSurface[i] & CM_SURFACE_MASK) != CM_NULL_SURFACE)
{
CM_HAL_KERNEL_ARG_PARAM tempArgParam;
argParam = &tempArgParam;
tempArgParam.kind = CM_ARGUMENT_SURFACEBUFFER;
tempArgParam.payloadOffset = 0;
tempArgParam.unitCount = 1;
tempArgParam.unitSize = sizeof(uint32_t);
tempArgParam.perThread = false;
tempArgParam.firstValue = (uint8_t*)&kernelParam->globalSurface[i];
tempArgParam.aliasIndex = 0;
tempArgParam.aliasCreated = false;
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
state, argParam, indexParam, bindingTable, (int16_t)i, 0, nullptr));
}
}
// set number of samplers
krnAllocation->Params.Sampler_Count = indexParam->samplerIndexCount;
// add SIP surface
if (kernelParam->kernelDebugEnabled)
{
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSipSurfaceState(state, indexParam, bindingTable));
}
finish:
return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Finishes setting up HW states for the kernel
//| Used by EnqueueWithHints
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_FinishStatesForKernelMix(
PCM_HAL_STATE state,
PMHW_BATCH_BUFFER batchBuffer,
int32_t taskId,
PCM_HAL_KERNEL_PARAM* cmExecKernels,
PCM_HAL_INDEX_PARAM indexParams,
int32_t *bindingTableEntries,
int32_t *mediaIds,
PRENDERHAL_KRN_ALLOCATION *krnAllocations,
uint32_t numKernels,
uint32_t hints,
bool lastTask)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
PRENDERHAL_INTERFACE renderHal = state->renderHal;
PMHW_MEDIA_OBJECT_PARAMS mediaObjectParams = nullptr;
PCM_HAL_KERNEL_PARAM* kernelParams = nullptr;
PCM_HAL_KERNEL_ARG_PARAM* argParams = nullptr;
PCM_HAL_BB_ARGS bbCmArgs = nullptr;
PMHW_VFE_SCOREBOARD scoreboardParams = nullptr;
PCM_HAL_PARALLELISM_GRAPH_INFO parallelGraphInfo = nullptr;
PCM_HAL_KERNEL_ARG_PARAM argParam = nullptr;
PCM_HAL_KERNEL_SUBSLICE_INFO kernelsSliceInfo = nullptr;
PCM_HAL_KERNEL_THREADSPACE_PARAM kernelTSParam = nullptr;
PCM_HAL_KERNEL_GROUP_INFO groupInfo = nullptr;
CM_HAL_DEPENDENCY vfeDependencyInfo ;
CM_PLATFORM_INFO platformInfo ;
CM_GT_SYSTEM_INFO systemInfo ;
CM_HAL_SCOREBOARD_XY_MASK threadCoordinates ;
uint32_t **dependRemap = nullptr;
uint32_t **dispatchFreq = nullptr;
uint8_t **cmdInline = nullptr;
uint32_t *cmdSizes = nullptr;
uint32_t *remapKrnToGrp = nullptr;
uint32_t *remapGrpToKrn = nullptr;
uint32_t *numKernelsPerGrp = nullptr;
uint8_t *kernelScoreboardMask = nullptr;
uint8_t hintsBits = 0;
uint8_t tmpThreadScoreboardMask = 0;
uint8_t scoreboardMask = 0;
bool singleSubSlice = false;
bool enableThreadSpace = false;
bool kernelFound = false;
bool updateCurrKernel = false;
bool noDependencyCase = false;
bool sufficientSliceInfo = true;
uint32_t adjustedYCoord = 0;
uint32_t numKernelGroups = CM_HINTS_DEFAULT_NUM_KERNEL_GRP;
uint32_t totalNumThreads = 0;
uint32_t hdrSize = 0;
uint32_t i = 0;
uint32_t j = 0;
uint32_t k = 0;
uint32_t tmp = 0;
uint32_t tmp1 = 0;
uint32_t loopCount = 0;
uint32_t aIndex = 0;
uint32_t index = 0;
uint32_t totalReqSubSlices = 0;
uint32_t difference = 0;
uint32_t curKernel = 0;
uint32_t numSet = 0;
uint32_t numSubSlicesEnabled = 0;
uint32_t sliceIndex = 0;
uint32_t tmpNumSubSlice = 0;
uint32_t tmpNumKernelsPerGrp = 0;
uint32_t maximum = 0;
uint32_t count = 0;
uint32_t numDispatched = 0;
uint32_t tmpIndex = 0;
uint32_t numStepsDispatched = 0;
uint32_t minSteps = UINT_MAX;
uint32_t grpId = 0;
uint32_t allocSize = 0;
uint32_t currentKernel = 0;
uint32_t roundRobinCount = 0;
uint32_t numTasks = 0;
uint32_t extraSWThreads = 0;
UNUSED(taskId);
CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
MOS_ZeroMemory(&threadCoordinates, sizeof(CM_HAL_SCOREBOARD_XY_MASK));
MOS_ZeroMemory(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY));
MOS_ZeroMemory(&platformInfo, sizeof(CM_PLATFORM_INFO));
MOS_ZeroMemory(&systemInfo, sizeof(CM_GT_SYSTEM_INFO));
mediaObjectParams = (PMHW_MEDIA_OBJECT_PARAMS)MOS_AllocAndZeroMemory(sizeof(MHW_MEDIA_OBJECT_PARAMS)*numKernels);
kernelParams = (PCM_HAL_KERNEL_PARAM*)MOS_AllocAndZeroMemory(sizeof(PCM_HAL_KERNEL_PARAM)*numKernels);
argParams = (PCM_HAL_KERNEL_ARG_PARAM*)MOS_AllocAndZeroMemory(sizeof(PCM_HAL_KERNEL_ARG_PARAM)*numKernels);
cmdInline = (uint8_t**)MOS_AllocAndZeroMemory(sizeof(uint8_t*)*numKernels);
cmdSizes = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
remapKrnToGrp = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
remapGrpToKrn = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
kernelScoreboardMask = (uint8_t*)MOS_AllocAndZeroMemory(sizeof(uint8_t)*numKernels);
dependRemap = (uint32_t**)MOS_AllocAndZeroMemory(sizeof(uint32_t*)*numKernels);
parallelGraphInfo = (PCM_HAL_PARALLELISM_GRAPH_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_PARALLELISM_GRAPH_INFO)*numKernels);
dispatchFreq = (uint32_t**)MOS_AllocAndZeroMemory(sizeof(uint32_t*)*numKernels);
numKernelsPerGrp = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
if( !mediaObjectParams || !kernelParams || !argParams ||
!cmdInline || !cmdSizes ||
!remapKrnToGrp || !remapGrpToKrn || !kernelScoreboardMask || !dependRemap ||
!parallelGraphInfo || !dispatchFreq || !numKernelsPerGrp )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
goto finish;
}
state->euSaturationEnabled = true;
hintsBits = (hints & CM_HINTS_MASK_KERNEL_GROUPS) >> CM_HINTS_NUM_BITS_WALK_OBJ;
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNumKernelsPerGroup(hintsBits, numKernels, numKernelsPerGrp,
&numKernelGroups, remapKrnToGrp, remapGrpToKrn));
kernelsSliceInfo = (PCM_HAL_KERNEL_SUBSLICE_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_SUBSLICE_INFO)*numKernelGroups);
groupInfo = (PCM_HAL_KERNEL_GROUP_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_GROUP_INFO)*numKernelGroups);
if( !kernelsSliceInfo || !groupInfo )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
goto finish;
}
for( i = 0; i < numKernelGroups; ++i)
{
groupInfo[i].numKernelsInGroup = numKernelsPerGrp[i];
}
hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
for ( i = 0; i < numKernels; ++i )
{
kernelParams[i] = cmExecKernels[i];
mediaObjectParams[i].dwInterfaceDescriptorOffset = mediaIds[i];
mediaObjectParams[i].dwInlineDataSize = MOS_MAX(kernelParams[i]->payloadSize, 4);
cmdInline[i] = (uint8_t*)MOS_AllocAndZeroMemory(sizeof(uint8_t) * 1024);
cmdSizes[i] = mediaObjectParams[i].dwInlineDataSize + hdrSize;
totalNumThreads += kernelParams[i]->numThreads;
}
numTasks = ( hints & CM_HINTS_MASK_NUM_TASKS ) >> CM_HINTS_NUM_BITS_TASK_POS;
if( numTasks > 1 )
{
if( lastTask )
{
extraSWThreads = totalNumThreads % numTasks;
}
totalNumThreads = (totalNumThreads / numTasks) + extraSWThreads;
}
for( i = 0; i < numKernels; ++i )
{
dependRemap[i] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t) * CM_HAL_MAX_DEPENDENCY_COUNT);
for( k = 0; k < CM_HAL_MAX_DEPENDENCY_COUNT; ++k )
{
// initialize each index to map to itself
dependRemap[i][k] = k;
}
}
for( i = 0; i < numKernels; ++i )
{
kernelTSParam = &kernelParams[i]->kernelThreadSpaceParam;
// calculate union dependency vector of all kernels with dependency
if( kernelTSParam->dependencyInfo.count )
{
if( vfeDependencyInfo.count == 0 )
{
MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyInfo, sizeof(CM_HAL_DEPENDENCY));
kernelScoreboardMask[i] = ( 1 << vfeDependencyInfo.count ) - 1;
}
else
{
for( j = 0; j < kernelTSParam->dependencyInfo.count; ++j )
{
for( k = 0; k < vfeDependencyInfo.count; ++k )
{
if( (kernelTSParam->dependencyInfo.deltaX[j] == vfeDependencyInfo.deltaX[k]) &&
(kernelTSParam->dependencyInfo.deltaY[j] == vfeDependencyInfo.deltaY[k]) )
{
CM_HAL_SETBIT(kernelScoreboardMask[i], k);
dependRemap[i][j] = k;
break;
}
}
if ( k == vfeDependencyInfo.count )
{
vfeDependencyInfo.deltaX[vfeDependencyInfo.count] = kernelTSParam->dependencyInfo.deltaX[j];
vfeDependencyInfo.deltaY[vfeDependencyInfo.count] = kernelTSParam->dependencyInfo.deltaY[j];
CM_HAL_SETBIT(kernelScoreboardMask[i], vfeDependencyInfo.count);
vfeDependencyInfo.count++;
dependRemap[i][j] = k;
}
}
}
}
} // for num kernels
if( vfeDependencyInfo.count > CM_HAL_MAX_DEPENDENCY_COUNT )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
goto finish;
}
// set VFE scoreboarding information from union of kernel dependency vectors
scoreboardParams = &state->scoreboardParams;
scoreboardParams->ScoreboardMask = (uint8_t)vfeDependencyInfo.count;
for( i = 0; i < scoreboardParams->ScoreboardMask; ++i )
{
scoreboardParams->ScoreboardDelta[i].x = vfeDependencyInfo.deltaX[i];
scoreboardParams->ScoreboardDelta[i].y = vfeDependencyInfo.deltaY[i];
}
if (vfeDependencyInfo.count == 0)
{
noDependencyCase = true;
}
CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetPlatformInfo(state, &platformInfo, true));
singleSubSlice = (platformInfo.numSubSlices == 1) ? true : false;
CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGTSystemInfo(state, &systemInfo));
if( !singleSubSlice )
{
for( i = 0; i < numKernelGroups; ++i )
{
tmpNumKernelsPerGrp = numKernelsPerGrp[i];
for( j = 0; j < tmpNumKernelsPerGrp; ++j )
{
kernelTSParam = &kernelParams[count]->kernelThreadSpaceParam;
switch( kernelTSParam->patternType )
{
case CM_NONE_DEPENDENCY:
maximum = kernelParams[count]->numThreads;
break;
case CM_WAVEFRONT:
maximum = MOS_MIN(kernelTSParam->threadSpaceWidth, kernelTSParam->threadSpaceHeight);
break;
case CM_WAVEFRONT26:
maximum = MOS_MIN( ((kernelTSParam->threadSpaceWidth + 1) >> 1), kernelTSParam->threadSpaceHeight);
break;
case CM_VERTICAL_WAVE:
maximum = kernelTSParam->threadSpaceHeight;
break;
case CM_HORIZONTAL_WAVE:
maximum = kernelTSParam->threadSpaceWidth;
break;
case CM_WAVEFRONT26Z:
maximum = MOS_MIN( ((kernelTSParam->threadSpaceWidth - 1) >> 1), kernelTSParam->threadSpaceHeight);
break;
default:
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
goto finish;
}
if( kernelTSParam->patternType != CM_WAVEFRONT26Z )
{
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetParallelGraphInfo(maximum, kernelParams[count]->numThreads,
kernelTSParam->threadSpaceWidth, kernelTSParam->threadSpaceHeight,
&parallelGraphInfo[count], kernelTSParam->patternType, noDependencyCase));
}
else
{
parallelGraphInfo[count].numSteps = kernelTSParam->dispatchInfo.numWaves;
}
if( kernelTSParam->patternType != CM_NONE_DEPENDENCY )
{
dispatchFreq[count] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*parallelGraphInfo[count].numSteps);
if( !dispatchFreq[count] )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Memory allocation failed for EnqueueWithHints");
goto finish;
}
if( kernelTSParam->patternType != CM_WAVEFRONT26Z )
{
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetDispatchPattern(parallelGraphInfo[count], kernelTSParam->patternType, dispatchFreq[count]));
}
else
{
MOS_SecureMemcpy(dispatchFreq[count], sizeof(uint32_t)*parallelGraphInfo[count].numSteps,
kernelTSParam->dispatchInfo.numThreadsInWave, sizeof(uint32_t)*parallelGraphInfo[count].numSteps);
}
}
if (!noDependencyCase)
{
tmpNumSubSlice =
(maximum / (platformInfo.numEUsPerSubSlice * platformInfo.numHWThreadsPerEU)) + 1;
if (tmpNumSubSlice > platformInfo.numSubSlices)
{
tmpNumSubSlice = platformInfo.numSubSlices - 1;
}
if (tmpNumSubSlice > kernelsSliceInfo[i].numSubSlices)
{
kernelsSliceInfo[i].numSubSlices = tmpNumSubSlice;
}
}
else
{
kernelsSliceInfo[i].numSubSlices = platformInfo.numSubSlices;
}
count++;
}
}
if (!noDependencyCase)
{
for (i = 0; i < numKernelGroups; ++i)
{
totalReqSubSlices += kernelsSliceInfo[i].numSubSlices;
}
// adjust if requested less or more subslices than architecture has
if (totalReqSubSlices < platformInfo.numSubSlices)
{
// want to add subslices starting from K0
difference = platformInfo.numSubSlices - totalReqSubSlices;
tmp = tmp1 = 0;
for (i = 0; i < difference; ++i)
{
tmp = tmp1 % numKernelGroups;
kernelsSliceInfo[tmp].numSubSlices++;
totalReqSubSlices++;
tmp1++;
}
}
else if (totalReqSubSlices > platformInfo.numSubSlices)
{
// want to subtract subslices starting from last kernel
difference = totalReqSubSlices - platformInfo.numSubSlices;
tmp = 0;
tmp1 = numKernelGroups - 1;
for (i = numKernelGroups - 1, j = 0; j < difference; --i, ++j)
{
tmp = tmp1 % numKernelGroups;
kernelsSliceInfo[tmp].numSubSlices--;
totalReqSubSlices--;
tmp1 += numKernelGroups - 1;
}
}
if (totalReqSubSlices != platformInfo.numSubSlices)
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Total requested sub-slices does not match platform's number of sub-slices");
goto finish;
}
}
for(i = 0; i < numKernelGroups; ++i)
{
kernelsSliceInfo[i].destination = (PCM_HAL_KERNEL_SLICE_SUBSLICE)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_SLICE_SUBSLICE)*kernelsSliceInfo[i].numSubSlices);
if( !kernelsSliceInfo[i].destination )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
goto finish;
}
}
// set slice, subslice for each kernel group
if (systemInfo.isSliceInfoValid)
{
for (i = 0; i < systemInfo.numMaxSlicesSupported; ++i)
{
for (j = 0; j < (systemInfo.numMaxSubSlicesSupported / systemInfo.numMaxSlicesSupported); ++j)
{
if (systemInfo.sliceInfo[i].SubSliceInfo[j].Enabled && systemInfo.sliceInfo[i].Enabled)
{
if (curKernel < numKernelGroups)
{
if (kernelsSliceInfo[curKernel].numSubSlices == numSet)
{
curKernel++;
numSet = 0;
}
}
if (curKernel < numKernelGroups)
{
kernelsSliceInfo[curKernel].destination[numSet].slice = i;
kernelsSliceInfo[curKernel].destination[numSet].subSlice = j;
numSet++;
}
numSubSlicesEnabled++;
}
}
}
if (numSubSlicesEnabled != platformInfo.numSubSlices)
{
// not enough slice information, do not assign sub-slice destination
sufficientSliceInfo = false;
}
}
// set freq dispatch ratio for each group
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetKernelGrpFreqDispatch(parallelGraphInfo, groupInfo, numKernelGroups, &minSteps));
// set dispatch pattern for kernel with no dependency
for( i = 0; i < numKernels; ++i )
{
if( kernelParams[i]->kernelThreadSpaceParam.patternType == CM_NONE_DEPENDENCY )
{
grpId = remapKrnToGrp[i];
allocSize = 0;
if( groupInfo[grpId].freqDispatch == 0 )
{
allocSize = minSteps;
groupInfo[grpId].freqDispatch = 1;
}
else
{
allocSize = minSteps * groupInfo[grpId].freqDispatch;
groupInfo[grpId].freqDispatch = groupInfo[grpId].freqDispatch * 2;
}
dispatchFreq[i] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*allocSize);
if( !dispatchFreq[i] )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
goto finish;
}
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetNoDependKernelDispatchPattern(kernelParams[i]->numThreads,
allocSize, dispatchFreq[i]));
}
}
}
CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
if( bbCmArgs->refCount > 1 )
{
uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
updateCurrKernel = false;
for( i = 0; i < totalNumThreads; ++i )
{
if( !singleSubSlice )
{
if( (dispatchFreq[currentKernel][state->hintIndexes.dispatchIndexes[currentKernel]] == numDispatched) ||
(state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads) )
{
numDispatched = 0;
numStepsDispatched++;
state->hintIndexes.dispatchIndexes[currentKernel]++;
if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
{
updateCurrKernel = true;
groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished++;
if( groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished ==
groupInfo[remapKrnToGrp[currentKernel]].numKernelsInGroup )
{
groupInfo[remapKrnToGrp[currentKernel]].groupFinished = 1;
}
else
{
remapGrpToKrn[tmpIndex]++;
}
}
if( (groupInfo[remapKrnToGrp[currentKernel]].freqDispatch == numStepsDispatched) ||
updateCurrKernel )
{
numStepsDispatched = 0;
roundRobinCount++;
tmpIndex = roundRobinCount % numKernelGroups;
if( groupInfo[tmpIndex].groupFinished )
{
loopCount = 0;
while( (loopCount < numKernelGroups) && (!kernelFound) )
{
roundRobinCount++;
tmpIndex = roundRobinCount % numKernelGroups;
if( state->hintIndexes.kernelIndexes[remapGrpToKrn[tmpIndex]] < kernelParams[remapGrpToKrn[tmpIndex]]->numThreads )
{
kernelFound = true;
}
loopCount++;
}
if( !kernelFound )
{
// Error shouldn't be here
// if still in for loop totalNumThreads, needs to be a kernel with threads left
eStatus = MOS_STATUS_UNKNOWN;
CM_ASSERTMESSAGE("Couldn't find kernel with threads left for EnqueueWithHints");
goto finish;
}
}
currentKernel = remapGrpToKrn[tmpIndex];
}
}
}
else
{
if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
{
currentKernel++;
}
}
if( kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates )
{
threadCoordinates.y = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].y;
threadCoordinates.mask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].mask;
enableThreadSpace = true;
threadCoordinates.resetMask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].resetMask;
}
if( enableThreadSpace )
{
if( threadCoordinates.mask != CM_DEFAULT_THREAD_DEPENDENCY_MASK )
{
tmpThreadScoreboardMask = kernelScoreboardMask[currentKernel];
// do the remapping
for( k = 0; k < kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count; ++k )
{
if( (threadCoordinates.mask & CM_HINTS_LEASTBIT_MASK) == 0 )
{
CM_HAL_UNSETBIT(tmpThreadScoreboardMask, dependRemap[currentKernel][k]);
}
threadCoordinates.mask = threadCoordinates.mask >> 1;
}
scoreboardMask = tmpThreadScoreboardMask;
}
else
{
scoreboardMask = kernelScoreboardMask[currentKernel];
}
}
else
{
threadCoordinates.y = state->hintIndexes.kernelIndexes[currentKernel] / kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
scoreboardMask = kernelScoreboardMask[currentKernel];
}
adjustedYCoord = 0;
if( currentKernel > 0 )
{
// if not first kernel, and has dependency,
// and along scoreboard border top need to mask out dependencies with y < 0
if( kernelScoreboardMask[currentKernel] )
{
if( threadCoordinates.y == 0 )
{
for( k = 0; k < vfeDependencyInfo.count; ++k )
{
if( vfeDependencyInfo.deltaY[k] < 0 )
{
CM_HAL_UNSETBIT(scoreboardMask, k);
}
}
}
}
}
if( currentKernel < numKernels - 1 )
{
// if not last kernel, and has dependency,
// along scoreboard border bottom need to mask out dependencies with y > 0
if( kernelScoreboardMask[currentKernel] )
{
if( threadCoordinates.y == (kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceHeight - 1))
{
for( k = 0; k < vfeDependencyInfo.count; ++k)
{
if( vfeDependencyInfo.deltaY[k] > 0 )
{
CM_HAL_UNSETBIT(scoreboardMask, k);
}
}
}
}
}
for( aIndex = 0; aIndex < kernelParams[currentKernel]->numArgs; aIndex++ )
{
argParams[currentKernel] = &kernelParams[currentKernel]->argParams[aIndex];
index = state->hintIndexes.kernelIndexes[currentKernel] * argParams[currentKernel]->perThread;
if( (kernelParams[currentKernel]->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParams[currentKernel]->perThread )
{
continue;
}
CM_ASSERT(argParams[currentKernel]->payloadOffset < kernelParams[currentKernel]->payloadSize);
switch(argParams[currentKernel]->kind)
{
case CM_ARGUMENT_GENERAL:
break;
case CM_ARGUMENT_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
state, kernelParams[currentKernel], argParams[currentKernel], &indexParams[currentKernel],
mediaIds[currentKernel], index, nullptr));
break;
case CM_ARGUMENT_SURFACEBUFFER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], -1, index, nullptr));
break;
case CM_ARGUMENT_SURFACE2D_UP:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, nullptr));
break;
case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, nullptr));
break;
case CM_ARGUMENT_SURFACE2D_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], 0, nullptr));
break;
case CM_ARGUMENT_SURFACE2D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, nullptr));
break;
case CM_ARGUMENT_SURFACE3D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, nullptr));
break;
case CM_ARGUMENT_SURFACE_VME:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], 0, nullptr));
break;
case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], 0, nullptr));
break;
default:
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Argument kind '%d' is not supported", argParams[currentKernel]->kind);
goto finish;
} // switch argKind
} // for numArgs
if( threadCoordinates.resetMask == CM_RESET_DEPENDENCY_MASK )
{
MOS_SecureMemcpy(bBuffer + (CM_SCOREBOARD_MASK_POS_IN_MEDIA_OBJECT_CMD*sizeof(uint32_t)),
sizeof(uint8_t), &scoreboardMask, sizeof(uint8_t));
}
batchBuffer->iCurrent += cmdSizes[currentKernel];
bBuffer += cmdSizes[currentKernel];
state->hintIndexes.kernelIndexes[currentKernel]++;
enableThreadSpace = false;
kernelFound = false;
updateCurrKernel = false;
numDispatched++;
} // for totalNumThreads
} // if uiRefCount > 1
else
{
uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
updateCurrKernel = false;
for( i = 0; i < totalNumThreads; ++i)
{
if( !singleSubSlice )
{
if( (dispatchFreq[currentKernel][state->hintIndexes.dispatchIndexes[currentKernel]] == numDispatched) ||
(state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads) )
{
numDispatched = 0;
numStepsDispatched++;
state->hintIndexes.dispatchIndexes[currentKernel]++;
if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
{
updateCurrKernel = true;
groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished++;
if( groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished ==
groupInfo[remapKrnToGrp[currentKernel]].numKernelsInGroup )
{
groupInfo[remapKrnToGrp[currentKernel]].groupFinished = 1;
}
else
{
remapGrpToKrn[tmpIndex]++;
}
}
if( (groupInfo[remapKrnToGrp[currentKernel]].freqDispatch == numStepsDispatched) ||
updateCurrKernel )
{
numStepsDispatched = 0;
roundRobinCount++;
tmpIndex = roundRobinCount % numKernelGroups;
if( groupInfo[tmpIndex].groupFinished )
{
loopCount = 0;
while( (loopCount < numKernelGroups) && (!kernelFound) )
{
roundRobinCount++;
tmpIndex = roundRobinCount % numKernelGroups;
if( state->hintIndexes.kernelIndexes[remapGrpToKrn[tmpIndex]] < kernelParams[remapGrpToKrn[tmpIndex]]->numThreads )
{
kernelFound = true;
}
loopCount++;
}
if( !kernelFound )
{
// Error shouldn't be here
// if still in for loop totalNumThreads, needs to be a kernel with threads left
eStatus = MOS_STATUS_UNKNOWN;
CM_ASSERTMESSAGE("Couldn't find kernel with threads left for EnqueueWithHints");
goto finish;
}
}
currentKernel = remapGrpToKrn[tmpIndex];
}
}
}
else
{
if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
{
currentKernel++;
}
}
if( kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates )
{
threadCoordinates.x = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].x;
threadCoordinates.y = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].y;
threadCoordinates.mask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].mask;
enableThreadSpace = true;
}
mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardEnable =
(kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count == 0) ? 0:1;
if( !singleSubSlice && systemInfo.isSliceInfoValid && sufficientSliceInfo )
{
sliceIndex = kernelsSliceInfo[remapKrnToGrp[currentKernel]].counter % kernelsSliceInfo[remapKrnToGrp[currentKernel]].numSubSlices;
mediaObjectParams[currentKernel].dwSliceDestinationSelect = kernelsSliceInfo[remapKrnToGrp[currentKernel]].destination[sliceIndex].slice;
mediaObjectParams[currentKernel].dwHalfSliceDestinationSelect = kernelsSliceInfo[remapKrnToGrp[currentKernel]].destination[sliceIndex].subSlice;
mediaObjectParams[currentKernel].bForceDestination = true;
kernelsSliceInfo[remapKrnToGrp[currentKernel]].counter++;
}
if( enableThreadSpace )
{
mediaObjectParams[currentKernel].VfeScoreboard.Value[0] = threadCoordinates.x;
mediaObjectParams[currentKernel].VfeScoreboard.Value[1] = threadCoordinates.y;
if( threadCoordinates.mask != CM_DEFAULT_THREAD_DEPENDENCY_MASK )
{
tmpThreadScoreboardMask = kernelScoreboardMask[currentKernel];
// do the remapping
for( k = 0; k < kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count; ++k )
{
if( (threadCoordinates.mask & CM_HINTS_LEASTBIT_MASK) == 0 )
{
CM_HAL_UNSETBIT(tmpThreadScoreboardMask, dependRemap[currentKernel][k]);
}
threadCoordinates.mask = threadCoordinates.mask >> 1;
}
mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = tmpThreadScoreboardMask;
}
else
{
mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = kernelScoreboardMask[currentKernel];
}
}
else
{
mediaObjectParams[currentKernel].VfeScoreboard.Value[0] = state->hintIndexes.kernelIndexes[currentKernel] %
kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
mediaObjectParams[currentKernel].VfeScoreboard.Value[1] = state->hintIndexes.kernelIndexes[currentKernel] /
kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = kernelScoreboardMask[currentKernel];
}
adjustedYCoord = 0;
// adjust y coordinate for kernels after the first one
if( currentKernel > 0 )
{
// if not first kernel, and has dependency,
// and along scoreboard border need to mask out dependencies with y < 0
if( kernelScoreboardMask[currentKernel] )
{
if (mediaObjectParams[currentKernel].VfeScoreboard.Value[1] == 0)
{
for( k = 0; k < vfeDependencyInfo.count; ++k )
{
if( vfeDependencyInfo.deltaY[k] < 0 )
{
CM_HAL_UNSETBIT(mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask, k);
}
}
}
}
for( j = currentKernel; j > 0; --j )
{
adjustedYCoord += kernelParams[j-1]->kernelThreadSpaceParam.threadSpaceHeight;
}
}
if( currentKernel < numKernels - 1 )
{
// if not last kernel, and has dependency,
// along scoreboard border bottom need to mask out dependencies with y > 0
if( kernelScoreboardMask[currentKernel] )
{
if (mediaObjectParams[currentKernel].VfeScoreboard.Value[1] ==
(kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceHeight - 1))
{
for( k = 0; k < vfeDependencyInfo.count; ++k )
{
if( vfeDependencyInfo.deltaY[k] > 0 )
{
CM_HAL_UNSETBIT(mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask, k);
}
}
}
}
}
mediaObjectParams[currentKernel].VfeScoreboard.Value[1] =
mediaObjectParams[currentKernel].VfeScoreboard.Value[1] + adjustedYCoord;
for( aIndex = 0; aIndex < kernelParams[currentKernel]->numArgs; aIndex++ )
{
argParams[currentKernel] = &kernelParams[currentKernel]->argParams[aIndex];
index = state->hintIndexes.kernelIndexes[currentKernel] * argParams[currentKernel]->perThread;
if( (kernelParams[currentKernel]->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParams[currentKernel]->perThread )
{
continue;
}
CM_ASSERT(argParams[currentKernel]->payloadOffset < kernelParams[currentKernel]->payloadSize);
switch(argParams[currentKernel]->kind)
{
case CM_ARGUMENT_GENERAL:
MOS_SecureMemcpy(
cmdInline[currentKernel] + argParams[currentKernel]->payloadOffset,
argParams[currentKernel]->unitSize,
argParams[currentKernel]->firstValue + index * argParams[currentKernel]->unitSize,
argParams[currentKernel]->unitSize);
break;
case CM_ARGUMENT_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
state, kernelParams[currentKernel], argParams[currentKernel], &indexParams[currentKernel],
mediaIds[currentKernel], index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACEBUFFER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], -1, index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE2D_UP:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE2D_SAMPLER:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE2D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE3D:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE_VME:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], 0, cmdInline[currentKernel]));
break;
case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
state, argParams[currentKernel], &indexParams[currentKernel],
bindingTableEntries[currentKernel], 0, cmdInline[currentKernel]));
break;
default:
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Argument kind '%d' is not supported", argParams[currentKernel]->kind);
goto finish;
}
}
mediaObjectParams[currentKernel].pInlineData = cmdInline[currentKernel];
state->renderHal->pMhwRenderInterface->AddMediaObject(nullptr, batchBuffer, &mediaObjectParams[currentKernel]);
state->hintIndexes.kernelIndexes[currentKernel]++;
enableThreadSpace = false;
kernelFound = false;
updateCurrKernel = false;
numDispatched++;
} // for totalNumThreads
} // else refCount <= 1
// setup global surfaces
for( j = 0; j < numKernels; ++j )
{
for( i = 0; i < CM_MAX_GLOBAL_SURFACE_NUMBER; ++i )
{
if(( kernelParams[j]->globalSurface[i] & CM_SURFACE_MASK) != CM_NULL_SURFACE)
{
CM_HAL_KERNEL_ARG_PARAM tmpArgParam;
argParam = &tmpArgParam;
tmpArgParam.kind = CM_ARGUMENT_SURFACEBUFFER;
tmpArgParam.payloadOffset = 0;
tmpArgParam.unitCount = 1;
tmpArgParam.unitSize = sizeof(uint32_t);
tmpArgParam.perThread = false;
tmpArgParam.firstValue = (uint8_t*)&kernelParams[j]->globalSurface[i];
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
state, argParam, &indexParams[j], bindingTableEntries[j],
(int16_t)i, 0, nullptr));
}
}
// set number of samplers
krnAllocations[j]->Params.Sampler_Count = indexParams[j].samplerIndexCount;
}
// check to make sure we did all threads for all kernels
if (numTasks <= 1 || lastTask )
{
for( i = 0; i < numKernels; ++i )
{
if( state->hintIndexes.kernelIndexes[i] < kernelParams[i]->numThreads )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Not all threads for all kernels were put into batch buffer");
goto finish;
}
}
}
if ( lastTask )
{
MOS_ZeroMemory(&state->hintIndexes.kernelIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
MOS_ZeroMemory(&state->hintIndexes.dispatchIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
}
finish:
// free memory
if( mediaObjectParams ) MOS_FreeMemory(mediaObjectParams);
if( kernelParams ) MOS_FreeMemory(kernelParams);
if( argParams ) MOS_FreeMemory(argParams);
if( cmdSizes ) MOS_FreeMemory(cmdSizes);
if( remapKrnToGrp ) MOS_FreeMemory(remapKrnToGrp);
if( remapGrpToKrn ) MOS_FreeMemory(remapGrpToKrn);
if( kernelScoreboardMask ) MOS_FreeMemory(kernelScoreboardMask);
if( parallelGraphInfo ) MOS_FreeMemory(parallelGraphInfo);
if( numKernelsPerGrp ) MOS_FreeMemory(numKernelsPerGrp);
if( groupInfo ) MOS_FreeMemory(groupInfo);
if( cmdInline )
{
for( i = 0; i < numKernels; ++i )
{
if( cmdInline[i] )
MOS_FreeMemory(cmdInline[i]);
}
MOS_FreeMemory(cmdInline);
}
if( kernelsSliceInfo )
{
for( i = 0; i < numKernelGroups; ++i )
{
if( kernelsSliceInfo[i].destination )
MOS_FreeMemory(kernelsSliceInfo[i].destination);
}
MOS_FreeMemory(kernelsSliceInfo);
}
if( dependRemap )
{
for( i = 0; i < numKernels; ++i )
{
if( dependRemap[i] )
MOS_FreeMemory(dependRemap[i]);
}
MOS_FreeMemory(dependRemap);
}
if( dispatchFreq )
{
for( i = 0; i < numKernels; ++i )
{
if( dispatchFreq[i] )
MOS_FreeMemory(dispatchFreq[i]);
}
MOS_FreeMemory(dispatchFreq);
}
return eStatus;
}
//!
//! \brief    Counts the threads that belong to one walker group
//! \details  Steps through the nested walker loops (global outer, global
//!           inner, local outer, local middle, local inner) described by
//!           walkerParams and counts the iterations of each level. Every
//!           per-level counter is reset each time its loop is re-entered, so
//!           once the walk is over it holds the iteration count of the most
//!           recent pass through that level. The result is the product of the
//!           counters at and below the level selected by groupIdLoopSelect,
//!           multiplied by the color count (colorCountMinusOne + 1).
//! \param    [in] walkerParams
//!           Pointer to the walker parameters; it is dereferenced
//!           unconditionally and therefore must not be null
//! \return   uint32_t
//!           Number of threads per group
//!
uint32_t HalCm_ThreadsNumberPerGroup_MW(PCM_HAL_WALKER_PARAMS walkerParams)
{
    int localInnerCount = 0, localMidCount = 0, localOuterCount = 0, globalInnerCount = 0, globalOuterCount = 0;
    int midX = 0, midY = 0, midStep = 0;
    int outerX = 0, outerY = 0;
    int localInnerX = 0, localInnerY = 0;
    int blockSizeX = 0, blockSizeY = 0;

    // NOTE(review): the steps below are read into plain int and compared
    // against 0. HalCm_SetupMediaWalkerParams in this file stores negative
    // steps as 16-bit patterns such as 0xFFFF ("-1") - confirm that these
    // conversions yield negative values as the sign tests expect.
    int localLoopExecCount = walkerParams->localLoopExecCount;
    int globalLoopExecCount = walkerParams->globalLoopExecCount;
    int globalresX = walkerParams->globalResolution.x, globalresY = walkerParams->globalResolution.y;
    int globalOuterX = walkerParams->globalStart.x, globalOuterY = walkerParams->globalStart.y;
    int globalOuterStepX = walkerParams->globalOutlerLoopStride.x, globalOuterStepY = walkerParams->globalOutlerLoopStride.y;
    int globalInnerStepX = walkerParams->globalInnerLoopUnit.x, globalInnerStepY = walkerParams->globalInnerLoopUnit.y;
    int middleStepX = walkerParams->midLoopUnitX, middleStepY = walkerParams->midLoopUnitY, extraSteps = walkerParams->middleLoopExtraSteps;
    int localblockresX = walkerParams->blockResolution.x, localblockresY = walkerParams->blockResolution.y;
    int localStartX = walkerParams->localStart.x, localStartY = walkerParams->localStart.y;
    int localOuterStepX = walkerParams->localOutLoopStride.x, localOuterStepY = walkerParams->localOutLoopStride.y;
    int localInnerStepX = walkerParams->localInnerLoopUnit.x, localInnerStepY = walkerParams->localInnerLoopUnit.y;
    uint32_t threadsNumberPergroup = 0;

    // global_outer_loop initialization: while the start point lies outside
    // the global resolution on the side the inner step moves away from,
    // advance it by the inner step. The Y clauses test the Y coordinate
    // (the previous code tested globalOuterX against localblockresY).
    while (((globalOuterX >= globalresX) && (globalInnerStepX < 0)) ||
           (((globalOuterX + localblockresX) < 0) && (globalInnerStepX > 0)) ||
           ((globalOuterY >= globalresY) && (globalInnerStepY < 0)) ||
           (((globalOuterY + localblockresY) < 0) && (globalInnerStepY > 0)))
    {
        globalOuterX += globalInnerStepX;
        globalOuterY += globalInnerStepY;
    }

    // global_outer_loop_in_bounds()
    while ((globalOuterX < globalresX) &&
           (globalOuterY < globalresY) &&
           (globalOuterX + localblockresX > 0) &&
           (globalOuterY + localblockresY > 0) &&
           (globalOuterCount <= globalLoopExecCount))
    {
        int globalInnerX = globalOuterX;
        int globalInnerY = globalOuterY;
        globalInnerCount = 0;

        // global_inner_loop_in_bounds()
        while ((globalInnerX < globalresX) &&
               (globalInnerY < globalresY) &&
               (globalInnerX + localblockresX > 0) &&
               (globalInnerY + localblockresY > 0))
        {
            // Clip the block against the global resolution on both axes.
            if (globalInnerX < 0)
            {
                blockSizeX = localblockresX + globalInnerX;
            }
            else if ((globalresX - globalInnerX) < localblockresX)
            {
                blockSizeX = globalresX - globalInnerX;
            }
            else
            {
                blockSizeX = localblockresX;
            }

            if (globalInnerY < 0)
            {
                blockSizeY = localblockresY + globalInnerY;
            }
            else if ((globalresY - globalInnerY) < localblockresY)
            {
                blockSizeY = globalresY - globalInnerY;
            }
            else
            {
                blockSizeY = localblockresY;
            }

            outerX = localStartX;
            outerY = localStartY;
            localOuterCount = 0;

            // local_outer_loop initialization: move the start point into
            // the clipped block along the inner step direction.
            while ((outerX >= blockSizeX && localInnerStepX < 0) ||
                   (outerX < 0 && localInnerStepX > 0) ||
                   (outerY >= blockSizeY && localInnerStepY < 0) ||
                   (outerY < 0 && localInnerStepY > 0))
            {
                outerX += localInnerStepX;
                outerY += localInnerStepY;
            }

            // local_outer_loop_in_bounds()
            while ((outerX < blockSizeX) &&
                   (outerY < blockSizeY) &&
                   (outerX >= 0) &&
                   (outerY >= 0) &&
                   (localOuterCount <= localLoopExecCount))
            {
                midX = outerX;
                midY = outerY;
                midStep = 0;
                localMidCount = 0;

                // local_middle_steps_remaining()
                while ((midStep <= extraSteps) &&
                       (midX < blockSizeX) &&
                       (midY < blockSizeY) &&
                       (midX >= 0) &&
                       (midY >= 0))
                {
                    localInnerX = midX;
                    localInnerY = midY;
                    localInnerCount = 0;

                    // local_inner_loop_shrinking(): one count per position
                    // visited inside the clipped block.
                    while ((localInnerX < blockSizeX) &&
                           (localInnerY < blockSizeY) &&
                           (localInnerX >= 0) &&
                           (localInnerY >= 0))
                    {
                        localInnerCount++;
                        localInnerX += localInnerStepX;
                        localInnerY += localInnerStepY;
                    }

                    localMidCount++;
                    midStep++;
                    midX += middleStepX;
                    midY += middleStepY;
                }

                localOuterCount += 1;
                outerX += localOuterStepX;
                outerY += localOuterStepY;

                // Re-enter the clipped block along the inner step direction
                // after the outer stride moved the start point.
                while ((outerX >= blockSizeX && localInnerStepX < 0) ||
                       (outerX < 0 && localInnerStepX > 0) ||
                       (outerY >= blockSizeY && localInnerStepY < 0) ||
                       (outerY < 0 && localInnerStepY > 0))
                {
                    outerX += localInnerStepX;
                    outerY += localInnerStepY;
                }
            }

            globalInnerCount++;
            globalInnerX += globalInnerStepX;
            globalInnerY += globalInnerStepY;
        }

        globalOuterCount += 1;
        globalOuterX += globalOuterStepX;
        globalOuterY += globalOuterStepY;

        // Same skip as the initialization above, applied after each outer
        // stride; the Y clauses test the Y coordinate here as well.
        while (((globalOuterX >= globalresX) && (globalInnerStepX < 0)) ||
               (((globalOuterX + localblockresX) < 0) && (globalInnerStepX > 0)) ||
               ((globalOuterY >= globalresY) && (globalInnerStepY < 0)) ||
               (((globalOuterY + localblockresY) < 0) && (globalInnerStepY > 0)))
        {
            globalOuterX += globalInnerStepX;
            globalOuterY += globalInnerStepY;
        }
    }

    // A group spans every loop level at and below the selected one; any
    // unrecognized selector falls back to the full loop nest.
    switch (walkerParams->groupIdLoopSelect)
    {
        case CM_MW_GROUP_COLORLOOP:
            threadsNumberPergroup = walkerParams->colorCountMinusOne + 1;
            break;
        case CM_MW_GROUP_INNERLOCAL:
            threadsNumberPergroup = localInnerCount * (walkerParams->colorCountMinusOne + 1);
            break;
        case CM_MW_GROUP_MIDLOCAL:
            threadsNumberPergroup = localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
            break;
        case CM_MW_GROUP_OUTERLOCAL:
            threadsNumberPergroup = localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
            break;
        case CM_MW_GROUP_INNERGLOBAL:
            threadsNumberPergroup = globalInnerCount * localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
            break;
        default:
            threadsNumberPergroup = globalOuterCount * globalInnerCount * localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
            break;
    }

    return threadsNumberPergroup;
}
MOS_STATUS HalCm_SetupMediaWalkerParams(
PCM_HAL_STATE state,
PCM_HAL_KERNEL_PARAM kernelParam)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
PCM_HAL_TASK_PARAM taskParam = state->taskParam;
PCM_HAL_WALKER_PARAMS walkerParams = &kernelParam->walkerParams;
//Using global walker enable flag
walkerParams->cmWalkerEnable = state->walkerParams.CmWalkerEnable;
if (walkerParams->cmWalkerEnable)
{
// MEDIA_WALKER
CM_HAL_KERNEL_THREADSPACE_PARAM kernelThreadSpace;
if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth)
{
kernelThreadSpace.threadSpaceWidth = kernelParam->kernelThreadSpaceParam.threadSpaceWidth;
kernelThreadSpace.threadSpaceHeight = kernelParam->kernelThreadSpaceParam.threadSpaceHeight;
kernelThreadSpace.patternType = kernelParam->kernelThreadSpaceParam.patternType;
kernelThreadSpace.walkingPattern = kernelParam->kernelThreadSpaceParam.walkingPattern;
kernelThreadSpace.groupSelect = kernelParam->kernelThreadSpaceParam.groupSelect;
kernelThreadSpace.colorCountMinusOne = kernelParam->kernelThreadSpaceParam.colorCountMinusOne;
}
else
{
kernelThreadSpace.threadSpaceWidth = (uint16_t)taskParam->threadSpaceWidth;
kernelThreadSpace.threadSpaceHeight = (uint16_t)taskParam->threadSpaceHeight;
kernelThreadSpace.patternType = taskParam->dependencyPattern;
kernelThreadSpace.walkingPattern = taskParam->walkingPattern;
kernelThreadSpace.groupSelect = taskParam->mediaWalkerGroupSelect;
kernelThreadSpace.colorCountMinusOne = taskParam->colorCountMinusOne;
}
// check for valid thread space width and height here since different from media object
if (kernelThreadSpace.threadSpaceWidth > state->cmHalInterface->GetMediaWalkerMaxThreadWidth())
{
CM_ASSERTMESSAGE("Error: Exceeds the maximum thread space width.");
eStatus = MOS_STATUS_INVALID_PARAMETER;
goto finish;
}
if (kernelThreadSpace.threadSpaceHeight > state->cmHalInterface->GetMediaWalkerMaxThreadHeight())
{
CM_ASSERTMESSAGE("Error: Exceeds the maximum thread space height.");
eStatus = MOS_STATUS_INVALID_PARAMETER;
goto finish;
}
//walkerParams->InterfaceDescriptorOffset = mediaID;// mediaObjectParam.dwInterfaceDescriptorOffset;
walkerParams->inlineDataLength = MOS_ALIGN_CEIL(kernelParam->indirectDataParam.indirectDataSize, 4);
walkerParams->inlineData = kernelParam->indirectDataParam.indirectData;
walkerParams->colorCountMinusOne = kernelThreadSpace.colorCountMinusOne;// taskParam->ColorCountMinusOne;
walkerParams->groupIdLoopSelect = (uint32_t)kernelThreadSpace.groupSelect;
CM_WALKING_PATTERN walkPattern = kernelThreadSpace.walkingPattern;
switch (kernelThreadSpace.patternType)
{
case CM_NONE_DEPENDENCY:
break;
case CM_HORIZONTAL_WAVE:
walkPattern = CM_WALK_HORIZONTAL;
break;
case CM_VERTICAL_WAVE:
walkPattern = CM_WALK_VERTICAL;
break;
case CM_WAVEFRONT:
walkPattern = CM_WALK_WAVEFRONT;
break;
case CM_WAVEFRONT26:
walkPattern = CM_WALK_WAVEFRONT26;
break;
case CM_WAVEFRONT26X:
if (kernelThreadSpace.threadSpaceWidth > 1)
{
walkPattern = CM_WALK_WAVEFRONT26X;
}
else
{
walkPattern = CM_WALK_DEFAULT;
}
break;
case CM_WAVEFRONT26ZIG:
if (kernelThreadSpace.threadSpaceWidth > 2)
{
walkPattern = CM_WALK_WAVEFRONT26ZIG;
}
else
{
walkPattern = CM_WALK_DEFAULT;
}
break;
default:
CM_ASSERTMESSAGE("Error: Invalid walking pattern.");
walkPattern = CM_WALK_DEFAULT;
break;
}
if (taskParam->walkingParamsValid)
{
CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SetMediaWalkerParams
(taskParam->walkingParams, walkerParams));
if (walkPattern == CM_WALK_HORIZONTAL || walkPattern == CM_WALK_DEFAULT)
{
walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
}
else if (walkPattern == CM_WALK_VERTICAL)
{
walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
}
}
else if (kernelParam->kernelThreadSpaceParam.walkingParamsValid)
{
CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SetMediaWalkerParams(
kernelParam->kernelThreadSpaceParam.walkingParams, walkerParams));
if (walkPattern == CM_WALK_HORIZONTAL || walkPattern == CM_WALK_DEFAULT)
{
walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
}
else if (walkPattern == CM_WALK_VERTICAL)
{
walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
}
}
else
{
//Local loop parameters
walkerParams->blockResolution.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->blockResolution.y = kernelThreadSpace.threadSpaceHeight;
walkerParams->localStart.x = 0;
walkerParams->localStart.y = 0;
walkerParams->localEnd.x = 0;
walkerParams->localEnd.y = 0;
walkerParams->globalLoopExecCount = 1;
walkerParams->midLoopUnitX = 0;
walkerParams->midLoopUnitY = 0;
walkerParams->middleLoopExtraSteps = 0;
// account for odd Height/Width for 26x and 26Zig
uint16_t adjHeight = ((kernelThreadSpace.threadSpaceHeight + 1) >> 1) << 1;
uint16_t adjWidth = ((kernelThreadSpace.threadSpaceWidth + 1) >> 1) << 1;
uint32_t maxThreadWidth = state->cmHalInterface->GetMediaWalkerMaxThreadWidth();
switch (walkPattern)
{
case CM_WALK_DEFAULT:
case CM_WALK_HORIZONTAL:
if (kernelThreadSpace.threadSpaceWidth == kernelParam->numThreads &&
kernelThreadSpace.threadSpaceHeight == 1)
{
walkerParams->blockResolution.x = MOS_MIN(kernelParam->numThreads, maxThreadWidth);
walkerParams->blockResolution.y = 1 + kernelParam->numThreads / maxThreadWidth;
}
walkerParams->localLoopExecCount = walkerParams->blockResolution.y - 1;
walkerParams->localOutLoopStride.x = 0;
walkerParams->localOutLoopStride.y = 1;
walkerParams->localInnerLoopUnit.x = 1;
walkerParams->localInnerLoopUnit.y = 0;
walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
break;
case CM_WALK_WAVEFRONT:
walkerParams->localLoopExecCount = kernelThreadSpace.threadSpaceWidth + (kernelThreadSpace.threadSpaceHeight - 1) * 1 - 1;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 1;
break;
case CM_WALK_WAVEFRONT26:
walkerParams->globalResolution.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->globalResolution.y = kernelThreadSpace.threadSpaceHeight;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 1;
walkerParams->localLoopExecCount = kernelThreadSpace.threadSpaceWidth +
(kernelThreadSpace.threadSpaceHeight - 1) * 2 - 1;
//localLoopExecCount has limitation, it should be less than 2^12
while (walkerParams->localLoopExecCount >= 0xFFF)
{
//separate to multiple global levels
if (walkerParams->blockResolution.x > (walkerParams->blockResolution.y * 2))
{
walkerParams->blockResolution.x = (walkerParams->blockResolution.x+1) >> 1;
walkerParams->globalLoopExecCount = (walkerParams->globalResolution.x +
walkerParams->blockResolution.x - 1) / walkerParams->blockResolution.x;
}
else
{
walkerParams->blockResolution.y = (walkerParams->blockResolution.y + 1) >> 1;
}
walkerParams->localLoopExecCount = walkerParams->blockResolution.x +
(walkerParams->blockResolution.y - 1) * 2 - 1;
}
walkerParams->globalOutlerLoopStride.x = walkerParams->blockResolution.x;
walkerParams->globalOutlerLoopStride.y = 0;
walkerParams->globalInnerLoopUnit.x = 0;
walkerParams->globalInnerLoopUnit.y = walkerParams->blockResolution.y;
break;
case CM_WALK_WAVEFRONT26X:
case CM_WALK_WAVEFRONT26XALT:
walkerParams->localLoopExecCount = 0x7ff;
walkerParams->globalLoopExecCount = 0;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 2;
walkerParams->middleLoopExtraSteps = 1;
walkerParams->midLoopUnitX = 0;
walkerParams->midLoopUnitY = 1;
break;
case CM_WALK_WAVEFRONT26ZIG:
walkerParams->localLoopExecCount = 1;
walkerParams->globalLoopExecCount = (adjHeight / 2 - 1) * 2 + (adjWidth / 2) - 1;
walkerParams->localOutLoopStride.x = 0;
walkerParams->localOutLoopStride.y = 1;
walkerParams->localInnerLoopUnit.x = 1;
walkerParams->localInnerLoopUnit.y = 0;
walkerParams->blockResolution.x = 2;
walkerParams->blockResolution.y = 2;
walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
break;
case CM_WALK_VERTICAL:
walkerParams->localLoopExecCount = walkerParams->blockResolution.x - 1;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0;
walkerParams->localInnerLoopUnit.y = 1;
walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
break;
case CM_WALK_WAVEFRONT45D:
walkerParams->localLoopExecCount = 0x7ff;
walkerParams->globalLoopExecCount = 0x7ff;
walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 1;
break;
case CM_WALK_WAVEFRONT45XD_2:
walkerParams->localLoopExecCount = 0x7ff;
walkerParams->globalLoopExecCount = 0x7ff;
// Local
walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 2;
// Mid
walkerParams->middleLoopExtraSteps = 1;
walkerParams->midLoopUnitX = 0;
walkerParams->midLoopUnitY = 1;
break;
case CM_WALK_WAVEFRONT26D:
walkerParams->localLoopExecCount = 0x7ff;
walkerParams->globalLoopExecCount = 0x7ff;
walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 1;
break;
case CM_WALK_WAVEFRONT26XD:
walkerParams->localLoopExecCount = 0x7ff;
walkerParams->globalLoopExecCount = 0x7ff;
// Local
walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->localOutLoopStride.x = 1;
walkerParams->localOutLoopStride.y = 0;
walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
walkerParams->localInnerLoopUnit.y = 2;
// Mid
walkerParams->middleLoopExtraSteps = 1;
walkerParams->midLoopUnitX = 0;
walkerParams->midLoopUnitY = 1;
break;
default:
walkerParams->localLoopExecCount = MOS_MIN(kernelParam->numThreads, 0x3FF);
walkerParams->localOutLoopStride.x = 0;
walkerParams->localOutLoopStride.y = 1;
walkerParams->localInnerLoopUnit.x = 1;
walkerParams->localInnerLoopUnit.y = 0;
break;
}
//Global loop parameters: execution count, resolution and strides
//Since no global loop, global resolution equals block resolution.
walkerParams->globalStart.x = 0;
walkerParams->globalStart.y = 0;
walkerParams->globalOutlerLoopStride.y = 0;
if (walkPattern == CM_WALK_WAVEFRONT26ZIG)
{
walkerParams->globalResolution.x = kernelThreadSpace.threadSpaceWidth;
walkerParams->globalResolution.y = kernelThreadSpace.threadSpaceHeight;
walkerParams->globalOutlerLoopStride.x = 2;
walkerParams->globalInnerLoopUnit.x = 0xFFFC;
walkerParams->globalInnerLoopUnit.y = 2;
}
else if(walkPattern != CM_WALK_WAVEFRONT26)
{
walkerParams->globalResolution.x = walkerParams->blockResolution.x;
walkerParams->globalResolution.y = walkerParams->blockResolution.y;
walkerParams->globalOutlerLoopStride.x = walkerParams->globalResolution.x;
walkerParams->globalInnerLoopUnit.x = 0;
walkerParams->globalInnerLoopUnit.y = walkerParams->globalResolution.y;
}
}
//Need calculate number threads per group for media walker, the minimum value is 1
if (kernelThreadSpace.groupSelect > CM_MW_GROUP_NONE)
{
kernelParam->numberThreadsInGroup = HalCm_ThreadsNumberPerGroup_MW(walkerParams);
}
else
{
kernelParam->numberThreadsInGroup = 1;
}
}
finish:
return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Scan the sampler table and derive, per sampler element type, the
//|          sampler count and the sampler index base kept in
//|          state->samplerStatistics
//| Returns: Always MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AcquireSamplerStatistics(PCM_HAL_STATE state)
{
    // Largest valid entry of samplerIndexTable seen so far, per element type.
    unsigned int highestAssigned[MAX_ELEMENT_TYPE_COUNT] = {0};

    // Pass 1: visit every sampler table slot that is marked in use.
    for (uint32_t slot = 0; slot < state->cmDeviceParam.maxSamplerTableSize; ++slot)
    {
        if (!state->samplerTable[slot].bInUse)
        {
            continue;
        }

        uint32_t assignedIndex = state->samplerIndexTable[slot];
        if (assignedIndex == CM_INVALID_INDEX)
        {
            // No valid index recorded for this slot: count it for its element type.
            state->samplerStatistics.samplerCount[state->samplerTable[slot].ElementType]++;
            continue;
        }

        // A valid index is recorded: remember the largest one per element type.
        MHW_SAMPLER_ELEMENT_TYPE type = state->samplerTable[slot].ElementType;
        if (assignedIndex > highestAssigned[type])
        {
            highestAssigned[type] = assignedIndex;
        }
    }

    // Pass 2: chain the index bases from the smallest to the largest element
    // type. A type with no counted samplers inherits the previous base (which
    // may be -1); a type with samplers starts at 0 when the previous base is
    // -1, otherwise at INDEX_ALIGN(previous count, previous size, own size).
    // NOTE(review): as defined at the top of this file, INDEX_ALIGN groups as
    // (quotient + remainder) ? 1 : 0 because '?:' binds more loosely than '+',
    // so it only ever yields 0 or 1 - confirm this is intended.
    int previousBase = 0;

    if (state->samplerStatistics.samplerCount[MHW_Sampler2Elements])
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements] = 0;
    }
    else
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements] = -1;
    }

    previousBase = state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements];
    if (state->samplerStatistics.samplerCount[MHW_Sampler4Elements])
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements] =
            (previousBase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler2Elements], 2, 4);
    }
    else
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements] = previousBase;
    }

    previousBase = state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements];
    if (state->samplerStatistics.samplerCount[MHW_Sampler8Elements])
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements] =
            (previousBase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler4Elements], 4, 8);
    }
    else
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements] = previousBase;
    }

    previousBase = state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements];
    if (state->samplerStatistics.samplerCount[MHW_Sampler64Elements])
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements] =
            (previousBase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler8Elements], 8, 64);
    }
    else
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements] = previousBase;
    }

    previousBase = state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements];
    if (state->samplerStatistics.samplerCount[MHW_Sampler128Elements])
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements] =
            (previousBase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler64Elements], 64, 128);
    }
    else
    {
        state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements] = previousBase;
    }

    // Pass 3: raise the base of each following element type to at least the
    // largest valid index recorded for the type before it.
    // NOTE(review): highestAssigned is unsigned; if samplerIndexBase is a
    // signed type, a base of -1 is converted to a large unsigned value in this
    // comparison and is never raised - confirm against the declaration.
    for (int type = MHW_Sampler2Elements; type < MHW_Sampler128Elements; ++type)
    {
        if (state->samplerStatistics.samplerIndexBase[type + 1] < highestAssigned[type])
        {
            state->samplerStatistics.samplerIndexBase[type + 1] = highestAssigned[type];
        }
    }

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Initial setup of HW states for the kernel
//|          - reserves binding table indices and assigns a binding table
//|          - loads the kernel (static GSH mode) or validates the pre-loaded
//|            allocation (DSH mode)
//|          - sets up media walker parameters and allocates a media ID
//|          - programs surface/sampler states for indirect data and kernel
//|            arguments, then loads the CURBE data
//| Params:  state             [in]     Pointer to CM HAL state
//|          mediaState        [in/out] Media state whose curbe offset is advanced
//|          batchBuffer       [in]     Unused
//|          taskId            [in]     Unused
//|          kernelParam       [in/out] Kernel parameters; curbeOffset is updated
//|          indexParam        [in/out] Binding index bookkeeping
//|          kernelCurbeOffset [in]     Passed to HalCm_AllocateMediaID
//|          bindingTable      [out]    Binding table assigned to this kernel
//|          mediaID           [out]    Media ID returned by HalCm_AllocateMediaID
//|          krnAllocation     [in/out] Kernel allocation; must be non-null
//|                                     when DSH is enabled
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupStatesForKernelInitial(
    PCM_HAL_STATE                 state,
    PRENDERHAL_MEDIA_STATE        mediaState,
    PMHW_BATCH_BUFFER             batchBuffer,
    int32_t                       taskId,
    PCM_HAL_KERNEL_PARAM          kernelParam,
    PCM_HAL_INDEX_PARAM           indexParam,
    uint32_t                      kernelCurbeOffset,
    int32_t&                      bindingTable,
    int32_t&                      mediaID,
    PRENDERHAL_KRN_ALLOCATION     &krnAllocation)
{
    MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
    PRENDERHAL_INTERFACE            renderHal = state->renderHal;
    PRENDERHAL_STATE_HEAP           stateHeap = renderHal->pStateHeap;
    PCM_INDIRECT_SURFACE_INFO       indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
    PCM_GPGPU_WALKER_PARAMS         perKernelGpGpuWalkerParames = &kernelParam->gpgpuWalkerParams;

    UNUSED(batchBuffer);
    UNUSED(taskId);

    PCM_HAL_KERNEL_ARG_PARAM        argParam;
    uint32_t                        index;
    uint32_t                        value;
    uint32_t                        btIndex;
    uint32_t                        surfIndex;
    uint32_t                        aIndex;
    uint32_t                        idZ;
    uint32_t                        idY;
    uint32_t                        idX;
    uint32_t                        localIdIndex;
    CM_SURFACE_BTI_INFO             surfBTIInfo;
    bool                            vmeUsed = false;
    CM_PLATFORM_INFO                platformInfo;

    localIdIndex = kernelParam->localIdIndex;

    state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);

    // Reserve the null-surface index, the reserved (global) surface range and
    // every binding table index explicitly requested by indirect surfaces.
    HalCm_PreSetBindingIndex(indexParam, CM_NULL_SURFACE_BINDING_INDEX, CM_NULL_SURFACE_BINDING_INDEX);

    HalCm_PreSetBindingIndex(indexParam, surfBTIInfo.reservedSurfaceStart,
        surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER - 1);

    if (kernelParam->indirectDataParam.surfaceCount)
    {
        for (index = 0; index < kernelParam->indirectDataParam.surfaceCount; index++)
        {
            value = (indirectSurfaceInfo + index)->bindingTableIndex;
            HalCm_PreSetBindingIndex(indexParam, value, value);
        }
    }

    // Get the binding table for this kernel
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(renderHal, &bindingTable));

    if (state->dshEnabled)
    {
        // Kernels are already pre-loaded in GSH
        // krnAllocation is the head of a linked list
        if (!krnAllocation)
        {
            // Report the failure to the caller instead of returning success
            // with no usable kernel allocation.
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            CM_ASSERTMESSAGE("Error: Invalid kernel allocation.");
            goto finish;
        }
    }
    else
    {
        // Load the Kernel to GSH
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_LoadKernel(
            state,
            kernelParam,
            0,
            krnAllocation));
    }

    // initialize curbe buffer
    if (kernelParam->totalCurbeSize > 0)
    {
        // Update Curbe offset after curbe load command
        if (state->dshEnabled)
        {
            mediaState->pDynamicState->Curbe.iCurrent += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
        }
        else
        {
            mediaState->iCurbeOffset += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
        }
    }

    //Setup media walker parameters if it is
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupMediaWalkerParams(state, kernelParam));

    // Allocate Interface Descriptor
    mediaID = HalCm_AllocateMediaID(
        state,
        kernelParam,
        krnAllocation,
        bindingTable,
        kernelCurbeOffset);

    if (mediaID < 0)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Unable to get Media ID");
        goto finish;
    }

    // set surface state and binding table
    if (kernelParam->indirectDataParam.surfaceCount)
    {
        for (index = 0; index < kernelParam->indirectDataParam.surfaceCount; index++)
        {
            btIndex = (indirectSurfaceInfo + index)->bindingTableIndex;
            surfIndex = (indirectSurfaceInfo + index)->surfaceIndex;
            switch ((indirectSurfaceInfo + index)->kind)
            {
            case CM_ARGUMENT_SURFACEBUFFER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex, 0));
                break;

            case CM_ARGUMENT_SURFACE2D:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex, 0));
                break;

            case CM_ARGUMENT_SURFACE2D_UP:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex, 0));
                break;

            case CM_ARGUMENT_SURFACE2D_SAMPLER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex, 1));
                break;

            case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex, 1));
                break;

            case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
            case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex, 0, (CM_HAL_KERNEL_ARG_KIND)(indirectSurfaceInfo + index)->kind, 0));
                break;

            case CM_ARGUMENT_SURFACE3D:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceStateWithBTIndex(
                    state, bindingTable, surfIndex, btIndex));
                break;

            default:
                eStatus = MOS_STATUS_INVALID_PARAMETER;
                CM_ASSERTMESSAGE("Indirect Data surface kind is not supported");
                goto finish;
            }
        }
    }

    // set sampler bti
    // NOTE(review): the result of HalCm_SetupSamplerStateWithBTIndex is not
    // checked here - confirm whether it returns a status worth propagating.
    if (kernelParam->samplerBTIParam.samplerCount > 0)
    {
        for (uint32_t i = 0; i < kernelParam->samplerBTIParam.samplerCount; i++)
        {
            HalCm_SetupSamplerStateWithBTIndex(state, kernelParam, &kernelParam->samplerBTIParam.samplerInfo[0], i, mediaID);
        }
    }

    if ( ( kernelParam->curbeSizePerThread > 0 ) && ( kernelParam->stateBufferType == CM_STATE_BUFFER_NONE ) )
    {
        // data  : payload assembled from the non-per-thread kernel arguments
        // curbe : final CURBE image built for the GPGPU walker path
        uint8_t data[CM_MAX_THREAD_PAYLOAD_SIZE + 32];
        uint8_t curbe[CM_MAX_CURBE_SIZE_PER_TASK + 32];

        MOS_ZeroMemory(data, sizeof(data));
        MOS_ZeroMemory(curbe, sizeof(curbe));
        for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
        {
            argParam = &kernelParam->argParams[aIndex];

            // Per-thread and null arguments are not handled here
            if (argParam->perThread || argParam->isNull)
            {
                continue;
            }

            switch (argParam->kind)
            {
            case CM_ARGUMENT_GENERAL:
            case CM_ARGUMENT_IMPLICT_GROUPSIZE:
            case CM_ARGUMENT_IMPLICT_LOCALSIZE:
            case CM_ARGUMENT_IMPLICIT_LOCALID:
            case CM_ARGUMENT_GENERAL_DEPVEC:
                HalCm_SetArgData(argParam, 0, data);
                break;

            case CM_ARGUMENT_SAMPLER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
                    state, kernelParam, argParam, indexParam, mediaID, 0, data));
                break;

            case CM_ARGUMENT_SURFACEBUFFER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
                    state, argParam, indexParam, bindingTable, -1, 0, data));
                break;

            case CM_ARGUMENT_SURFACE2D_UP:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            case CM_ARGUMENT_SURFACE2D_SAMPLER:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            case CM_ARGUMENT_SURFACE2D:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            case CM_ARGUMENT_SURFACE3D:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            case CM_ARGUMENT_SURFACE_VME:   // 3 surface indices
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
                    state, argParam, indexParam, bindingTable, 0, data));
                vmeUsed = true;
                break;

            case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:   // sampler 8x8 surface
            case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:    // sampler 8x8 surface
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            case CM_ARGUMENT_STATE_BUFFER:
                CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_SetupStateBufferSurfaceState(
                    state, argParam, indexParam, bindingTable, 0, data ) );
                break;

            case CM_ARGUMENT_SURFACE:
                // Allow null surface
                break;

            case CM_ARGUMENT_SURFACE2D_SCOREBOARD:
                CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
                    state, argParam, indexParam, bindingTable, 0, data));
                break;

            default:
                eStatus = MOS_STATUS_INVALID_PARAMETER;
                CM_ASSERTMESSAGE("Argument kind '%d' is not supported", argParam->kind);
                goto finish;
            }
        }

        if (perKernelGpGpuWalkerParames->gpgpuEnabled)
        {
            uint32_t offset = 0;

            uint32_t localIdXOffset = kernelParam->argParams[localIdIndex].payloadOffset;
            uint32_t localIdYOffset = localIdXOffset + 4;
            uint32_t localIdZOffset = localIdXOffset + 8;

            //totalCurbeSize aligned when parsing task
            int32_t crossThreadSize = kernelParam->crossThreadConstDataLen;

            // The destination size handed to MOS_SecureMemcpy below equals the
            // source size, so it does not protect the stack buffer; validate
            // against the real capacity of curbe before writing.
            if ((crossThreadSize < 0) || ((size_t)crossThreadSize > sizeof(curbe)))
            {
                eStatus = MOS_STATUS_INVALID_PARAMETER;
                CM_ASSERTMESSAGE("Error: Cross thread constant data exceeds the curbe buffer size.");
                goto finish;
            }

            //Cross thread constant data
            MOS_SecureMemcpy(curbe + offset, crossThreadSize, data, crossThreadSize);
            offset += crossThreadSize;

            //Per-thread data
            for (idZ = 0; idZ < perKernelGpGpuWalkerParames->threadDepth; idZ++)
            {
                for (idY = 0; idY < perKernelGpGpuWalkerParames->threadHeight; idY++)
                {
                    for (idX = 0; idX < perKernelGpGpuWalkerParames->threadWidth; idX++)
                    {
                        // offset never exceeds sizeof(curbe) here, so the
                        // subtraction cannot wrap.
                        if (kernelParam->curbeSizePerThread > sizeof(curbe) - offset)
                        {
                            eStatus = MOS_STATUS_INVALID_PARAMETER;
                            CM_ASSERTMESSAGE("Error: Per-thread curbe data exceeds the curbe buffer size.");
                            goto finish;
                        }

                        // Patch the local thread ID into the payload, then
                        // append this thread's slice to the CURBE image.
                        *((uint32_t *)(data + localIdXOffset)) = idX;
                        *((uint32_t *)(data + localIdYOffset)) = idY;
                        *((uint32_t *)(data + localIdZOffset)) = idZ;
                        MOS_SecureMemcpy(curbe + offset, kernelParam->curbeSizePerThread, data + crossThreadSize, kernelParam->curbeSizePerThread);
                        offset += kernelParam->curbeSizePerThread;
                    }
                }
            }

            // tell pfnLoadCurbeData the current curbe offset
            // (rewind the offset that was advanced before the media ID allocation)
            if (state->dshEnabled)
            {
                PRENDERHAL_DYNAMIC_STATE dynamicState = stateHeap->pCurMediaState->pDynamicState;
                dynamicState->Curbe.iCurrent -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
                kernelParam->curbeOffset = dynamicState->Curbe.iCurrent;
            }
            else
            {
                stateHeap->pCurMediaState->iCurbeOffset -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
                kernelParam->curbeOffset = stateHeap->pCurMediaState->iCurbeOffset;
            }
            // update curbe with data.
            renderHal->pfnLoadCurbeData(renderHal,
                stateHeap->pCurMediaState,
                curbe,
                kernelParam->totalCurbeSize);
        }
        else
        {
            CM_ASSERT(kernelParam->totalCurbeSize == kernelParam->curbeSizePerThread);

            // tell pfnLoadCurbeData the current curbe offset
            // (rewind the offset that was advanced before the media ID allocation)
            if (state->dshEnabled)
            {
                PRENDERHAL_DYNAMIC_STATE dynamicState = stateHeap->pCurMediaState->pDynamicState;
                dynamicState->Curbe.iCurrent -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
                kernelParam->curbeOffset = dynamicState->Curbe.iCurrent;
            }
            else
            {
                stateHeap->pCurMediaState->iCurbeOffset -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
                kernelParam->curbeOffset = stateHeap->pCurMediaState->iCurbeOffset;
            }
            // update curbe with data.
            renderHal->pfnLoadCurbeData(renderHal,
                stateHeap->pCurMediaState,
                data,
                kernelParam->totalCurbeSize);
        }

        if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext() == false) // false means override per Batch.
        {
            if ((vmeUsed == true) && state->cmHalInterface->IsRequestShutdownSubslicesForVmeUsage())
            {
                // VME kernel: request one slice with half of the sub-slices
                CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetPlatformInfo(state, &platformInfo, true));
                CM_POWER_OPTION  cmPower;
                cmPower.nSlice = 1;
                cmPower.nSubSlice = platformInfo.numSubSlices / 2;
                cmPower.nEU = (uint16_t)platformInfo.numEUsPerSubSlice;
                state->pfnSetPowerOption(state, &cmPower);
            }
        }
    }

#if MDF_CURBE_DATA_DUMP
    if (state->dumpCurbeData)
    {
        HalCm_DumpCurbeData(state);
    }
#endif

#if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
    if (state->dumpIDData)
    {
        HalCm_DumpInterfaceDescriptorData(state);
    }
#endif

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Fill entry [index] of the MI conditional batch buffer end
//|          parameters from entry [index] of the CM conditional end info
//| Returns: MOS_STATUS_INVALID_PARAMETER when index is not below
//|          CM_MAX_CONDITIONAL_END_CMDS, MOS_STATUS_SUCCESS otherwise
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetConditionalEndInfo(
    PCM_HAL_STATE state,
    PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
    PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS conditionalBBEndParams,
    uint32_t index
    )
{
    if (index < CM_MAX_CONDITIONAL_END_CMDS)
    {
        PCM_HAL_CONDITIONAL_BB_END_INFO             source = conditionalEndInfo + index;
        PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS target = conditionalBBEndParams + index;

        // Start from a cleared entry so fields not set below are zero.
        MOS_ZeroMemory(target, sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));

        // The semaphore buffer is the OS resource of the referenced buffer table entry.
        target->presSemaphoreBuffer = &(state->bufferTable[source->bufferTableIndex].osResource);
        target->dwValue             = source->compareValue;
        target->bDisableCompareMask = source->disableCompareMask;
        target->dwOffset            = source->offset;

        return MOS_STATUS_SUCCESS;
    }

    return MOS_STATUS_INVALID_PARAMETER;
}
//===============<Interface Functions>==========================================
//*-----------------------------------------------------------------------------
//| Purpose: Allocate Structures required for HW Rendering
//|          Configures the RenderHal state heap settings from the CM device
//|          parameters, initializes RenderHal and the Vebox heap, and allocates
//|          batch buffers, timestamp/tracker resources, tables, task
//|          parameters and the advanced executor.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Allocate(
    PCM_HAL_STATE state) // [in] Pointer to CM State
{
    MOS_STATUS                     eStatus;
    PCM_HAL_DEVICE_PARAM           devParam;
    PRENDERHAL_INTERFACE           renderHal;
    PRENDERHAL_STATE_HEAP_SETTINGS heapSettings;
    uint32_t                       bbIndex;
    MOS_NULL_RENDERING_FLAGS       nullRenderFlags;
    RENDERHAL_SETTINGS             renderHalSettings;
    uint32_t                       taskCount;
    PMHW_BATCH_BUFFER              curBatchBuffer = nullptr;

    //------------------------------------
    CM_ASSERT(state);
    //------------------------------------

    eStatus      = MOS_STATUS_UNKNOWN;
    devParam     = &state->cmDeviceParam;
    renderHal    = state->renderHal;
    heapSettings = &renderHal->StateHeapSettings;

    // --- State heap sizing, part 1: curbe, media states/IDs and kernel heap ---
    heapSettings->iCurbeSize       = CM_MAX_CURBE_SIZE_PER_TASK;
    // One extra media state heap to handle sync issues with the current
    // RenderHal implementation (removable once a sync value is inserted in
    // the 2nd level BB).
    heapSettings->iMediaStateHeaps = devParam->maxTasks + 1;
    // One media ID per kernel in a task.
    heapSettings->iMediaIDs        = devParam->maxKernelsPerTask;
    heapSettings->iKernelCount     = devParam->maxGshKernelEntries;
    // Kernel memory must be aligned to this block size (256K for IVB/HSW).
    heapSettings->iKernelBlockSize = devParam->maxKernelBinarySize;
    heapSettings->iKernelHeapSize  = devParam->maxGshKernelEntries * CM_32K;

    // One size slot per GSH kernel entry.
    state->totalKernelSize = (int32_t*)MOS_AllocAndZeroMemory(sizeof(int32_t) * devParam->maxGshKernelEntries);
    if (!state->totalKernelSize)
    {
        CM_ASSERTMESSAGE("Could not allocate enough memory for state->totalKernelSize\n");
        eStatus = MOS_STATUS_NO_SPACE;
        goto finish;
    }

    // --- State heap sizing, part 2: scratch, SIP, binding tables, surfaces, samplers ---
    heapSettings->iPerThreadScratchSize = devParam->maxPerThreadScratchSpaceSize;
    heapSettings->iSipSize              = CM_MAX_SIP_SIZE;
    // One binding table per kernel in a task.
    heapSettings->iBindingTables        = devParam->maxKernelsPerTask;
    // Maximum binding table indices per binding table.
    heapSettings->iSurfacesPerBT        = CM_MAX_SURFACE_STATES_PER_BT;
    // Maximum number of surfaces that can be indexed.
    heapSettings->iSurfaceStates        = CM_MAX_SURFACE_STATES;
    // Maximum number of AVS samplers.
    heapSettings->iSamplersAVS          = devParam->maxAvsSamplers;

    // Bring up RenderHal, then the Vebox heap.
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitialize(renderHal, nullptr));
    CM_CHK_MOSSTATUS_GOTOFINISH(state->veboxInterface->CreateHeap());

    // The kernel size table is only used in static mode (DSH does not use it).
    if (!state->dshEnabled)
    {
        // Entry 0 starts out owning the whole kernel heap; all others are empty.
        for (int32_t entry = 0; entry < heapSettings->iKernelCount; ++entry)
        {
            state->totalKernelSize[entry] = (entry == 0) ? heapSettings->iKernelHeapSize : 0;
        }
        state->kernelNumInGsh = 1;
    }

    // One batch buffer per media-state heap.
    state->numBatchBuffers = heapSettings->iMediaStateHeaps;
    state->batchBuffers    = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(
                                 state->numBatchBuffers *
                                 sizeof(MHW_BATCH_BUFFER));
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->batchBuffers);

    for (bbIndex = 0; bbIndex < (uint32_t)state->numBatchBuffers; ++bbIndex)
    {
        curBatchBuffer = &state->batchBuffers[bbIndex];

        curBatchBuffer->dwSyncTag    = 0;
        curBatchBuffer->bMatch       = false;
        curBatchBuffer->iPrivateType = RENDERHAL_BB_TYPE_CM;
        curBatchBuffer->iPrivateSize = sizeof(CM_HAL_BB_ARGS);
        curBatchBuffer->pPrivateData = (PCM_HAL_BB_ARGS)MOS_AllocAndZeroMemory(sizeof(CM_HAL_BB_ARGS));
        CM_CHK_NULL_GOTOFINISH_MOSERROR(curBatchBuffer->pPrivateData);
        ((PCM_HAL_BB_ARGS)curBatchBuffer->pPrivateData)->refCount = 1;
    }

    // Timestamp buffer and tracker resources.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTsResource(state));
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTrackerResource(state));

    // Dynamic general state heap.
    CM_HAL_HEAP_PARAM dynamicHeapParams;
    dynamicHeapParams.behaviorGSH     = HeapManager::Behavior::destructiveExtend;
    dynamicHeapParams.initialSizeGSH  = 0x0080000;
    dynamicHeapParams.extendSizeGSH   = 0x0080000;
    dynamicHeapParams.trackerProducer = &state->renderHal->trackerProducer;
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_InitializeDynamicStateHeaps(state, &dynamicHeapParams));

    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTables(state));

    // Task parameter block.
    state->taskParam = (PCM_HAL_TASK_PARAM)MOS_AllocAndZeroMemory(sizeof(CM_HAL_TASK_PARAM));
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->taskParam);
    state->currentTaskEntry = 0;

    // Task timestamp block.
    state->taskTimeStamp = (PCM_HAL_TASK_TIMESTAMP)MOS_AllocAndZeroMemory(sizeof(CM_HAL_TASK_TIMESTAMP));
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->taskTimeStamp);

    // The surface registration table is backed by the 2D surface table.
    state->surfaceRegTable.count   = state->cmDeviceParam.max2DSurfaceTableSize;
    state->surfaceRegTable.entries = state->surf2DTable;

    // Mark every task status slot as invalid.
    taskCount = state->cmDeviceParam.maxTasks;
    MOS_FillMemory(state->taskStatusTable, (size_t)taskCount, CM_INVALID_INDEX);

    // Null HW rendering applies when either the Cm or the VPGobal flag is set.
    nullRenderFlags       = state->osInterface->pfnGetNullHWRenderFlags(state->osInterface);
    state->nullHwRenderCm = nullRenderFlags.Cm || nullRenderFlags.VPGobal;

    // Allocate the SIP/CSR resource and fetch the SIP binary up front when
    // mid-thread preemption is not disabled or kernel debug is enabled.
    if ((state->midThreadPreemptionDisabled == false)
        || (state->kernelDebugEnabled == true))
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->AllocateSIPCSRResource());
        state->pfnGetSipBinary(state);
    }

    // Conditional batch buffer support flag.
    state->cbbEnabled = HalCm_IsCbbEnabled(state);

    // Turn Turbo boost on.
    CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnEnableTurboBoost(state));

    state->tsFrequency = Mos_Specific_GetTsFrequency(state->osInterface);

    // Pick the executor implementation: the base class unless refactor is set.
    if (!state->refactor)
    {
        state->advExecutor = CmExtensionCreator<CmExecutionAdv>::CreateBaseClass();
    }
    else
    {
        state->advExecutor = CmExtensionCreator<CmExecutionAdv>::CreateClass();
    }
    if (state->advExecutor == nullptr)
    {
        CM_ASSERTMESSAGE("Could not allocate enough memory for state->advExecutor\n");
        eStatus = MOS_STATUS_NO_SPACE;
        goto finish;
    }
    state->advExecutor->Initialize(state);

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Build the performance tag for a set of kernels. The kernel names
//|          are joined with ';' and looked up in (or added to) the per-count
//|          perf tag index map of cmState.
//| Params:  cmState      [in] Pointer to CM State
//|          kernelParams [in] Array of numKernels kernel parameter pointers
//|          numKernels   [in] Number of kernels in kernelParams
//| Returns: (perfTagKernelNum << 8) | (index & 0xFF), where perfTagKernelNum
//|          is numKernels - 1 clamped to MAX_COMBINE_NUM_IN_PERFTAG - 1.
//|          Returns the default perf tag 0 when the input is null/empty or
//|          the name buffer cannot be allocated.
//*-----------------------------------------------------------------------------
uint16_t HalCm_GetKernelPerfTag(
    PCM_HAL_STATE           cmState,
    PCM_HAL_KERNEL_PARAM    *kernelParams,
    uint32_t                numKernels)
{
    using namespace std;

    CM_ASSERT(cmState);
    CM_ASSERT(kernelParams);

    // Perf tags are profiling-only, so invalid input falls back to the default
    // tag instead of aborting. With numKernels == 0 the code below would index
    // perfTagIndexMap[-1] and dereference kernelParams[0].
    if (cmState == nullptr || kernelParams == nullptr || numKernels == 0)
    {
        return 0;
    }

    // Bucket by kernel count; counts above the limit share the last bucket.
    int perfTagKernelNum = numKernels - 1;
    if (numKernels > MAX_COMBINE_NUM_IN_PERFTAG)
    {
        perfTagKernelNum = MAX_COMBINE_NUM_IN_PERFTAG - 1;
    }

    // get a combined kernel name
    uint32_t len = numKernels * CM_MAX_KERNEL_NAME_SIZE_IN_BYTE;
    char *combinedName = MOS_NewArray(char, len);
    if (combinedName == nullptr)
    { // Not need to abort the process as this is only for pnp profiling
        CM_ASSERTMESSAGE("Error: Memory allocation error in getPertTag.");
        return 0;  // return the default perftag
    }
    CmSafeMemSet(combinedName, 0, len);

    MOS_SecureStrcat(combinedName, len, kernelParams[0]->kernelName);
    for (uint32_t i = 1; i < numKernels; i++)
    {
        MOS_SecureStrcat(combinedName, len, ";");
        MOS_SecureStrcat(combinedName, len, kernelParams[i]->kernelName);
    }

    // get perftag index
    int perfTagIndex = 0;
    map<string, int>::iterator ite = cmState->perfTagIndexMap[perfTagKernelNum]->find(combinedName);
    if (ite == cmState->perfTagIndexMap[perfTagKernelNum]->end())
    {
        // Unknown combination: register it while customized indices remain,
        // otherwise keep index 0.
        if (cmState->currentPerfTagIndex[perfTagKernelNum] <= MAX_CUSTOMIZED_PERFTAG_INDEX)
        {
            cmState->perfTagIndexMap[perfTagKernelNum]->insert(pair<string, int>(combinedName, cmState->currentPerfTagIndex[perfTagKernelNum]));
            perfTagIndex = cmState->currentPerfTagIndex[perfTagKernelNum] ++;
        }
    }
    else
    {
        perfTagIndex = ite->second;
    }

    // Low byte: index within the bucket; next byte: the bucket itself.
    perfTagIndex = (perfTagIndex &0xFF) | (perfTagKernelNum << 8);
    MosSafeDeleteArray(combinedName);
    return (uint16_t)perfTagIndex;
}
//*-----------------------------------------------------------------------------
//| Purpose: Executes the CM Task
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ExecuteTask(
PCM_HAL_STATE state, // [in] Pointer to CM State
PCM_HAL_EXEC_TASK_PARAM execParam) // [in] Pointer to Task Param
{
MOS_STATUS eStatus;
PRENDERHAL_INTERFACE renderHal;
PRENDERHAL_MEDIA_STATE mediaState;
PMHW_BATCH_BUFFER batchBuffer;
PCM_HAL_BB_ARGS bbCmArgs;
PCM_HAL_KERNEL_PARAM kernelParam;
int32_t taskId;
int32_t remBindingTables;
int32_t bindingTable;
int32_t bti;
int32_t mediaID;
PRENDERHAL_KRN_ALLOCATION krnAllocations[CM_MAX_KERNELS_PER_TASK];
uint32_t vfeCurbeSize;
uint32_t maxInlineDataSize, maxIndirectDataSize;
uint32_t i;
void *cmdBuffer = nullptr;
PCM_HAL_TASK_PARAM taskParam = state->taskParam;
uint32_t btsizePower2;
PMOS_INTERFACE osInterface = nullptr;
//-----------------------------------
CM_ASSERT(state);
CM_ASSERT(execParam);
//-----------------------------------
eStatus = MOS_STATUS_SUCCESS;
renderHal = state->renderHal;
mediaState = nullptr;
batchBuffer = nullptr;
if (execParam->numKernels > state->cmDeviceParam.maxKernelsPerTask)
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
goto finish;
}
state->osInterface->pfnSetGpuContext(state->osInterface, (MOS_GPU_CONTEXT)execParam->queueOption.GPUContext);
// Reset states before execute
// (clear allocations, get GSH allocation index + any additional housekeeping)
state->osInterface->pfnResetOsStates(state->osInterface);
CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
MOS_FillMemory(
state->bti2DIndexTable,
state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
CM_INVALID_INDEX );
MOS_FillMemory(
state->bti2DUPIndexTable,
state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
CM_INVALID_INDEX );
MOS_FillMemory(
state->bti3DIndexTable,
state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
CM_INVALID_INDEX );
MOS_FillMemory(
state->btiBufferIndexTable,
state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
CM_INVALID_INDEX );
MOS_FillMemory(
state->samplerIndexTable,
state->cmDeviceParam.maxSamplerTableSize,
CM_INVALID_INDEX);
MOS_FillMemory(
state->sampler8x8IndexTable,
state->cmDeviceParam.maxSampler8x8TableSize,
CM_INVALID_INDEX);
state->walkerParams.CmWalkerEnable = 0;
vfeCurbeSize = 0;
maxInlineDataSize = 0;
maxIndirectDataSize = 0;
// Get the Task Id
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
// Parse the task
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseTask(state, execParam));
// Reset the SSH configuration according to the property of the task
renderHal->pStateHeap->iBindingTableSize = MOS_ALIGN_CEIL(taskParam->surfacePerBT * // Reconfigure the binding table size
renderHal->pRenderHalPltInterface->GetBTStateCmdSize(), renderHal->StateHeapSettings.iBTAlignment);
taskParam->surfacePerBT = renderHal->pStateHeap->iBindingTableSize/renderHal->pRenderHalPltInterface->GetBTStateCmdSize();
renderHal->StateHeapSettings.iBindingTables = renderHal->StateHeapSettings.iBindingTables * // Reconfigure the binding table number
renderHal->StateHeapSettings.iSurfacesPerBT / taskParam->surfacePerBT;
renderHal->StateHeapSettings.iSurfacesPerBT = taskParam->surfacePerBT; // Reconfigure the surface per BT
if (execParam->numKernels > (uint32_t)renderHal->StateHeapSettings.iBindingTables)
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE("Number of Kernels per task exceeds the number can be hold by binding table");
goto finish;
}
if (execParam->kernelDebugEnabled && Mos_ResourceIsNull(&state->sipResource.osResource))
{
HalCm_AllocateSipResource( state); // create sip resource if it does not exist
}
// Assign a MediaState from the MediaStateHeap
// !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
// since this method syncs the batch buffer and media state.
if (state->dshEnabled)
{
if ( execParam->userDefinedMediaState != nullptr )
{
// use exsiting media state as current state
mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execParam->userDefinedMediaState );
// update current state to dsh
renderHal->pStateHeap->pCurMediaState = mediaState;
// Refresh sync tag for all media states in submitted queue
state->criticalSectionDSH->Acquire();
renderHal->pfnRefreshSync( renderHal );
state->criticalSectionDSH->Release();
}
else
{
// Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
state->criticalSectionDSH->Acquire();
HalCm_DSH_GetDynamicStateConfiguration( state, &params, execParam->numKernels, execParam->kernels, execParam->kernelCurbeOffset );
// Prepare Media States to accommodate all parameters - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs
mediaState = renderHal->pfnAssignDynamicState( renderHal, &params, RENDERHAL_COMPONENT_CM );
state->criticalSectionDSH->Release();
}
}
else
{
mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
}
CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
// Assign/Reset SSH instance
CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
// Dynamic Batch Buffer allocation
if (!state->walkerParams.CmWalkerEnable)
{
// Get the Batch buffer
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBatchBuffer(state, execParam->numKernels, execParam->kernels, &batchBuffer));
CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
bbCmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
// Lock the batch buffer
if ( (bbCmArgs->refCount == 1) ||
(state->taskParam->reuseBBUpdateMask == 1) )
{
CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnLockBB(renderHal, batchBuffer));
}
}
if (state->useNewSamplerHeap == false)
{
HalCm_AcquireSamplerStatistics(state);
}
// Load all kernels in the same state heap - expand ISH if necessary BEFORE programming media states.
// This is better than having to expand ISH in the middle of loading, when part of MediaIDs are
// already programmed - not a problem in the old implementation where it would simply remove old
// kernels out of the way.
if (state->dshEnabled)
{
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execParam->kernels, execParam->numKernels, krnAllocations));
}
for (i = 0; i < execParam->numKernels; i++)
{
CM_HAL_INDEX_PARAM indexParam;
MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
kernelParam = execParam->kernels[i];
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, batchBuffer, taskId, kernelParam, &indexParam,
execParam->kernelCurbeOffset[i], bti, mediaID, krnAllocations[i]));
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernel(state, mediaState, batchBuffer, taskId, kernelParam, i, &indexParam,
bti, mediaID, krnAllocations[i]));
vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
if (kernelParam->payloadSize > maxInlineDataSize)
{
maxInlineDataSize = kernelParam->payloadSize;
}
if (kernelParam->indirectDataParam.indirectDataSize > maxIndirectDataSize)
{
maxIndirectDataSize = kernelParam->indirectDataParam.indirectDataSize;
}
if (execParam->conditionalEndBitmap & (uint64_t)1 << i)
{
CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetConditionalEndInfo(state, taskParam->conditionalEndInfo, taskParam->conditionalBBEndParams, i));
}
}
// Store the Max Payload Sizes in the Task params
state->taskParam->vfeCurbeSize = vfeCurbeSize;
if (maxIndirectDataSize)
{
state->taskParam->urbEntrySize = maxIndirectDataSize;
}
else
{
state->taskParam->urbEntrySize = maxInlineDataSize;
}
// We may have to send additional Binding table commands in command buffer.
// This is needed because the surface offset (from the base on SSH)
// calculation takes into account the max binding tables allocated in the
// SSH.
remBindingTables = renderHal->StateHeapSettings.iBindingTables - execParam->numKernels;
if (remBindingTables > 0)
{
for (i = 0; i < (uint32_t)remBindingTables; i++)
{
CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
renderHal,
&bindingTable));
}
}
// until now, we know binding table index for debug surface
// let's get system thread
osInterface = state->osInterface;
osInterface->pfnResetPerfBufferID(osInterface);
if (osInterface->pfnIsPerfTagSet(osInterface) == false)
{
osInterface->pfnIncPerfFrameID(osInterface);
uint16_t perfTag = HalCm_GetKernelPerfTag(state, execParam->kernels, execParam->numKernels);
osInterface->pfnSetPerfTag(osInterface, perfTag);
}
#if (_RELEASE_INTERNAL || _DEBUG)
#if defined(CM_DIRECT_GUC_SUPPORT)
// Update the task ID table
state->taskStatusTable[taskId] = (char)taskId;
//for GuC direct submission, need to send out dummy command buffer to make sure PDP table got binded
CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitDummyCommands(
batchBuffer, taskId, execParam->kernels, &cmdBuffer));
/* make sure Dummy submission is done */
CM_HAL_QUERY_TASK_PARAM queryParam;
queryParam.taskId = taskId;
queryParam.status = CM_TASK_IN_PROGRESS;
do {
state->pfnQueryTask(state, &queryParam);
} while (queryParam.status != CM_TASK_FINISHED);
#endif
#endif
// Submit HW commands and states
CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
batchBuffer, taskId, execParam->kernels, &cmdBuffer));
// Set the Task ID
execParam->taskIdOut = taskId;
// Set OS data
if(cmdBuffer)
{
execParam->osData = cmdBuffer;
}
// Update the task ID table
state->taskStatusTable[taskId] = (char)taskId;
finish:
if (state->dshEnabled)
{
state->criticalSectionDSH->Acquire();
if (mediaState && eStatus != MOS_STATUS_SUCCESS)
{
// Failed, release media state and heap resources
renderHal->pfnReleaseDynamicState(renderHal, mediaState);
}
else
{
renderHal->pfnSubmitDynamicState(renderHal, mediaState);
}
state->criticalSectionDSH->Release();
}
if (batchBuffer) // for Media Walker, batchBuffer is empty
{
if (batchBuffer->bLocked)
{
// Only happens in Error cases
CM_CHK_NULL_RETURN_MOSERROR(batchBuffer->pPrivateData);
if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount == 1)
{
renderHal->pfnUnlockBB(renderHal, batchBuffer);
}
}
}
return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Executes the CM Group Task
//|          Resets the per-task index tables and render HAL state, parses the
//|          group task, reconfigures the SSH binding-table layout for it,
//|          obtains a media state, programs the states of every kernel and
//|          finally submits the commands through cmHalInterface.
//|          On success execGroupParam->taskIdOut (and osData, when a command
//|          buffer is returned) are filled in.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ExecuteGroupTask(
    PCM_HAL_STATE                   state,           // [in] Pointer to CM State
    PCM_HAL_EXEC_GROUP_TASK_PARAM   execGroupParam)  // [in] Pointer to Task Param
{
    MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
    PRENDERHAL_INTERFACE        renderHal = state->renderHal;
    int32_t                     taskId;
    uint32_t                    remBindingTables;
    int32_t                     bindingTable;
    int32_t                     bti;
    int32_t                     mediaID;
    PRENDERHAL_MEDIA_STATE      mediaState = nullptr;
    uint32_t                    i;
    void                        *cmdBuffer = nullptr;
    PCM_HAL_KERNEL_PARAM        kernelParam = nullptr;
    PCM_HAL_TASK_PARAM          taskParam = state->taskParam;
    uint32_t                    vfeCurbeSize = 0;
    PRENDERHAL_KRN_ALLOCATION   krnAllocations[CM_MAX_KERNELS_PER_TASK];
    PMOS_INTERFACE              osInterface = nullptr;

    //-----------------------------------
    CM_ASSERT(state);
    CM_ASSERT(execGroupParam);
    //-----------------------------------

    // krnAllocations is a fixed-size stack array indexed by kernel number
    // below; reject kernel counts it cannot hold before touching any state.
    if (execGroupParam->numKernels > (uint32_t)CM_MAX_KERNELS_PER_TASK)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
        goto finish;
    }

    state->osInterface->pfnSetGpuContext(state->osInterface, (MOS_GPU_CONTEXT)execGroupParam->queueOption.GPUContext);

    MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));

    // Mark every per-task index table entry as unused
    MOS_FillMemory(
        state->bti2DIndexTable,
        state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->bti2DUPIndexTable,
        state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->bti3DIndexTable,
        state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->btiBufferIndexTable,
        state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->samplerIndexTable,
        state->cmDeviceParam.maxSamplerTableSize,
        CM_INVALID_INDEX);

    MOS_FillMemory(
        state->sampler8x8IndexTable,
        state->cmDeviceParam.maxSampler8x8TableSize,
        CM_INVALID_INDEX);

    // Reset states before execute
    // (clear allocations, get GSH allocation index + any additional housekeeping)
    state->osInterface->pfnResetOsStates(state->osInterface);
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));

    state->walkerParams.CmWalkerEnable = 0;
    state->taskParam->blGpGpuWalkerEnabled = true;

    // Get the Task Id
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));

    // Parse the task
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseGroupTask(state, execGroupParam));

    // Reset the SSH configuration according to the property of the task
    renderHal->pStateHeap->iBindingTableSize = MOS_ALIGN_CEIL(taskParam->surfacePerBT *               // Reconfigure the binding table size
                                                   renderHal->pRenderHalPltInterface->GetBTStateCmdSize(),
                                                   renderHal->StateHeapSettings.iBTAlignment);

    // Round surfacePerBT up to what the aligned binding table can actually hold
    taskParam->surfacePerBT = renderHal->pStateHeap->iBindingTableSize / renderHal->pRenderHalPltInterface->GetBTStateCmdSize();

    renderHal->StateHeapSettings.iBindingTables = renderHal->StateHeapSettings.iBindingTables *       // Reconfigure the binding table number
                                                   renderHal->StateHeapSettings.iSurfacesPerBT / taskParam->surfacePerBT;

    renderHal->StateHeapSettings.iSurfacesPerBT = taskParam->surfacePerBT;                             // Reconfigure the surface per BT

    if (execGroupParam->numKernels > (uint32_t)renderHal->StateHeapSettings.iBindingTables)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Number of Kernels per task exceeds the number can be hold by binding table");
        goto finish;
    }

    if (execGroupParam->kernelDebugEnabled && Mos_ResourceIsNull(&state->sipResource.osResource))
    {
        HalCm_AllocateSipResource( state); // create sip resource if it does not exist
    }

    // Assign a MediaState from the MediaStateHeap
    // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
    // since this method syncs the batch buffer and media state.
    if (state->dshEnabled)
    {
        // Preload all kernels (needed for both the user-defined and the
        // dynamically assigned media state)
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execGroupParam->kernels, execGroupParam->numKernels, krnAllocations));

        if ( execGroupParam->userDefinedMediaState != nullptr )
        {
            // use existing media state as current state
            mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execGroupParam->userDefinedMediaState );

            // update current state to dsh
            renderHal->pStateHeap->pCurMediaState = mediaState;

            state->criticalSectionDSH->Acquire();
            // Refresh sync tag for all media states in submitted queue
            renderHal->pfnRefreshSync( renderHal );
            state->criticalSectionDSH->Release();
        }
        else
        {
            // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
            RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;

            state->criticalSectionDSH->Acquire();
            HalCm_DSH_GetDynamicStateConfiguration(state, &params, execGroupParam->numKernels, execGroupParam->kernels, execGroupParam->kernelCurbeOffset);

            // Prepare Media States to accommodate all parameters
            mediaState = renderHal->pfnAssignDynamicState(renderHal, &params, RENDERHAL_COMPONENT_CM);
            state->criticalSectionDSH->Release();
        }
    }
    else
    {
        // Assign a MediaState from the MediaStateHeap
        // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
        // since this method syncs the batch buffer and media state.
        mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
    }
    CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);

    // Assign/Reset SSH instance
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));

    if (state->useNewSamplerHeap == false)
    {
        HalCm_AcquireSamplerStatistics(state);
    }

    // Program the states of each kernel; no batch buffer is used for group
    // tasks, so nullptr is passed in its place.
    for (i = 0; i < execGroupParam->numKernels; i++)
    {
        CM_HAL_INDEX_PARAM indexParam;
        MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
        kernelParam = execGroupParam->kernels[i];

        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, nullptr, taskId, kernelParam, &indexParam,
            execGroupParam->kernelCurbeOffset[i], bti, mediaID, krnAllocations[i]));

        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernel(state, mediaState, nullptr, taskId, kernelParam, i, &indexParam,
            bti, mediaID, krnAllocations[i]));

        vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);

        if (execGroupParam->conditionalEndBitmap & ((uint64_t)1 << i))
        {
            CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetConditionalEndInfo(state, taskParam->conditionalEndInfo, taskParam->conditionalBBEndParams, i));
        }
    }

    // Store the Max Payload Sizes in the Task params
    state->taskParam->vfeCurbeSize = vfeCurbeSize;
    state->taskParam->urbEntrySize = 0;

    // We may have to send additional Binding table commands in command buffer.
    // This is needed because the surface offset (from the base on SSH)
    // calculation takes into account the max binding tables allocated in the
    // SSH.
    // (Cannot underflow: numKernels <= iBindingTables was verified above.)
    remBindingTables = renderHal->StateHeapSettings.iBindingTables - execGroupParam->numKernels;

    if (remBindingTables > 0)
    {
        for (i = 0; i < remBindingTables; i++)
        {
            CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
                renderHal,
                &bindingTable));
        }
    }

    // until now, we know binding table index for debug surface
    // let's get system thread
    if (execGroupParam->kernelDebugEnabled)
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetSipBinary(state));
    }

    osInterface = state->osInterface;
    osInterface->pfnResetPerfBufferID(osInterface);
    if (osInterface->pfnIsPerfTagSet(osInterface) == false)
    {
        osInterface->pfnIncPerfFrameID(osInterface);
        int perfTag = HalCm_GetKernelPerfTag(state, execGroupParam->kernels, execGroupParam->numKernels);
        osInterface->pfnSetPerfTag(osInterface, (uint16_t)perfTag);
    }

    // Submit HW commands and states
    CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
        nullptr, taskId, execGroupParam->kernels, &cmdBuffer));

    // Set the Task ID
    execGroupParam->taskIdOut = taskId;

    // Set OS data
    if(cmdBuffer)
    {
        execGroupParam->osData = cmdBuffer;
    }

    // Update the task ID table
    state->taskStatusTable[taskId] = (char)taskId;

finish:
    if (state->dshEnabled)
    {
        state->criticalSectionDSH->Acquire();
        if (mediaState && eStatus != MOS_STATUS_SUCCESS)
        {
            // Failed, release media state and heap resources
            renderHal->pfnReleaseDynamicState(renderHal, mediaState);
        }
        else
        {
            // NOTE(review): this branch is also taken when mediaState is still
            // nullptr after an early failure - confirm pfnSubmitDynamicState
            // tolerates a null media state.
            renderHal->pfnSubmitDynamicState(renderHal, mediaState);
        }
        state->criticalSectionDSH->Release();
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Executes the CM Hints Task
//|          Allocates per-kernel scratch arrays, resets the per-task index
//|          tables and render HAL state, parses the hints task, obtains a
//|          media state and batch buffer, programs all kernels and submits the
//|          commands. Only the media-object path is implemented; when the
//|          CM_HINTS_MASK_MEDIAOBJECT hint is not set MOS_STATUS_UNKNOWN is
//|          returned.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ExecuteHintsTask(
    PCM_HAL_STATE                 state,           // [in] Pointer to CM State
    PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)  // [in] Pointer to Task Param
{
    MOS_STATUS                    eStatus;
    PRENDERHAL_INTERFACE          renderHal;
    PRENDERHAL_MEDIA_STATE        mediaState;
    PMHW_BATCH_BUFFER             batchBuffer;
    PCM_HAL_BB_ARGS               bbCmArgs;
    PCM_HAL_KERNEL_PARAM          kernelParam;
    uint32_t                      i;
    uint32_t                      numTasks;
    uint64_t                      origKernelIds[CM_MAX_KERNELS_PER_TASK];
    int32_t                       taskId;
    int32_t                       remBindingTables;
    int32_t                       bindingTable;
    uint32_t                      vfeCurbeSize;
    uint32_t                      maxInlineDataSize;
    uint32_t                      maxIndirectDataSize;
    int32_t                       *bindingTableEntries;
    int32_t                       *mediaIds;
    PRENDERHAL_KRN_ALLOCATION     *krnAllocations;
    PCM_HAL_INDEX_PARAM           indexParams;
    bool                          useMediaObjects;
    void                          *cmdBuffer;
    bool                          splitTask;
    bool                          lastTask;
    PMOS_INTERFACE                osInterface = nullptr;

    //------------------------------------
    CM_ASSERT(state);
    CM_ASSERT(execHintsParam);
    //------------------------------------

    eStatus             = MOS_STATUS_SUCCESS;
    renderHal           = state->renderHal;
    mediaState          = nullptr;
    batchBuffer         = nullptr;
    bindingTableEntries = nullptr;
    mediaIds            = nullptr;
    krnAllocations      = nullptr;
    indexParams         = nullptr;
    useMediaObjects     = false;
    cmdBuffer           = nullptr;
    splitTask           = false;
    lastTask            = false;

    if (execHintsParam->numKernels > state->cmDeviceParam.maxKernelsPerTask)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
        goto finish;
    }

    state->osInterface->pfnSetGpuContext(state->osInterface, (MOS_GPU_CONTEXT)execHintsParam->queueOption.GPUContext);

    // Per-kernel scratch arrays; all four are released in the finish section
    bindingTableEntries = (int*)MOS_AllocAndZeroMemory(sizeof(int)*execHintsParam->numKernels);
    mediaIds = (int*)MOS_AllocAndZeroMemory(sizeof(int)* execHintsParam->numKernels);
    krnAllocations = (PRENDERHAL_KRN_ALLOCATION *)MOS_AllocAndZeroMemory(sizeof(void *)* execHintsParam->numKernels);
    indexParams = (PCM_HAL_INDEX_PARAM)MOS_AllocAndZeroMemory(sizeof(CM_HAL_INDEX_PARAM)* execHintsParam->numKernels);
    if (!bindingTableEntries || !mediaIds || !krnAllocations || !indexParams)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Memory allocation failed in ExecuteHints Task");
        goto finish;
    }

    // check hints to see if need to split into multiple tasks
    numTasks = ( execHintsParam->hints & CM_HINTS_MASK_NUM_TASKS ) >> CM_HINTS_NUM_BITS_TASK_POS;
    if( numTasks > 1 )
    {
        splitTask = true;
    }

    MOS_FillMemory(bindingTableEntries, sizeof(int) * execHintsParam->numKernels, CM_INVALID_INDEX);
    MOS_FillMemory(mediaIds, sizeof(int) * execHintsParam->numKernels, CM_INVALID_INDEX);
    MOS_FillMemory(krnAllocations, sizeof(void *)* execHintsParam->numKernels, 0);

    // Reset states before execute
    // (clear allocations, get GSH allocation index + any additional housekeeping)
    state->osInterface->pfnResetOsStates(state->osInterface);
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));

    MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));

    // Mark every per-task index table entry as unused
    MOS_FillMemory(
        state->bti2DIndexTable,
        state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->bti2DUPIndexTable,
        state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->bti3DIndexTable,
        state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->btiBufferIndexTable,
        state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
        CM_INVALID_INDEX );

    MOS_FillMemory(
        state->samplerIndexTable,
        state->cmDeviceParam.maxSamplerTableSize,
        CM_INVALID_INDEX);

    MOS_FillMemory(
        state->sampler8x8IndexTable,
        state->cmDeviceParam.maxSampler8x8TableSize,
        CM_INVALID_INDEX);

    state->walkerParams.CmWalkerEnable = 0;

    vfeCurbeSize = 0;
    maxInlineDataSize = 0;
    maxIndirectDataSize = 0;

    MOS_ZeroMemory(&origKernelIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));

    // Get the Task Id
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));

    // Parse the task
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseHintsTask(state, execHintsParam));

    // Assign a MediaState from the MediaStateHeap
    // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
    // since this method syncs the batch buffer and media state.
    if (state->dshEnabled)
    {
        if ( execHintsParam->userDefinedMediaState != nullptr )
        {
            // use existing media state as current state
            mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execHintsParam->userDefinedMediaState );

            // update current state to dsh
            renderHal->pStateHeap->pCurMediaState = mediaState;
            // Refresh sync tag for all media states in submitted queue
            state->criticalSectionDSH->Acquire();
            renderHal->pfnRefreshSync( renderHal );
            state->criticalSectionDSH->Release();
        }
        else
        {
            // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
            RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;

            state->criticalSectionDSH->Acquire();
            HalCm_DSH_GetDynamicStateConfiguration(state, &params, execHintsParam->numKernels, execHintsParam->kernels, execHintsParam->kernelCurbeOffset);

            // Prepare Media States to accommodate all parameters - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs
            mediaState = renderHal->pfnAssignDynamicState(renderHal, &params, RENDERHAL_COMPONENT_CM);
            state->criticalSectionDSH->Release();
        }
    }
    else
    {
        mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
    }
    CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);

    if (state->useNewSamplerHeap == false)
    {
        HalCm_AcquireSamplerStatistics(state);
    }

    // Assign/Reset SSH instance
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));

    if (!state->walkerParams.CmWalkerEnable)
    {
        if( splitTask )
        {
            // save original kernel IDs for kernel binary re-use in GSH
            for( i = 0; i < execHintsParam->numKernels; ++i )
            {
                origKernelIds[i] = execHintsParam->kernels[i]->kernelId;
            }

            // need to add tag to kernel IDs to distinguish batch buffer
            CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AddKernelIDTag(execHintsParam->kernels, execHintsParam->numKernels, numTasks, execHintsParam->numTasksGenerated));
        }

        // Get the Batch buffer
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBatchBuffer(state, execHintsParam->numKernels, execHintsParam->kernels, &batchBuffer));

        if( splitTask )
        {
            // restore kernel IDs for kernel binary re-use in GSH
            for( i = 0; i < execHintsParam->numKernels; ++i )
            {
                execHintsParam->kernels[i]->kernelId = origKernelIds[i];
            }
        }

        // Lock the batch buffer
        // (verify the buffer itself before dereferencing its private data)
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
        bbCmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
        if ( (bbCmArgs->refCount == 1) ||
             ( state->taskParam->reuseBBUpdateMask == 1) )
        {
            CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnLockBB(renderHal, batchBuffer));
        }
    }

    // Load all kernels in the same state heap - expand ISH if necessary BEFORE programming media states.
    // This is better than having to expand ISH in the middle of loading, when part of MediaIDs are
    // already programmed - not a problem in the old implementation where it would simply remove old
    // kernels out of the way.
    if (state->dshEnabled)
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execHintsParam->kernels, execHintsParam->numKernels, krnAllocations));
    }

    // 0: media walker
    // 1: media object
    if( (execHintsParam->hints & CM_HINTS_MASK_MEDIAOBJECT) == CM_HINTS_MASK_MEDIAOBJECT )
    {
        for (i = 0; i < execHintsParam->numKernels; ++i)
        {
            CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, batchBuffer, taskId, execHintsParam->kernels[i], &indexParams[i],
                execHintsParam->kernelCurbeOffset[i], bindingTableEntries[i], mediaIds[i], krnAllocations[i]));
        }

        CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);

        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernelMix(state, batchBuffer, taskId, execHintsParam->kernels,
            indexParams, bindingTableEntries, mediaIds, krnAllocations, execHintsParam->numKernels, execHintsParam->hints, execHintsParam->isLastTask));

        // Accumulate the total CURBE size and track the largest payloads
        for( i = 0; i < execHintsParam->numKernels; ++i)
        {
            kernelParam = execHintsParam->kernels[i];
            vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
            if( kernelParam->payloadSize > maxInlineDataSize)
            {
                maxInlineDataSize = kernelParam->payloadSize;
            }
            if( kernelParam->indirectDataParam.indirectDataSize > maxIndirectDataSize )
            {
                maxIndirectDataSize = kernelParam->indirectDataParam.indirectDataSize;
            }
        }

        // Store the Max Payload Sizes in the Task Param
        state->taskParam->vfeCurbeSize = vfeCurbeSize;
        if( maxIndirectDataSize)
        {
            // The indirect data size determines the URB entry size; it must
            // not overwrite the CURBE size stored just above.
            state->taskParam->urbEntrySize = maxIndirectDataSize;
        }
        else
        {
            state->taskParam->urbEntrySize = maxInlineDataSize;
        }

        // We may have to send additional Binding table commands in command buffer.
        // This is needed because the surface offset (from the base on SSH)
        // calculation takes into account the max binding tables allocated in the
        // SSH.
        remBindingTables = state->cmDeviceParam.maxKernelsPerTask -
            execHintsParam->numKernels;

        if( remBindingTables > 0)
        {
            for( i = 0; i < (uint32_t)remBindingTables; ++i)
            {
                CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
                    renderHal,
                    &bindingTable));
            }
        }

        osInterface = state->osInterface;
        osInterface->pfnResetPerfBufferID(osInterface);
        if (osInterface->pfnIsPerfTagSet(osInterface) == false)
        {
            osInterface->pfnIncPerfFrameID(osInterface);
            int perfTag = HalCm_GetKernelPerfTag(state, execHintsParam->kernels, execHintsParam->numKernels);
            osInterface->pfnSetPerfTag(osInterface, (uint16_t)perfTag);
        }

        // Submit HW commands and states
        CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
            batchBuffer, taskId, execHintsParam->kernels, &cmdBuffer));

        // Set the Task ID
        execHintsParam->taskIdOut = taskId;

        // Set OS data
        if( cmdBuffer )
        {
            execHintsParam->osData = cmdBuffer;
        }

        // Update the task ID table
        state->taskStatusTable[taskId] = (char)taskId;
    }
    else
    {
        // use media walker
        // unimplemented for now
        CM_ASSERTMESSAGE("Error: Media walker is not supported.");
        eStatus = MOS_STATUS_UNKNOWN;
    }

finish:
    if (state->dshEnabled)
    {
        state->criticalSectionDSH->Acquire();
        if (mediaState && eStatus != MOS_STATUS_SUCCESS)
        {
            // Failed, release media state and heap resources
            renderHal->pfnReleaseDynamicState(renderHal, mediaState);
        }
        else
        {
            // NOTE(review): this branch is also taken when mediaState is still
            // nullptr after an early failure - confirm pfnSubmitDynamicState
            // tolerates a null media state.
            renderHal->pfnSubmitDynamicState(renderHal, mediaState);
        }
        state->criticalSectionDSH->Release();
    }

    // free memory
    // Done before the batch buffer check below: that check can leave the
    // function early, which would otherwise leak these scratch arrays.
    if( bindingTableEntries )          MOS_FreeMemory(bindingTableEntries);
    if( mediaIds )                     MOS_FreeMemory(mediaIds);
    if( krnAllocations )               MOS_FreeMemory(krnAllocations);
    if( indexParams )                  MOS_FreeMemory( indexParams );

    if (batchBuffer)  // for MediaWalker, batchBuffer is empty
    {
        if (batchBuffer->bLocked)
        {
            // Only happens in Error cases
            CM_CHK_NULL_RETURN_MOSERROR(batchBuffer->pPrivateData);
            if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount == 1)
            {
                renderHal->pfnUnlockBB(renderHal, batchBuffer);
            }
        }
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Get the HAL Max values
//|          Fills maxValues from compile-time CM limits, the device creation
//|          parameters held in state and the render HAL HW caps.
//| Returns: MOS_STATUS_SUCCESS
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetMaxValues(
    PCM_HAL_STATE           state,                                              // [in]  Pointer to CM State
    PCM_HAL_MAX_VALUES      maxValues)                                          // [out] Pointer to Max values
{
    PRENDERHAL_INTERFACE renderHal = state->renderHal;

    // Limits taken from the device parameters
    maxValues->maxTasks                         = state->cmDeviceParam.maxTasks;
    maxValues->maxKernelBinarySize              = state->cmDeviceParam.maxKernelBinarySize;
    maxValues->maxSpillSizePerHwThread          = state->cmDeviceParam.maxPerThreadScratchSpaceSize;

    // Limits fixed at build time
    maxValues->maxKernelsPerTask                = CM_MAX_KERNELS_PER_TASK;
    maxValues->maxSamplerTableSize              = CM_MAX_SAMPLER_TABLE_SIZE;
    maxValues->maxBufferTableSize               = CM_MAX_BUFFER_SURFACE_TABLE_SIZE;
    maxValues->max2DSurfaceTableSize            = CM_MAX_2D_SURFACE_TABLE_SIZE;
    maxValues->max3DSurfaceTableSize            = CM_MAX_3D_SURFACE_TABLE_SIZE;
    maxValues->maxArgsPerKernel                 = CM_MAX_ARGS_PER_KERNEL;
    maxValues->maxArgByteSizePerKernel          = CM_MAX_ARG_BYTE_PER_KERNEL;
    maxValues->maxUserThreadsPerTask            = CM_MAX_USER_THREADS;
    maxValues->maxUserThreadsPerTaskNoThreadArg = CM_MAX_USER_THREADS_NO_THREADARG;

    // Limits reported by the render HAL hardware caps
    maxValues->maxSurfacesPerKernel             = renderHal->pHwCaps->dwMaxBTIndex;
    maxValues->maxSamplersPerKernel             = renderHal->pHwCaps->dwMaxUnormSamplers;
    maxValues->maxHwThreads                     = renderHal->pHwCaps->dwMaxThreads;

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Get the HAL Max extended values
//|          Fills maxValuesEx from compile-time CM limits, the media walker
//|          dimensions reported by cmHalInterface and the per-thread-group
//|          thread count queried from HalCm_GetMaxThreadCountPerThreadGroup.
//| Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetMaxValuesEx(
    PCM_HAL_STATE           state,                                              // [in]  Pointer to CM State
    PCM_HAL_MAX_VALUES_EX   maxValuesEx)                                        // [out] Pointer to extended Max values
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // Compile-time limits
    maxValuesEx->maxIndirectDataSizePerKernel = CM_MAX_INDIRECT_DATA_SIZE_PER_KERNEL;
    maxValuesEx->maxCURBESizePerTask          = CM_MAX_CURBE_SIZE_PER_TASK;
    maxValuesEx->maxCURBESizePerKernel        = CM_MAX_CURBE_SIZE_PER_KERNEL;
    maxValuesEx->maxSampler8x8TableSize       = CM_MAX_SAMPLER_8X8_TABLE_SIZE;
    maxValuesEx->max2DUPSurfaceTableSize      = CM_MAX_2D_SURFACE_UP_TABLE_SIZE;

    // MaxThreadWidth x MaxThreadHeight x ColorCount
    maxValuesEx->maxUserThreadsPerMediaWalker =
        state->cmHalInterface->GetMediaWalkerMaxThreadWidth() *
        state->cmHalInterface->GetMediaWalkerMaxThreadHeight() *
        CM_THREADSPACE_MAX_COLOR_COUNT;

    // The thread-group limit is the only value here that can fail to resolve
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetMaxThreadCountPerThreadGroup(state, &maxValuesEx->maxUserThreadsPerThreadGroup));

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Register Sampler
//|          Claims the first unused entry of state->samplerTable, reports its
//|          index through param->handle and programs it as a 3D sampler from
//|          the filter, address and border colour settings in param.
//| Returns: Result of the operation.
//|          MOS_STATUS_INVALID_PARAMETER when no table entry is free.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_RegisterSampler(
    PCM_HAL_STATE           state,                                              // [in]     Pointer to CM State
    PCM_HAL_SAMPLER_PARAM   param)                                              // [in/out] Pointer to Sampler Param
{
    MOS_STATUS                  eStatus   = MOS_STATUS_SUCCESS;
    PMHW_SAMPLER_STATE_PARAM    slot      = nullptr;
    uint32_t                    slotIndex = 0;

    // Find a free slot
    while (slotIndex < state->cmDeviceParam.maxSamplerTableSize)
    {
        if (!state->samplerTable[slotIndex].bInUse)
        {
            slot          = &state->samplerTable[slotIndex];
            param->handle = (uint32_t)slotIndex;
            break;
        }
        ++slotIndex;
    }

    if (slot == nullptr)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Sampler table is full");
        goto finish;
    }

    slot->SamplerType = MHW_SAMPLER_TYPE_3D;
    slot->ElementType = (state->useNewSamplerHeap == true) ? MHW_Sampler1Element
                                                           : MHW_Sampler4Elements;

    // Translate the CM filter / addressing modes into their MHW equivalents
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetGfxMapFilter(param->minFilter,  &slot->Unorm.MinFilter));
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetGfxMapFilter(param->magFilter,  &slot->Unorm.MagFilter));
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetGfxTextAddress(param->addressU, &slot->Unorm.AddressU));
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetGfxTextAddress(param->addressV, &slot->Unorm.AddressV));
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetGfxTextAddress(param->addressW, &slot->Unorm.AddressW));

    slot->Unorm.SurfaceFormat = (MHW_SAMPLER_SURFACE_PIXEL_TYPE)param->surfaceFormat;

    // Copy the border colour using the representation matching the pixel type
    if (slot->Unorm.SurfaceFormat == MHW_SAMPLER_SURFACE_PIXEL_UINT)
    {
        slot->Unorm.BorderColorRedU   = param->borderColorRedU;
        slot->Unorm.BorderColorGreenU = param->borderColorGreenU;
        slot->Unorm.BorderColorBlueU  = param->borderColorBlueU;
        slot->Unorm.BorderColorAlphaU = param->borderColorAlphaU;
    }
    else if (slot->Unorm.SurfaceFormat == MHW_SAMPLER_SURFACE_PIXEL_SINT)
    {
        slot->Unorm.BorderColorRedS   = param->borderColorRedS;
        slot->Unorm.BorderColorGreenS = param->borderColorGreenS;
        slot->Unorm.BorderColorBlueS  = param->borderColorBlueS;
        slot->Unorm.BorderColorAlphaS = param->borderColorAlphaS;
    }
    else
    {
        slot->Unorm.BorderColorRedF   = param->borderColorRedF;
        slot->Unorm.BorderColorGreenF = param->borderColorGreenF;
        slot->Unorm.BorderColorBlueF  = param->borderColorBlueF;
        slot->Unorm.BorderColorAlphaF = param->borderColorAlphaF;
    }
    slot->Unorm.bBorderColorIsValid = true;

    // Only mark the slot as taken once it has been fully programmed
    slot->bInUse = true;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: UnRegister Sampler
//|          Clears the sampler table entry identified by handle.
//| Returns: Result of the operation.
//|          MOS_STATUS_INVALID_HANDLE when handle is outside the sampler table.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_UnRegisterSampler(
    PCM_HAL_STATE               state,                                          // [in] Pointer to CM State
    uint32_t                    handle)                                         // [in] Sampler handle (index into the sampler table)
{
    // Reject handles that do not address an entry of the sampler table
    if (handle >= state->cmDeviceParam.maxSamplerTableSize)
    {
        CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
        return MOS_STATUS_INVALID_HANDLE;
    }

    // need to clear the state entirely instead of just setting bInUse to false
    PMHW_SAMPLER_STATE_PARAM samplerEntry = &state->samplerTable[handle];
    MOS_ZeroMemory(samplerEntry, sizeof(MHW_SAMPLER_STATE_PARAM));

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Register Sampler8x8
//|          Forwards the request to the RegisterSampler8x8 implementation of
//|          state->cmHalInterface.
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_RegisterSampler8x8(
    PCM_HAL_STATE                state,                                         // [in] Pointer to CM State
    PCM_HAL_SAMPLER_8X8_PARAM    param)                                         // [in] Pointer to Sampler8x8 Param
{
    MOS_STATUS eStatus = state->cmHalInterface->RegisterSampler8x8(param);
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: UnRegister Sampler
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_UnRegisterSampler8x8(
    PCM_HAL_STATE   state,    // [in] Pointer to CM State
    uint32_t        handle)   // [in] Index of the sampler in the sampler table
{
    if (handle >= state->cmDeviceParam.maxSamplerTableSize)
    {
        CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
        return MOS_STATUS_INVALID_HANDLE;
    }

    PMHW_SAMPLER_STATE_PARAM samplerEntry = &state->samplerTable[handle];
    samplerEntry->bInUse = false;

    // AVS samplers also occupy a slot in the 8x8 table, addressed by the
    // state id stored in the sampler entry; release that slot as well.
    if (samplerEntry->SamplerType == MHW_SAMPLER_TYPE_AVS)
    {
        const uint32_t stateId = samplerEntry->Avs.stateID;
        if (stateId >= state->cmDeviceParam.maxSampler8x8TableSize)
        {
            // Note: on this path the sampler entry keeps its contents, only
            // bInUse has been cleared.
            CM_ASSERTMESSAGE( "Invalid 8x8 handle '%d'", handle );
            return MOS_STATUS_INVALID_HANDLE;
        }
        state->sampler8x8Table[stateId].inUse = false;
    }

    // The whole entry is wiped, not just its bInUse flag.
    MOS_ZeroMemory(samplerEntry, sizeof(MHW_SAMPLER_STATE_PARAM));
    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Frees the buffer and removes from the table
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_FreeBuffer(
    PCM_HAL_STATE   state,    // [in] Pointer to CM State
    uint32_t        handle)   // [in] Buffer handle
{
    MOS_STATUS              eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_BUFFER_ENTRY    entry       = nullptr;
    PMOS_INTERFACE          osInterface = state->osInterface;

    // Resolve the handle; a failed lookup is returned to the caller as-is.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, handle, &entry));

    // Drop the surface state manager first (only present with the advanced executor).
    if (state->advExecutor)
    {
        state->advExecutor->DeleteBufferStateMgr(entry->surfStateMgr);
    }

    // Resources wrapped from outside are only unreferenced; resources created
    // by CMRT UMD are actually freed.
    if (!entry->isAllocatedbyCmrtUmd)
    {
        HalCm_OsResource_Unreference(&entry->osResource);
    }
    else
    {
        osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, SURFACE_FLAG_ASSUME_NOT_IN_USE);
    }
    osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);

    // Clear the bookkeeping fields of the table entry.
    entry->address = nullptr;
    entry->size    = 0;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Set surface read flag used in on demand sync
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetSurfaceReadFlag(
    PCM_HAL_STATE           state,        // [in] Pointer to CM State
    uint32_t                handle,       // [in] index of surface 2d
    bool                    readSync,     // [in] read-sync flag to record for the context
    MOS_GPU_CONTEXT         gpuContext)   // [in] GPU context the flag applies to
{
    MOS_STATUS                 eStatus = MOS_STATUS_SUCCESS;
    PCM_HAL_SURFACE2D_ENTRY    entry;

    // Get the 2D surface entry; a failed lookup is returned as-is.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, handle, &entry));

    // readSyncs[] is indexed by GPU context, so an invalid context is refused.
    if (!HalCm_IsValidGpuContext(gpuContext))
    {
        eStatus = MOS_STATUS_UNKNOWN;
        goto finish;
    }

    entry->readSyncs[gpuContext] = readSync;

    // The advanced executor is optional (every other user in this file checks
    // it before use), so guard the call instead of dereferencing blindly.
    if (state->advExecutor)
    {
        state->advExecutor->Set2DRenderTarget(entry->surfStateMgr, !readSync);
    }

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Lock the buffer and return a pointer to its data
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_LockBuffer(
    PCM_HAL_STATE           state,    // [in] Pointer to CM State
    PCM_HAL_BUFFER_PARAM    param)    // [in/out] Buffer Param: handle and lockFlag are read, data is written
{
    MOS_STATUS              eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_BUFFER_ENTRY    entry;
    PMOS_INTERFACE          osInterface = state->osInterface;
    MOS_LOCK_PARAMS         lockFlags;

    // Get the buffer entry; a failed lookup is returned as-is.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, param->handle, &entry));

    // Only read-only and write-only locks are supported.
    if ((param->lockFlag != CM_HAL_LOCKFLAG_READONLY) && (param->lockFlag != CM_HAL_LOCKFLAG_WRITEONLY) )
    {
        CM_ASSERTMESSAGE("Invalid lock flag!");
        eStatus = MOS_STATUS_UNKNOWN;
        goto finish;
    }

    // Lock the resource
    MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
    if (param->lockFlag == CM_HAL_LOCKFLAG_READONLY)
    {
        lockFlags.ReadOnly = true;
    }
    else
    {
        lockFlags.WriteOnly = true;
    }
    lockFlags.ForceCached = true;

    param->data = osInterface->pfnLockResource(
                    osInterface,
                    &entry->osResource,
                    &lockFlags);
    // A null mapping is reported as an error through the check macro.
    CM_CHK_NULL_GOTOFINISH_MOSERROR(param->data);

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Unlock the buffer
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_UnlockBuffer(
    PCM_HAL_STATE           state,    // [in] Pointer to CM State
    PCM_HAL_BUFFER_PARAM    param)    // [in] Buffer Param; only the handle is read
{
    MOS_STATUS              eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_BUFFER_ENTRY    entry       = nullptr;
    PMOS_INTERFACE          osInterface = state->osInterface;

    // Look the buffer up, then release the lock held on its OS resource.
    // Either step failing ends the function with that step's status.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, param->handle, &entry));
    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnUnlockResource(osInterface, &entry->osResource));

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Frees the 2D UP surface and removes it from the table
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_FreeSurface2DUP(
    PCM_HAL_STATE   state,    // [in] Pointer to CM State
    uint32_t        handle)   // [in] Handle of the 2D UP surface
{
    MOS_STATUS                  eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_SURFACE2D_UP_ENTRY  entry       = nullptr;
    PMOS_INTERFACE              osInterface = state->osInterface;

    // Resolve the handle; a failed lookup is returned to the caller as-is.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetResourceUPEntry(state, handle, &entry));

    // Drop the surface state manager (only present with the advanced executor).
    if (state->advExecutor)
    {
        state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
    }

    // Release the OS resource and reset its allocation index.
    osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, SURFACE_FLAG_ASSUME_NOT_IN_USE);
    osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);

    // Only the width is cleared on the table entry.
    entry->width = 0;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Get 2D surface pitch
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetSurface2DTileYPitch(
    PCM_HAL_STATE                state,   // [in] Pointer to CM State
    PCM_HAL_SURFACE2D_PARAM      param)   // [in/out] Surface 2D Param: handle is read, pitch is written
{
    MOS_STATUS                  eStatus;
    MOS_SURFACE                 surface;
    uint32_t                    index;
    RENDERHAL_GET_SURFACE_INFO  info;

    //-----------------------------------------------
    CM_ASSERT(state);
    //-----------------------------------------------

    // Stays UNKNOWN unless the surface-info query below overwrites it.
    eStatus = MOS_STATUS_UNKNOWN;
    index   = param->handle;

    // Describe the 2D surface from its table entry.
    // NOTE(review): the handle is used as a table index without a range check.
    MOS_ZeroMemory(&surface, sizeof(surface));
    surface.OsResource = state->umdSurf2DTable[index].osResource;
    surface.dwWidth    = state->umdSurf2DTable[index].width;
    surface.dwHeight   = state->umdSurf2DTable[index].height;
    surface.Format     = state->umdSurf2DTable[index].format;
    surface.dwDepth    = 1;

    MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
    CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
              state->osInterface,
              &info,
              &surface));

    // The pitch is read back from the surface description after the query.
    param->pitch = surface.dwPitch;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Stores the surface state parameters for a 2D surface
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Set2DSurfaceStateParam(
    PCM_HAL_STATE                            state,        // [in] Pointer to CM State
    PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM    param,        // [in] Surface state parameters to store (copied)
    uint32_t                                 aliasIndex,   // [in] Alias index; selects the slot via aliasIndex / surfaceArraySize
    uint32_t                                 handle)       // [in] Index into the 2D surface table
{
    MOS_STATUS eStatus;

    // Both pointers are required; the check macro reports the failure.
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(param);

    eStatus = MOS_STATUS_SUCCESS;

    // An alias index below surfaceArraySize also marks the surface itself as
    // having a user-provided surface state.
    if (aliasIndex < state->surfaceArraySize)
    {
        state->umdSurf2DTable[handle].surfStateSet = true;
    }

    // NOTE(review): handle and the computed slot are not range-checked here.
    state->umdSurf2DTable[handle].surfaceStateParam[aliasIndex / state->surfaceArraySize] = *param;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Sets size, offset and MOCS values for buffer surface state
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetBufferSurfaceStateParameters(
    PCM_HAL_STATE                            state,   // [in] Pointer to CM State
    PCM_HAL_BUFFER_SURFACE_STATE_PARAM       param)   // [in] handle, aliasIndex, size, offset and mocs to record
{
    MOS_STATUS eStatus;
    uint32_t   index;
    uint32_t   aliasIndex;
    uint32_t   slot;

    // Both pointers are required; the check macro reports the failure.
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(param);

    eStatus    = MOS_STATUS_SUCCESS;
    index      = param->handle;
    aliasIndex = param->aliasIndex;

    // An alias index below surfaceArraySize also marks the buffer itself as
    // having a user-provided surface state.
    if (aliasIndex < state->surfaceArraySize)
    {
        state->bufferTable[index].surfStateSet = true;
    }

    // Slot inside the per-buffer surface state array.
    // NOTE(review): index and slot are not range-checked here.
    slot = aliasIndex / state->surfaceArraySize;

    state->bufferTable[index].surfaceStateEntry[slot].surfaceStateSize   = param->size;
    state->bufferTable[index].surfaceStateEntry[slot].surfaceStateOffset = param->offset;
    state->bufferTable[index].surfaceStateEntry[slot].surfaceStateMOCS   = param->mocs;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Sets mocs value for surface
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetSurfaceMOCS(
    PCM_HAL_STATE                  state,     // [in] Pointer to CM State
    uint32_t                       handle,    // [in] Index into the table selected by argKind
    uint16_t                       mocs,      // [in] Memory object control value to record
    uint32_t                       argKind)   // [in] CM_ARGUMENT_* kind selecting the surface table
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // In every case the value is stored in the table entry and, when the
    // advanced executor exists, mirrored into the entry's surface state
    // manager. The executor is optional elsewhere in this file, so it is
    // checked before use here as well.
    // NOTE(review): handle is not range-checked against the table sizes.
    switch (argKind)
    {
        case CM_ARGUMENT_SURFACEBUFFER:
            state->bufferTable[handle].memObjCtl = mocs;
            if (state->advExecutor)
            {
                state->advExecutor->SetBufferMemoryObjectControl(state->bufferTable[handle].surfStateMgr, mocs);
            }
            break;

        case CM_ARGUMENT_SURFACE2D:
        case CM_ARGUMENT_SURFACE2D_SAMPLER:
        case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
        case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
            state->umdSurf2DTable[handle].memObjCtl = mocs;
            if (state->advExecutor)
            {
                state->advExecutor->Set2Dor3DMemoryObjectControl(state->umdSurf2DTable[handle].surfStateMgr, mocs);
            }
            break;

        case CM_ARGUMENT_SURFACE2D_UP:
        case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
            state->surf2DUPTable[handle].memObjCtl = mocs;
            if (state->advExecutor)
            {
                state->advExecutor->Set2Dor3DMemoryObjectControl(state->surf2DUPTable[handle].surfStateMgr, mocs);
            }
            break;

        case CM_ARGUMENT_SURFACE3D:
            state->surf3DTable[handle].memObjCtl = mocs;
            if (state->advExecutor)
            {
                state->advExecutor->Set2Dor3DMemoryObjectControl(state->surf3DTable[handle].surfStateMgr, mocs);
            }
            break;

        default:
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            CM_ASSERTMESSAGE("Invalid argument type in MOCS settings");
            break;
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Allocate surface 2D
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AllocateSurface2D(
    PCM_HAL_STATE                state,   // [in] Pointer to CM State
    PCM_HAL_SURFACE2D_PARAM      param)   // [in/out] Surface 2D Param; the chosen handle is written back
{
    MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
    PMOS_INTERFACE              osInterface;
    PCM_HAL_SURFACE2D_ENTRY     entry = nullptr;
    MOS_ALLOC_GFXRES_PARAMS     allocParams;

    //-----------------------------------------------
    CM_ASSERT(param->width > 0);
    //-----------------------------------------------

    osInterface = state->osInterface;

    // Take the first table slot whose OS resource is still null.
    for (uint32_t slot = 0;
         entry == nullptr && slot < state->cmDeviceParam.max2DSurfaceTableSize;
         ++slot)
    {
        if (Mos_ResourceIsNull(&state->umdSurf2DTable[slot].osResource))
        {
            entry         = &state->umdSurf2DTable[slot];
            param->handle = slot;
        }
    }

    if (entry == nullptr)
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE("Surface2D table is full");
        goto finish;
    }

    if (!param->isAllocatedbyCmrtUmd)
    {
        // Wrap a resource supplied by the caller: copy its descriptor and
        // take a reference on it.
        entry->width                = param->width;
        entry->height               = param->height;
        entry->format               = param->format;
        entry->isAllocatedbyCmrtUmd = false;
        entry->osResource           = *param->mosResource;
        HalCm_OsResource_Reference(&entry->osResource);
    }
    else
    {
        // Create a new Y-tiled 2D resource; the entry is only filled in once
        // the allocation has succeeded.
        MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
        allocParams.Type          = MOS_GFXRES_2D;
        allocParams.TileType      = MOS_TILE_Y;
        allocParams.Format        = param->format;
        allocParams.dwWidth       = param->width;
        allocParams.dwHeight      = param->height;
        allocParams.pSystemMemory = param->data;
        allocParams.pBufName      = "CmSurface2D";

        CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
            osInterface,
            &allocParams,
            &entry->osResource));

        entry->width                = param->width;
        entry->height               = param->height;
        entry->format               = param->format;
        entry->isAllocatedbyCmrtUmd = param->isAllocatedbyCmrtUmd;
    }

    // With the advanced executor, attach a surface state manager that records
    // the original format and dimensions.
    if (state->advExecutor)
    {
        entry->surfStateMgr = state->advExecutor->Create2DStateMgr(&entry->osResource);
        state->advExecutor->Set2Dor3DOrigFormat(entry->surfStateMgr, entry->format);
        state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
                                                   entry->width,
                                                   entry->height,
                                                   0); // no need to change depth in 2D surface
    }

    // A fresh surface starts with no read-sync flag set for any GPU context.
    for (int ctx = 0; ctx < CM_HAL_GPU_CONTEXT_COUNT; ctx++)
    {
        entry->readSyncs[ctx] = false;
    }

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Update surface 2D with a new MOS resource
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_UpdateSurface2D(
    PCM_HAL_STATE                state,   // [in] Pointer to CM State
    PCM_HAL_SURFACE2D_PARAM      param)   // [in] handle of the entry to update plus new size/format/resource
{
    PCM_HAL_SURFACE2D_ENTRY entry = nullptr;

    //-----------------------------------------------
    CM_ASSERT(param->width > 0);
    //-----------------------------------------------

    // NOTE(review): the handle is used as a table index without a range check.
    entry = &state->umdSurf2DTable[param->handle];

    // Swap the wrapped resource: drop the reference on the old one, copy the
    // new descriptor in and take a reference on it.
    HalCm_OsResource_Unreference(&entry->osResource);

    entry->width                = param->width;
    entry->height               = param->height;
    entry->format               = param->format;
    entry->isAllocatedbyCmrtUmd = false;
    entry->osResource           = *param->mosResource;

    HalCm_OsResource_Reference(&entry->osResource);

    // Rebuild the surface state manager so it refers to the new resource.
    if (state->advExecutor)
    {
        state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
        entry->surfStateMgr = state->advExecutor->Create2DStateMgr(&entry->osResource);
        state->advExecutor->Set2Dor3DOrigFormat(entry->surfStateMgr, entry->format);
        state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
                                                   entry->width,
                                                   entry->height,
                                                   0); // no need to change depth in 2D surface
    }

    // Read-sync flags are cleared for every GPU context.
    for (int ctx = 0; ctx < CM_HAL_GPU_CONTEXT_COUNT; ctx++)
    {
        entry->readSyncs[ctx] = false;
    }

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Update buffer with a new MOS resource
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_UpdateBuffer(
    PCM_HAL_STATE           state,    // [in] Pointer to CM State
    PCM_HAL_BUFFER_PARAM    param)    // [in] handle of the entry to update plus new size/resource
{
    PCM_HAL_BUFFER_ENTRY entry = nullptr;

    //-----------------------------------------------
    CM_ASSERT(param->size > 0);
    //-----------------------------------------------

    // NOTE(review): the handle is used as a table index without a range check.
    entry = &state->bufferTable[param->handle];

    // Swap the wrapped resource: drop the reference on the old one, copy the
    // new descriptor in and take a reference on it.
    HalCm_OsResource_Unreference(&entry->osResource);
    entry->osResource = *param->mosResource;
    HalCm_OsResource_Reference(&entry->osResource);

    entry->size                 = param->size;
    entry->isAllocatedbyCmrtUmd = false;

    // The first surface state slot is reset to cover the whole buffer.
    entry->surfaceStateEntry[0].surfaceStateSize   = entry->size;
    entry->surfaceStateEntry[0].surfaceStateOffset = 0;
    entry->surfaceStateEntry[0].surfaceStateMOCS   = 0;

    // Rebuild the surface state manager so it refers to the new resource.
    if (state->advExecutor)
    {
        state->advExecutor->DeleteBufferStateMgr(entry->surfStateMgr);
        entry->surfStateMgr = state->advExecutor->CreateBufferStateMgr(&entry->osResource);
        state->advExecutor->SetBufferOrigSize(entry->surfStateMgr, entry->size);
    }

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose: Frees the surface 2D and removes from the table
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_FreeSurface2D(
    PCM_HAL_STATE   state,    // [in] Pointer to CM State
    uint32_t        handle)   // [in] Handle of the 2D surface
{
    MOS_STATUS                 eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_SURFACE2D_ENTRY    entry       = nullptr;
    PMOS_INTERFACE             osInterface = state->osInterface;

    // Resolve the handle; a failed lookup is returned to the caller as-is.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, handle, &entry));

    // Drop the surface state manager (only present with the advanced executor).
    if (state->advExecutor)
    {
        state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
    }

    // Resources wrapped from outside are only unreferenced; resources created
    // by CMRT UMD are actually freed.
    if (!entry->isAllocatedbyCmrtUmd)
    {
        HalCm_OsResource_Unreference(&entry->osResource);
    }
    else
    {
        osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, SURFACE_FLAG_ASSUME_NOT_IN_USE);
    }

    // Return the slot to its empty state: null resource, zero size, default
    // frame type and no read-sync flag for any GPU context.
    MOS_ZeroMemory(&entry->osResource, sizeof(entry->osResource));
    entry->width     = 0;
    entry->height    = 0;
    entry->frameType = CM_FRAME;
    for (int ctx = 0; ctx < CM_HAL_GPU_CONTEXT_COUNT; ctx++)
    {
        entry->readSyncs[ctx] = false;
    }

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Allocate 3D resource
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_AllocateSurface3D(CM_HAL_STATE *state,             // [in] Pointer to CM State
                                   CM_HAL_3DRESOURCE_PARAM *param)  // [in/out] 3D resource Param; the chosen handle is written back
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    //-----------------------------------------------
    CM_ASSERT(state);
    CM_ASSERT(param->depth > 1);
    CM_ASSERT(param->width > 0);
    CM_ASSERT(param->height > 0);
    //-----------------------------------------------

    // Take the first table slot whose OS resource is still null.
    CM_HAL_3DRESOURCE_ENTRY *entry = nullptr;
    for (uint32_t slot = 0;
         entry == nullptr && slot < state->cmDeviceParam.max3DSurfaceTableSize;
         ++slot)
    {
        if (Mos_ResourceIsNull(&state->surf3DTable[slot].osResource))
        {
            entry         = &state->surf3DTable[slot];
            param->handle = slot;
        }
    }

    if (entry == nullptr)
    {
        CM_ASSERTMESSAGE("3D surface table is full");
        return MOS_STATUS_INVALID_PARAMETER;
    }

    Mos_ResetResource(&entry->osResource);  // Resets the Resource

    // Describe a Y-tiled volume resource with the requested geometry.
    MOS_ALLOC_GFXRES_PARAMS alloc_params;
    MOS_ZeroMemory(&alloc_params, sizeof(alloc_params));
    alloc_params.pBufName      = "CmSurface3D";
    alloc_params.Type          = MOS_GFXRES_VOLUME;
    alloc_params.TileType      = MOS_TILE_Y;
    alloc_params.Format        = param->format;
    alloc_params.dwWidth       = param->width;
    alloc_params.dwHeight      = param->height;
    alloc_params.dwDepth       = param->depth;
    alloc_params.pSystemMemory = param->data;

    MOS_INTERFACE *osInterface = state->renderHal->pOsInterface;
    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
        osInterface,
        &alloc_params,
        &entry->osResource));

    // The entry is only filled in once the allocation has succeeded.
    entry->format = param->format;
    entry->width  = param->width;
    entry->height = param->height;
    entry->depth  = param->depth;

    // With the advanced executor, attach a surface state manager that records
    // the original dimensions.
    if (state->advExecutor)
    {
        entry->surfStateMgr = state->advExecutor->Create3DStateMgr(&entry->osResource);
        state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
                                                   entry->width,
                                                   entry->height,
                                                   entry->depth);
    }

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Frees the resource and removes from the table
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Free3DResource(
    PCM_HAL_STATE   state,    // [in] Pointer to CM State
    uint32_t        handle)   // [in] Handle of the 3D resource
{
    MOS_STATUS                  eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_3DRESOURCE_ENTRY    entry       = nullptr;
    PMOS_INTERFACE              osInterface = state->osInterface;

    // Resolve the handle; a failed lookup is returned to the caller as-is.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, handle, &entry));

    // Drop the surface state manager (only present with the advanced executor).
    if (state->advExecutor)
    {
        state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
    }

    // Release the OS resource and reset its allocation index.
    osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, SURFACE_FLAG_ASSUME_NOT_IN_USE);
    osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Lock the resource and return
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Lock3DResource(
    PCM_HAL_STATE               state,    // [in] Pointer to CM State
    PCM_HAL_3DRESOURCE_PARAM    param)    // [in/out] 3D Param: handle/lockFlag are read; pitch, qpitch, qpitchEnabled and data are written
{
    MOS_STATUS                  eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_3DRESOURCE_ENTRY    entry;
    PMOS_INTERFACE              osInterface = nullptr;
    MOS_SURFACE                 surface;
    RENDERHAL_GET_SURFACE_INFO  info;
    MOS_LOCK_PARAMS             lockFlags;

    // Get the 3D Resource Entry
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, param->handle, &entry));

    // Only read-only and write-only locks are supported.
    if (!((param->lockFlag == CM_HAL_LOCKFLAG_READONLY) || (param->lockFlag == CM_HAL_LOCKFLAG_WRITEONLY)))
    {
        CM_ASSERTMESSAGE("Invalid lock flag!");
        eStatus = MOS_STATUS_UNKNOWN;
        goto finish;
    }

    // Query the resource layout; the format is left invalid going in.
    MOS_ZeroMemory(&surface, sizeof(surface));
    surface.OsResource = entry->osResource;
    surface.Format     = Format_Invalid;
    osInterface        = state->osInterface;

    MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
    CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
              osInterface,
              &info,
              &surface));

    // Report the layout to the caller together with the HW qpitch capability.
    param->pitch         = surface.dwPitch;
    param->qpitch        = surface.dwQPitch;
    param->qpitchEnabled = state->cmHalInterface->IsSurf3DQpitchSupportedbyHw();

    // Build the lock request: exactly one access mode plus ForceCached.
    MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
    if (param->lockFlag != CM_HAL_LOCKFLAG_READONLY)
    {
        lockFlags.WriteOnly = true;
    }
    else
    {
        lockFlags.ReadOnly = true;
    }
    lockFlags.ForceCached = true;

    param->data = osInterface->pfnLockResource(
                    osInterface,
                    &entry->osResource,
                    &lockFlags);
    // A null mapping is reported as an error through the check macro.
    CM_CHK_NULL_GOTOFINISH_MOSERROR(param->data);

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Unlock the resource and return
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Unlock3DResource(
    PCM_HAL_STATE               state,    // [in] Pointer to CM State
    PCM_HAL_3DRESOURCE_PARAM    param)    // [in] 3D Param; only the handle is read
{
    MOS_STATUS                  eStatus     = MOS_STATUS_SUCCESS;
    PCM_HAL_3DRESOURCE_ENTRY    entry       = nullptr;
    PMOS_INTERFACE              osInterface = state->osInterface;

    // Look the 3D resource up, then release the lock held on its OS resource.
    // Either step failing ends the function with that step's status.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, param->handle, &entry));
    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnUnlockResource(osInterface, &entry->osResource));

finish:
    return eStatus;
}
MOS_STATUS HalCm_SetCompressionMode(
    PCM_HAL_STATE                         state,      // Pointer to CM State
    CM_HAL_SURFACE2D_COMPRESSIOM_PARAM    mmcParam)   // handle of the 2D surface and the mmcMode to apply
{
    PMOS_INTERFACE             osInterface = state->osInterface;
    PCM_HAL_SURFACE2D_ENTRY    entry       = nullptr;
    MOS_STATUS                 eStatus     = MOS_STATUS_SUCCESS;

    // Resolve the 2D surface, then hand the requested compression mode to the
    // OS interface for that surface's resource.
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, mmcParam.handle, &entry));
    CM_CHK_MOSSTATUS_GOTOFINISH(osInterface->pfnSetMemoryCompressionMode(osInterface, &(entry->osResource), (MOS_MEMCOMP_STATE)mmcParam.mmcMode));

finish:
    return eStatus;
}
MOS_STATUS HalCm_SetL3Cache(
    const L3ConfigRegisterValues            *l3Values,       // [in]  Generic L3 config register values
    PCmHalL3Settings                         cmHalL3Cache )  // [out] HAL L3 settings to fill in
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // in legacy platforms, we map:
    // ConfigRegister0->SqcReg1
    // ConfigRegister1->CntlReg2
    // ConfigRegister2->CntlReg3
    // ConfigRegister3->CntlReg
    CM_CHK_NULL_GOTOFINISH_MOSERROR( cmHalL3Cache );
    CM_CHK_NULL_GOTOFINISH_MOSERROR(l3Values);

    // Any non-zero register requests an override of the default settings...
    cmHalL3Cache->overrideSettings    =
                (l3Values->config_register0  || l3Values->config_register1 ||
                 l3Values->config_register2 || l3Values->config_register3 );

    // ...and each individual register is overridden only when it is non-zero.
    cmHalL3Cache->cntlRegOverride     = (l3Values->config_register3 != 0);
    cmHalL3Cache->cntlReg2Override    = (l3Values->config_register1 != 0);
    cmHalL3Cache->cntlReg3Override    = (l3Values->config_register2 != 0);
    cmHalL3Cache->sqcReg1Override     = (l3Values->config_register0 != 0);

    cmHalL3Cache->cntlReg             = l3Values->config_register3;
    cmHalL3Cache->cntlReg2            = l3Values->config_register1;
    cmHalL3Cache->cntlReg3            = l3Values->config_register2;
    cmHalL3Cache->sqcReg1             = l3Values->config_register0;

finish:
    // Return the recorded status so a null argument is reported to the
    // caller instead of being masked as success.
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Set Cap values
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetCaps(
    PCM_HAL_STATE                   state,          // Pointer to CM State
    PCM_HAL_MAX_SET_CAPS_PARAM      setCapsParam)   // Which cap to set (type) and its value(s)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // Everything dereferenced below must be present.
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(setCapsParam);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal->pHwCaps);

    switch (setCapsParam->type)
    {
        case CM_SET_MAX_HW_THREADS:
            // Accept only a positive value that does not exceed what the
            // hardware caps report.
            if (setCapsParam->maxValue > 0 &&
                setCapsParam->maxValue <= state->renderHal->pHwCaps->dwMaxThreads)
            {
                state->maxHWThreadValues.apiValue = setCapsParam->maxValue;
            }
            else
            {
                eStatus = MOS_STATUS_UNKNOWN;
            }
            break;

        case CM_SET_HW_L3_CONFIG:
            // L3 configuration is platform specific and handled by the CM HAL interface.
            eStatus = state->cmHalInterface->SetL3CacheConfig( &setCapsParam->l3CacheValues,
                        &state->l3Settings );
            break;

        default:
            // Unrecognised cap type.
            eStatus = MOS_STATUS_UNKNOWN;
            break;
    }

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Task sets the power option which will be used by this task
//| Returns: Result of the operation.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetPowerOption(
    PCM_HAL_STATE               state,         // Pointer to CM State
    PCM_POWER_OPTION            powerOption )  // Power option to copy into the state
{
    if (!state->cmHalInterface->IsOverridePowerOptionPerGpuContext())
    {
        // Keep a copy of the caller's power option in the CM state.
        MOS_SecureMemcpy( &state->powerOption, sizeof( state->powerOption ), powerOption, sizeof( state->powerOption ) );
    }
    else
    {
        // With per-GPU-context overriding the call only logs and does nothing.
        CM_NORMALMESSAGE("WARNING: Deprecated function due to per context SSEU overriding is enabled.\n");
    }

    // Success is reported on both paths.
    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
// Purpose: Get the time in ns from QueryPerformanceCounter
// Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetGlobalTime(LARGE_INTEGER *globalTime)
{
    // The output location is mandatory.
    if (nullptr == globalTime)
    {
        return MOS_STATUS_NULL_POINTER;
    }

    // The counter value is written straight into the QuadPart field.
    uint64_t *counterOut = (uint64_t*)&(globalTime->QuadPart);
    return MOS_QueryPerformanceCounter(counterOut) ? MOS_STATUS_SUCCESS
                                                   : MOS_STATUS_UNKNOWN;
}
//*-----------------------------------------------------------------------------
// Purpose: Convert time from nanosecond to QPC time
// Returns: Result of the operation
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_ConvertToQPCTime(uint64_t nanoseconds, LARGE_INTEGER *qpcTime)
{
    const uint64_t nanosecondsPerSecond = 1000000000ULL;
    LARGE_INTEGER  perfFreq;
    uint64_t       ticksPerSecond;

    // The output location is mandatory.
    if(qpcTime == nullptr)
    {
        return MOS_STATUS_NULL_POINTER;
    }

    if (MOS_QueryPerformanceFrequency((uint64_t*)&perfFreq.QuadPart) == false)
    {
        return MOS_STATUS_UNKNOWN;
    }

    ticksPerSecond = (uint64_t)perfFreq.QuadPart;

    // ticks = nanoseconds * frequency / 1e9. The direct product is a 64-bit
    // integer multiplication that wraps once nanoseconds * frequency exceeds
    // 2^64 (about 18 seconds at a 1 GHz counter), so the whole seconds and the
    // sub-second remainder are converted separately; the remainder is below
    // 1e9, which keeps its product in range for realistic frequencies.
    qpcTime->QuadPart = (nanoseconds / nanosecondsPerSecond) * ticksPerSecond +
                        (nanoseconds % nanosecondsPerSecond) * ticksPerSecond / nanosecondsPerSecond;

    return MOS_STATUS_SUCCESS;
}
//------------------------------------------------------------------------------
//| Purpose: Halcm updates power state to hw state
//| Returns:
//------------------------------------------------------------------------------
MOS_STATUS HalCm_UpdatePowerOption(
    PCM_HAL_STATE               state,         // Pointer to CM State
    PCM_POWER_OPTION            powerOption )  // Requested slice / sub-slice / EU counts
{
    // With per-GPU-context overriding the call only logs and does nothing.
    if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext())
    {
        CM_NORMALMESSAGE("WARNING: Deprecated function due to per context SSEU overriding is enabled.\n");
        return MOS_STATUS_SUCCESS;
    }

    // Translate the CM power option into the RenderHal form (values narrowed
    // to 8 bits).
    RENDERHAL_POWEROPTION renderPowerOption;
    renderPowerOption.nEU       = (uint8_t)powerOption->nEU;
    renderPowerOption.nSubSlice = (uint8_t)powerOption->nSubSlice;

    // option set in CM create device to use slice shutdown for life of CM device ( override previous value if necessary )
    renderPowerOption.nSlice    = (state->requestSingleSlice == true)
                                      ? 1
                                      : (uint8_t)powerOption->nSlice;

    PRENDERHAL_INTERFACE renderHal = state->renderHal;
    renderHal->pfnSetPowerOptionMode( renderHal, &renderPowerOption );

    return MOS_STATUS_SUCCESS;
}
MOS_STATUS HalCm_InitPerfTagIndexMap(PCM_HAL_STATE cmState)
{
    using namespace std;

    // Kernel names that get a fixed perf-tag index in map 0. Each name is
    // listed once: inserting an already-present key into a map is a no-op,
    // so the previously repeated "SurfaceCopy_BufferToBuffer_4k" line did
    // nothing.
    static const struct
    {
        const char *kernelName;
        int         perfTagIndex;
    } presetTags[] =
    {
        { "surfaceCopy_read_NV12_32x32",         GPUCOPY_READ_PERFTAG_INDEX  },
        { "surfaceCopy_read_NV12_aligned_32x32", GPUCOPY_READ_PERFTAG_INDEX  },
        { "surfaceCopy_read_32x32",              GPUCOPY_READ_PERFTAG_INDEX  },
        { "surfaceCopy_read_aligned_32x32",      GPUCOPY_READ_PERFTAG_INDEX  },
        { "surfaceCopy_write_NV12_32x32",        GPUCOPY_WRITE_PERFTAG_INDEX },
        { "surfaceCopy_write_32x32",             GPUCOPY_WRITE_PERFTAG_INDEX },
        { "SurfaceCopy_2DTo2D_NV12_32x32",       GPUCOPY_G2G_PERFTAG_INDEX   },
        { "SurfaceCopy_2DTo2D_32x32",            GPUCOPY_G2G_PERFTAG_INDEX   },
        { "SurfaceCopy_BufferToBuffer_4k",       GPUCOPY_C2C_PERFTAG_INDEX   },
        { "surfaceCopy_set_NV12",                GPUINIT_PERFTAG_INDEX       },
        { "surfaceCopy_set",                     GPUINIT_PERFTAG_INDEX       },
    };

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CM_ASSERT(cmState);

    // Create one name->index map per slot; every slot's running index starts at 1.
    for (int i = 0; i < MAX_COMBINE_NUM_IN_PERFTAG; i++)
    {
        cmState->currentPerfTagIndex[i] = 1;
#if MOS_MESSAGES_ENABLED
        cmState->perfTagIndexMap[i] = MOS_NewUtil<map<string, int> >(__FUNCTION__, __FILE__, __LINE__);
#else
        cmState->perfTagIndexMap[i] = MOS_NewUtil<map<string, int> >();
#endif
        CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->perfTagIndexMap[i]);
    }

    // Pre-populate map 0 with the preset kernel names.
    for (const auto &tag : presetTags)
    {
        cmState->perfTagIndexMap[0]->insert(pair<string, int>(tag.kernelName, tag.perfTagIndex));
    }

finish:
    return eStatus;
}
MOS_STATUS HalCm_DeleteFromStateBufferList(
    PCM_HAL_STATE               state,       // Pointer to CM State
    void                        *kernelPtr ) // Key of the state buffer entry to drop
{
    // Remove the entry keyed by kernelPtr; the call always reports success.
    state->state_buffer_list_ptr->erase( kernelPtr );
    return MOS_STATUS_SUCCESS;
}
PRENDERHAL_MEDIA_STATE HalCm_GetMediaStatePtrForKernel(
    PCM_HAL_STATE               state,       // Pointer to CM State
    void                        *kernelPtr ) // Key into the state buffer list
{
    // One lookup serves both the existence test and the read; the previous
    // find() followed by operator[] searched the container twice.
    auto found = state->state_buffer_list_ptr->find( kernelPtr );
    if ( found == state->state_buffer_list_ptr->end() )
    {
        // No state buffer is registered for this kernel.
        return nullptr;
    }
    return found->second.mediaStatePtr;
}
uint64_t HalCm_GetStateBufferVAPtrForSurfaceIndex(
    PCM_HAL_STATE               state,       // Pointer to CM State
    uint32_t                    surfIndex )  // State buffer index to search for
{
    // Linear scan: the first entry whose stateBufferIndex matches wins.
    for ( auto &listItem : *state->state_buffer_list_ptr )
    {
        if ( listItem.second.stateBufferIndex != surfIndex )
        {
            continue;
        }
        return listItem.second.stateBufferVaPtr;
    }

    // 0 signals that no entry matched.
    return 0;
}
PRENDERHAL_MEDIA_STATE HalCm_GetMediaStatePtrForSurfaceIndex(
    PCM_HAL_STATE               state,       // Pointer to CM State
    uint32_t                    surfIndex )  // State buffer index to search for
{
    // Linear scan: the first entry whose stateBufferIndex matches wins.
    for ( auto &listItem : *state->state_buffer_list_ptr )
    {
        if ( listItem.second.stateBufferIndex != surfIndex )
        {
            continue;
        }
        return listItem.second.mediaStatePtr;
    }

    // nullptr signals that no entry matched.
    return nullptr;
}
uint64_t HalCm_GetStateBufferVAPtrForMediaStatePtr(
    PCM_HAL_STATE               state,          // Pointer to CM State
    PRENDERHAL_MEDIA_STATE      mediaStatePtr ) // Media state pointer to search for
{
    // Linear scan: the first entry holding this media state pointer wins.
    for ( auto &listItem : *state->state_buffer_list_ptr )
    {
        if ( listItem.second.mediaStatePtr != mediaStatePtr )
        {
            continue;
        }
        return listItem.second.stateBufferVaPtr;
    }

    // 0 signals that no entry matched.
    return 0;
}
uint32_t HalCm_GetStateBufferSizeForKernel(
    PCM_HAL_STATE               state,       // Pointer to CM State
    void                        *kernelPtr ) // Key into the state buffer list
{
    // One lookup serves both the existence test and the read; the previous
    // find() followed by operator[] searched the container twice.
    auto found = state->state_buffer_list_ptr->find( kernelPtr );
    if ( found == state->state_buffer_list_ptr->end() )
    {
        // No state buffer is registered for this kernel.
        return 0;
    }
    return found->second.stateBufferSize;
}
CM_STATE_BUFFER_TYPE HalCm_GetStateBufferTypeForKernel(
    PCM_HAL_STATE               state,       // Pointer to CM State
    void                        *kernelPtr ) // Key into the state buffer list
{
    // One lookup serves both the existence test and the read; the previous
    // find() followed by operator[] searched the container twice.
    auto found = state->state_buffer_list_ptr->find( kernelPtr );
    if ( found == state->state_buffer_list_ptr->end() )
    {
        // No state buffer is registered for this kernel.
        return CM_STATE_BUFFER_NONE;
    }
    return found->second.stateBufferType;
}
// Creates the requested GPU context through the OS interface and then
// registers that context for batch-buffer-complete notification.
// Returns the status left in eStatus by the check macros; MOS_STATUS_SUCCESS
// when both OS-interface calls pass their check.
MOS_STATUS HalCm_CreateGPUContext(
PCM_HAL_STATE state,
MOS_GPU_CONTEXT gpuContext,
MOS_GPU_NODE gpuNode,
PMOS_GPUCTX_CREATOPTIONS pMosGpuContextCreateOption)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
// Create the GPU context `gpuContext` on node `gpuNode` with the given
// create options.
// NOTE(review): the check macro is defined elsewhere; presumably it stores
// the converted result in eStatus and jumps to `finish` on failure.
CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->osInterface->pfnCreateGpuContext(
state->osInterface,
gpuContext,
gpuNode,
pMosGpuContextCreateOption));
// Register the new context with the Batch Buffer completion event
CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->osInterface->pfnRegisterBBCompleteNotifyEvent(
state->osInterface,
gpuContext));
finish:
return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Creates instance of HAL CM State
//| Returns: MOS_STATUS_SUCCESS and the new state in *cmState when every step
//|          succeeds; otherwise an error status, with *cmState set to nullptr
//| Note:    Caller must call pfnAllocate to allocate all HalCm/Mhw states and objects.
//|          Caller MUST call HalCm_Destroy to destroy the instance
//|          Every failure leaves through the finish label, where the partially
//|          built state is released with HalCm_Destroy
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Create(
    PMOS_CONTEXT            osDriverContext,   // [in] OS Driver Context
    PCM_HAL_CREATE_PARAM    param,             // [in] Create Param
    PCM_HAL_STATE           *cmState)          // [out] double pointer to CM State
{
    MOS_STATUS                  eStatus;
    PCM_HAL_STATE               state = nullptr;
    MhwInterfaces               *mhwInterfaces = nullptr;
    MhwInterfaces::CreateParams params;

    //-----------------------------------------
    CM_ASSERT(osDriverContext);
    CM_ASSERT(param);
    CM_ASSERT(cmState);
    //-----------------------------------------

    eStatus = MOS_STATUS_SUCCESS;

    // Allocate State structure
    state = (PCM_HAL_STATE)MOS_AllocAndZeroMemory(sizeof(CM_HAL_STATE));
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state);

    // Allocate/Initialize OS Interface
    state->osInterface = (PMOS_INTERFACE)
                                MOS_AllocAndZeroMemory(sizeof(MOS_INTERFACE));
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->osInterface);
    state->osInterface->bDeallocateOnExit = true;
    CM_CHK_HRESULT_GOTOFINISH_MOSERROR(Mos_InitInterface(state->osInterface, osDriverContext, COMPONENT_CM));

#if (_RELEASE_INTERNAL || _DEBUG)
#if defined(CM_DIRECT_GUC_SUPPORT)
    state->osInterface->m_pWorkQueueMngr = new CMRTWorkQueueMngr();
#endif
#endif

    state->osInterface->pfnGetPlatform(state->osInterface, &state->platform);
    state->skuTable = state->osInterface->pfnGetSkuTable(state->osInterface);
    state->waTable  = state->osInterface->pfnGetWaTable (state->osInterface);

    {
        MOS_GPUCTX_CREATOPTIONS createOption;

        // Create VEBOX Context
        createOption.CmdBufferNumScale = MOS_GPU_CONTEXT_CREATE_DEFAULT;
        CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_CreateGPUContext(
            state,
            MOS_GPU_CONTEXT_VEBOX,
            MOS_GPU_NODE_VE,
            &createOption));
    }

    // Allocate/Initialize CM Rendering Interface
    state->renderHal = (PRENDERHAL_INTERFACE)
                                MOS_AllocAndZeroMemory(sizeof(RENDERHAL_INTERFACE));
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal);

    state->dshEnabled                   = param->dynamicStateHeap;
    state->renderHal->bDynamicStateHeap = state->dshEnabled;

    if (state->dshEnabled)
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_InitInterface_Dynamic(state->renderHal, &state->cpInterface, state->osInterface));
    }
    else
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_InitInterface(state->renderHal, &state->cpInterface, state->osInterface));
    }

    // Allocate/Initialize VEBOX Interface
    CmSafeMemSet(&params, 0, sizeof(params));
    params.Flags.m_vebox = 1;
    mhwInterfaces = MhwInterfaces::CreateFactory(params, state->osInterface);
    if (mhwInterfaces == nullptr)
    {
        // Leave through finish (not a bare return) so that the state built so
        // far is destroyed and *cmState is written.
        CM_ASSERTMESSAGE("Allocate MhwInterfaces failed");
        eStatus = MOS_STATUS_NO_SPACE;
        goto finish;
    }

    // Take over the VEBOX interface. MhwInterfaces always create CP and MI
    // interfaces, so we have to delete those we don't need. The container is
    // released before the null check below so that it is not leaked when the
    // factory produced no VEBOX interface.
    state->veboxInterface = mhwInterfaces->m_veboxInterface;
    MOS_Delete(mhwInterfaces->m_miInterface);
    Delete_MhwCpInterface(mhwInterfaces->m_cpInterface);
    mhwInterfaces->m_cpInterface = nullptr;
    MOS_Delete(mhwInterfaces);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->veboxInterface);

    // set IsMDFLoad to distinguish MDF context from other Media Contexts
    state->renderHal->IsMDFLoad = true;

    // disable YV12SinglePass as CMRT & compiler don't support it
    state->renderHal->bEnableYV12SinglePass = false;

    state->cmDeviceParam.maxKernelBinarySize = CM_KERNEL_BINARY_BLOCK_SIZE;

    // set if the new sampler heap management is used or not
    // currently new sampler heap management depends on DSH
    if (state->dshEnabled)
    {
        state->useNewSamplerHeap = true;
    }
    else
    {
        state->useNewSamplerHeap = false;
    }

    //Get Max Scratch Space Size
    if (param->disableScratchSpace)
    {
        state->cmDeviceParam.maxPerThreadScratchSpaceSize = 0;
    }
    else
    {
        //Gen7_5 + : (MaxScratchSpaceSize + 1) *16k
        if (param->scratchSpaceSize == CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_DEFAULT)
        { //By default, 128K for HSW
            state->cmDeviceParam.maxPerThreadScratchSpaceSize = 8 * CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_16K_STEP;
        }
        else
        {
            state->cmDeviceParam.maxPerThreadScratchSpaceSize = (param->scratchSpaceSize)*
                                CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_16K_STEP;
        }
    }

    // Initialize kernel parameters
    state->kernelParamsRenderHal.pMhwKernelParam = &state->kernelParamsMhw;

    // Enable SLM in L3 Cache
    state->l3Settings.enableSlm = true;

    // Slice shutdown
    state->requestSingleSlice = param->requestSliceShutdown;

    //mid thread preemption on/off and SIP debug control
    state->midThreadPreemptionDisabled = param->disabledMidThreadPreemption;
    state->kernelDebugEnabled          = param->enabledKernelDebug;

    // init mapping for the state buffer
#if MOS_MESSAGES_ENABLED
    state->state_buffer_list_ptr = MOS_NewUtil<std::map< void *, CM_HAL_STATE_BUFFER_ENTRY> >(__FUNCTION__, __FILE__, __LINE__);
#else
    state->state_buffer_list_ptr = MOS_NewUtil<std::map< void *, CM_HAL_STATE_BUFFER_ENTRY> >();
#endif
    CM_CHK_NULL_GOTOFINISH_MOSERROR( state->state_buffer_list_ptr );

    MOS_ZeroMemory(&state->hintIndexes.kernelIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
    MOS_ZeroMemory(&state->hintIndexes.dispatchIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);

    // get the global media profiler
    state->perfProfiler = MediaPerfProfiler::Instance();
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->perfProfiler);
    CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->Initialize((void*)state, state->osInterface));

    state->criticalSectionDSH = MOS_New(CMRT_UMD::CSync);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->criticalSectionDSH);

    state->cmDeviceParam.maxKernelsPerTask       = CM_MAX_KERNELS_PER_TASK;
    state->cmDeviceParam.maxSamplerTableSize     = CM_MAX_SAMPLER_TABLE_SIZE;
    state->cmDeviceParam.maxSampler8x8TableSize  = state->renderHal->pHwSizes->dwSizeSampler8x8Table;
    state->cmDeviceParam.maxBufferTableSize      = CM_MAX_BUFFER_SURFACE_TABLE_SIZE;
    state->cmDeviceParam.max2DSurfaceUPTableSize = CM_MAX_2D_SURFACE_UP_TABLE_SIZE;
    state->cmDeviceParam.max2DSurfaceTableSize   = CM_MAX_2D_SURFACE_TABLE_SIZE;
    state->cmDeviceParam.max3DSurfaceTableSize   = CM_MAX_3D_SURFACE_TABLE_SIZE;
    state->cmDeviceParam.maxTasks                = param->maxTaskNumber;
    state->cmDeviceParam.maxAvsSamplers          = CM_MAX_AVS_SAMPLER_SIZE;
    state->cmDeviceParam.maxGshKernelEntries     = param->kernelBinarySizeinGSH / (CM_32K);

    if (state->dshEnabled)
    {
        // Initialize Kernel Cache Hit/Miss counters
        state->dshKernelCacheMiss = 0;
        state->dshKernelCacheHit  = 0;
    }

    // Setup Function pointers
    state->pfnCmAllocate                  = HalCm_Allocate;
    state->pfnGetMaxValues                = HalCm_GetMaxValues;
    state->pfnGetMaxValuesEx              = HalCm_GetMaxValuesEx;
    state->pfnExecuteTask                 = HalCm_ExecuteTask;
    state->pfnExecuteGroupTask            = HalCm_ExecuteGroupTask;
    state->pfnExecuteHintsTask            = HalCm_ExecuteHintsTask;
    state->pfnRegisterSampler             = HalCm_RegisterSampler;
    state->pfnUnRegisterSampler           = HalCm_UnRegisterSampler;
    state->pfnRegisterSampler8x8          = HalCm_RegisterSampler8x8;
    state->pfnUnRegisterSampler8x8        = HalCm_UnRegisterSampler8x8;
    state->pfnFreeBuffer                  = HalCm_FreeBuffer;
    state->pfnLockBuffer                  = HalCm_LockBuffer;
    state->pfnUnlockBuffer                = HalCm_UnlockBuffer;
    state->pfnFreeSurface2DUP             = HalCm_FreeSurface2DUP;
    state->pfnGetSurface2DTileYPitch      = HalCm_GetSurface2DTileYPitch;
    state->pfnSet2DSurfaceStateParam      = HalCm_Set2DSurfaceStateParam;
    state->pfnSetBufferSurfaceStatePara   = HalCm_SetBufferSurfaceStateParameters;
    state->pfnSetSurfaceMOCS              = HalCm_SetSurfaceMOCS;
    /************************************************************/
    state->pfnAllocateSurface2D           = HalCm_AllocateSurface2D;
    state->pfnAllocate3DResource          = HalCm_AllocateSurface3D;
    state->pfnFreeSurface2D               = HalCm_FreeSurface2D;
    state->pfnLock2DResource              = HalCm_Lock2DResource;
    state->pfnUnlock2DResource            = HalCm_Unlock2DResource;
    state->pfnSetCompressionMode          = HalCm_SetCompressionMode;
    /************************************************************/
    state->pfnFree3DResource              = HalCm_Free3DResource;
    state->pfnLock3DResource              = HalCm_Lock3DResource;
    state->pfnUnlock3DResource            = HalCm_Unlock3DResource;
    state->pfnSetCaps                     = HalCm_SetCaps;
    state->pfnSetPowerOption              = HalCm_SetPowerOption;
    state->pfnUpdatePowerOption           = HalCm_UpdatePowerOption;
    state->pfnSendMediaWalkerState        = HalCm_SendMediaWalkerState;
    state->pfnSendGpGpuWalkerState        = HalCm_SendGpGpuWalkerState;
    state->pfnSetSurfaceReadFlag          = HalCm_SetSurfaceReadFlag;
    state->pfnSetVtuneProfilingFlag       = HalCm_SetVtuneProfilingFlag;
    state->pfnExecuteVeboxTask            = HalCm_ExecuteVeboxTask;
    state->pfnGetSipBinary                = HalCm_GetSipBinary;
    state->pfnGetTaskSyncLocation         = HalCm_GetTaskSyncLocation;
    state->pfnGetGlobalTime               = HalCm_GetGlobalTime;
    state->pfnConvertToQPCTime            = HalCm_ConvertToQPCTime;
    state->pfnDeleteFromStateBufferList   = HalCm_DeleteFromStateBufferList;
    state->pfnGetMediaStatePtrForKernel   = HalCm_GetMediaStatePtrForKernel;
    state->pfnGetStateBufferVAPtrForSurfaceIndex  = HalCm_GetStateBufferVAPtrForSurfaceIndex;
    state->pfnGetMediaStatePtrForSurfaceIndex     = HalCm_GetMediaStatePtrForSurfaceIndex;
    state->pfnGetStateBufferVAPtrForMediaStatePtr = HalCm_GetStateBufferVAPtrForMediaStatePtr;
    state->pfnGetStateBufferSizeForKernel = HalCm_GetStateBufferSizeForKernel;
    state->pfnGetStateBufferTypeForKernel = HalCm_GetStateBufferTypeForKernel;
    state->pfnCreateGPUContext            = HalCm_CreateGPUContext;
    state->pfnDSHUnregisterKernel         = HalCm_DSH_UnregisterKernel;
    state->pfnUpdateBuffer                = HalCm_UpdateBuffer;
    state->pfnUpdateSurface2D             = HalCm_UpdateSurface2D;

    //==========<Initialize 5 OS-dependent DDI functions: pfnAllocate3DResource, pfnAllocateSurface2DUP====
    //                 pfnAllocateBuffer,pfnRegisterKMDNotifyEventHandle, pfnGetSurface2DPitchAndSize >====
    HalCm_OsInitInterface(state);

    HalCm_InitPerfTagIndexMap(state);

    state->maxHWThreadValues.userFeatureValue = 0;
    state->maxHWThreadValues.apiValue         = 0;

    HalCm_GetUserFeatureSettings(state);

#if MDF_COMMAND_BUFFER_DUMP
    HalCm_InitDumpCommandBuffer(state);
    state->pfnInitDumpCommandBuffer = HalCm_InitDumpCommandBuffer;
    state->pfnDumpCommadBuffer      = HalCm_DumpCommadBuffer;
#endif //MDF_COMMAND_BUFFER_DUMP

#if MDF_CURBE_DATA_DUMP
    HalCm_InitDumpCurbeData(state);
#endif

#if MDF_SURFACE_CONTENT_DUMP
    HalCm_InitSurfaceDump(state);
#endif

#if MDF_SURFACE_STATE_DUMP
    HalCm_InitDumpSurfaceState(state);
    state->pfnInitDumpSurfaceState = HalCm_InitDumpSurfaceState;
    state->pfnDumpSurfaceState     = HalCm_DumpSurfaceState;
#endif

#if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
    HalCm_InitDumpInterfaceDescriporData(state);
#endif

    state->cmHalInterface = CMHalDevice::CreateFactory(state);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(state->cmHalInterface);

    if (param->refactor)
    {
        state->refactor = true;
    }
    else
    {
        state->refactor = false;
    }

    state->requestCustomGpuContext = param->requestCustomGpuContext;

#if (_DEBUG || _RELEASE_INTERNAL)
    {
        // A readable "refactor.key" file forces the refactor path; a readable
        // "origin.key" file is checked afterwards and forces the origin path.
        FILE *fp1 = nullptr;
        MOS_SecureFileOpen(&fp1, "refactor.key", "r");
        if (fp1 != nullptr)
        {
            state->refactor = true;
            fclose(fp1);
        }

        FILE *fp2 = nullptr;
        MOS_SecureFileOpen(&fp2, "origin.key", "r");
        if (fp2 != nullptr)
        {
            state->refactor = false;
            fclose(fp2);
        }
    }

    if (state->refactor)
    {
        CM_NORMALMESSAGE("Use refactor path!\n");
    }
    else
    {
        CM_NORMALMESSAGE("Use origin path!\n");
    }
#endif

finish:
    if (eStatus != MOS_STATUS_SUCCESS)
    {
        // Release whatever was built so far; HalCm_Destroy accepts nullptr
        HalCm_Destroy(state);
        *cmState = nullptr;
    }
    else
    {
        *cmState = state;
    }

    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Destroys instance of HAL CM State
//| Returns: N/A
//| Note:    A nullptr state is accepted and ignored.
//|          The OS interface structure itself is only freed when its
//|          bDeallocateOnExit flag is set.
//*-----------------------------------------------------------------------------
void HalCm_Destroy(
    PCM_HAL_STATE state)                                                    // [in] Pointer to CM State
{
    MOS_STATUS eStatus;

    if (state == nullptr)
    {
        return;
    }

    //Delete CmHal Interface
    MosSafeDelete(state->cmHalInterface);
    Delete_MhwCpInterface(state->cpInterface);
    state->cpInterface = nullptr;
    MosSafeDelete(state->state_buffer_list_ptr);
    MosSafeDelete(state->criticalSectionDSH);

    // Delete the unified media profiler
    if (state->perfProfiler)
    {
        MediaPerfProfiler::Destroy(state->perfProfiler, (void*)state, state->osInterface);
        state->perfProfiler = nullptr;
    }

    // Delete Batch Buffers
    if (state->batchBuffers)
    {
        for (int32_t i = 0; i < state->numBatchBuffers; i++)
        {
            if (!Mos_ResourceIsNull(&state->batchBuffers[i].OsResource))
            {
                eStatus = (MOS_STATUS)state->renderHal->pfnFreeBB(
                            state->renderHal,
                            &state->batchBuffers[i]);

                CM_ASSERT(eStatus == MOS_STATUS_SUCCESS);
            }

            MOS_FreeMemory(state->batchBuffers[i].pPrivateData);
        }

        MOS_FreeMemory(state->batchBuffers);
        state->batchBuffers = nullptr;
    }

    // Delete TimeStamp Buffer
    HalCm_FreeTsResource(state);

    if ((state->midThreadPreemptionDisabled == false) || (state->kernelDebugEnabled == true))
    {
        // Delete CSR surface
        HalCm_FreeCsrResource(state);

        // Delete sip surface
        HalCm_FreeSipResource(state);
    }

    // Delete tracker resource
    HalCm_FreeTrackerResources(state);

    // Delete advance executor
    MOS_Delete(state->advExecutor);

    // Delete heap manager
    if (state->renderHal)
    {
        MOS_Delete(state->renderHal->dgsheapManager);
    }

    if (state->hLibModule)
    {
        MOS_FreeLibrary(state->hLibModule);
        state->hLibModule = nullptr;
    }

    // Delete RenderHal Interface
    if (state->renderHal)
    {
        if (state->renderHal->pfnDestroy)
        {
            state->renderHal->pfnDestroy(state->renderHal);
        }
        MOS_FreeMemory(state->renderHal);
        state->renderHal = nullptr;
    }

    // Delete VEBOX Interface
    // The interface object is released even when m_veboxHeap is null, so a
    // state that is torn down before a heap exists does not leak it.
    if (state->veboxInterface)
    {
        if (state->veboxInterface->m_veboxHeap)
        {
            state->veboxInterface->DestroyHeap( );
        }
        MOS_Delete(state->veboxInterface);
        state->veboxInterface = nullptr;
    }

    // Delete OS Interface
    if (state->osInterface)
    {
        if (state->osInterface->pfnDestroy)
        {
            state->osInterface->pfnDestroy(state->osInterface, true);
        }
        if (state->osInterface->bDeallocateOnExit)
        {
            MOS_FreeMemory(state->osInterface);
            state->osInterface = nullptr;
        }
    }

    // Delete the TaskParam
    MOS_FreeMemory(state->taskParam);

    // Delete the TaskTimeStamp
    MOS_FreeMemory(state->taskTimeStamp);

    // Delete Tables
    MOS_FreeMemory(state->tableMemories);

    // Delete the pTotalKernelSize table for GSH
    MOS_FreeMemory(state->totalKernelSize);

    // Delete the perfTag Map
    for (int tagIndex = 0; tagIndex < MAX_COMBINE_NUM_IN_PERFTAG; tagIndex++)
    {
        MosSafeDelete(state->perfTagIndexMap[tagIndex]);
    }

    // Delete the state
    MOS_FreeMemory(state);
}
//*-----------------------------------------------------------------------------
//| Purpose: Reads the VPHAL_CM_MAX_THREADS user feature value (debug and
//|          release-internal builds only) and stores it in
//|          maxHWThreadValues.userFeatureValue when it lies in
//|          (0, pHwCaps->dwMaxThreads]
//| Returns: N/A
//| Note:    In other builds the function does nothing
//*-----------------------------------------------------------------------------
void HalCm_GetUserFeatureSettings(
    PCM_HAL_STATE                  cmState
)
{
#if (_DEBUG || _RELEASE_INTERNAL)
    MOS_USER_FEATURE_VALUE      featureValue;
    MOS_ZeroMemory(&featureValue, sizeof(featureValue));

    PMOS_USER_FEATURE_INTERFACE featureInterface = &cmState->osInterface->UserFeatureInterface;

    // Start from the interface's initial descriptor and point it at the
    // single value slot above.
    MOS_USER_FEATURE            feature = *featureInterface->pUserFeatureInit;
    feature.Type        = MOS_USER_FEATURE_TYPE_USER;
    feature.pPath       = (char *)__MEDIA_USER_FEATURE_SUBKEY_INTERNAL;
    feature.pValues     = &featureValue;
    feature.uiNumValues = 1;

    if (featureInterface->pfnReadValue(
            featureInterface,
            &feature,
            (char *)VPHAL_CM_MAX_THREADS,
            MOS_USER_FEATURE_VALUE_TYPE_UINT32) != MOS_STATUS_SUCCESS)
    {
        // Nothing readable: keep the value the caller initialised
        return;
    }

    uint32_t requestedThreads = feature.pValues[0].u32Data;
    if ((requestedThreads > 0) && (requestedThreads <= cmState->renderHal->pHwCaps->dwMaxThreads))
    {
        cmState->maxHWThreadValues.userFeatureValue = requestedThreads;
    }
#else
    UNUSED(cmState);
#endif // _DEBUG || _RELEASE_INTERNAL
}
//*-----------------------------------------------------------------------------
//| Purpose: Gathers information about the surface - used by GT-Pin
//|          The details are written into the surface-info arrays that the
//|          current task keeps for the kernel at taskParam->curKernelIndex
//| Returns: MOS_STATUS_SUCCESS if the argument kind is a buffer, 2D, 2D UP or
//|          3D surface and the index is in range;
//|          MOS_STATUS_INVALID_PARAMETER if the current kernel index or the
//|          computed entry index is out of range;
//|          MOS_STATUS_UNKNOWN for any other argument kind
//| Note:    indexParam is unused. tempPlaneIndex and surfaceParam are passed
//|          by value, so the increments of tempPlaneIndex below are not
//|          visible to the caller.
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_GetSurfaceDetails(
PCM_HAL_STATE cmState,
PCM_HAL_INDEX_PARAM indexParam,
uint32_t btIndex,
MOS_SURFACE& surface,
int16_t globalSurface,
PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry,
uint32_t tempPlaneIndex,
RENDERHAL_SURFACE_STATE_PARAMS surfaceParam,
CM_HAL_KERNEL_ARG_KIND argKind
)
{
MOS_STATUS eStatus = MOS_STATUS_UNKNOWN;
PCM_SURFACE_DETAILS surfaceInfos = nullptr;
PCM_SURFACE_DETAILS pgSurfaceInfos = nullptr;
PCM_HAL_TASK_PARAM taskParam = cmState->taskParam;
uint32_t curKernelIndex = taskParam->curKernelIndex;
PMOS_PLANE_OFFSET planeOffset = 0;
uint32_t maxEntryNum = 0;
MOS_OS_FORMAT tempOsFormat ;
CM_SURFACE_BTI_INFO surfBTIInfo;
cmState->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
UNUSED(indexParam);
// The current kernel must have a slot in the per-kernel info array
if(curKernelIndex+1>taskParam->surfEntryInfoArrays.kernelNum)
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Mismatched kernel index: curKernelIndex '%d' vs krnNum '%d'",
curKernelIndex,taskParam->surfEntryInfoArrays.kernelNum);
goto finish;
}
// surfaceInfos: regular surfaces of this kernel; pgSurfaceInfos: its global surfaces
surfaceInfos = taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].surfEntryInfos;
pgSurfaceInfos = taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].globalSurfInfos;
tempOsFormat = cmState->osInterface->pfnFmt_MosToOs(surface.Format);
// In every case below btIndex is rebased (unsigned arithmetic) into an array
// index and then range-checked; an index below the base wraps around to a
// large value and is expected to be rejected by that check.
// NOTE(review): the limits are read through surfEntryInfosArray-> (element 0),
// not through element [curKernelIndex] - confirm this is intended.
switch (argKind)
{
case CM_ARGUMENT_SURFACEBUFFER:
// Buffers bound inside the reserved (global) range go to the global array
if((btIndex >= surfBTIInfo.reservedSurfaceStart) &&
(btIndex < surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER))
{
btIndex = btIndex - surfBTIInfo.reservedSurfaceStart;
maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->globalSurfNum;
if ( btIndex >= maxEntryNum )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
maxEntryNum, btIndex);
goto finish;
}
// Only width and format are recorded for a buffer; the rest stays zero
MOS_ZeroMemory(&pgSurfaceInfos[btIndex], sizeof(CM_SURFACE_DETAILS));
pgSurfaceInfos[btIndex].width = surface.dwWidth;
pgSurfaceInfos[btIndex].format = DDI_FORMAT_UNKNOWN;
}
else
{
btIndex = btIndex - surfBTIInfo.reservedSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
if ( btIndex >= maxEntryNum )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
maxEntryNum, btIndex);
goto finish;
}
// Only width and format are recorded for a buffer; the rest stays zero
MOS_ZeroMemory(&surfaceInfos[btIndex], sizeof(CM_SURFACE_DETAILS));
surfaceInfos[btIndex].width = surface.dwWidth;
surfaceInfos[btIndex].format = DDI_FORMAT_UNKNOWN;
}
// usedIndex is only advanced when globalSurface is negative
if (globalSurface < 0)
{
++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
}
eStatus = MOS_STATUS_SUCCESS;
break;
case CM_ARGUMENT_SURFACE2D_UP:
case CM_ARGUMENT_SURFACE2D:
// VME surface and sampler8x8 called with CM_ARGUMENT_SURFACE2D
btIndex = btIndex - surfBTIInfo.reservedSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
if ( btIndex >= maxEntryNum )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
maxEntryNum, btIndex);
goto finish;
}
// Geometry comes from the surface state entry; depth and slice pitch are
// recorded as 0 for 2D surfaces
surfaceInfos[btIndex].width = surfaceEntry->dwWidth;
surfaceInfos[btIndex].height = surfaceEntry->dwHeight;
surfaceInfos[btIndex].depth = 0;
surfaceInfos[btIndex].format = (DdiSurfaceFormat)tempOsFormat;
surfaceInfos[btIndex].planeIndex = tempPlaneIndex;
surfaceInfos[btIndex].pitch = surfaceEntry->dwPitch;
surfaceInfos[btIndex].slicePitch = 0;
surfaceInfos[btIndex].surfaceBaseAddress = 0;
surfaceInfos[btIndex].tileWalk = surfaceEntry->bTileWalk;
surfaceInfos[btIndex].tiledSurface = surfaceEntry->bTiledSurface;
// U/V plane entries take their offsets from the matching plane offset of the
// surface; any other plane uses the Y plane offset
if (surfaceEntry->YUVPlane == MHW_U_PLANE ||
surfaceEntry->YUVPlane == MHW_V_PLANE)
{
planeOffset = (surfaceEntry->YUVPlane == MHW_U_PLANE)
? &surface.UPlaneOffset
: &surface.VPlaneOffset;
surfaceInfos[btIndex].yOffset = planeOffset->iYOffset >> 1;
if ( argKind == CM_ARGUMENT_SURFACE2D_UP )
{
surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
}
else
{
uint32_t pixelsPerSampleUV = 0;
//Get Pixels Per Sample if we use dataport read
if(surfaceParam.bWidthInDword_UV)
{
RenderHal_GetPixelsPerSample(surface.Format, &pixelsPerSampleUV);
}
else
{
// If the kernel uses sampler - do not change width (it affects coordinates)
pixelsPerSampleUV = 1;
}
// With one pixel per sample the byte offset is used as is; otherwise it is
// first divided by sizeof(uint32_t)
if(pixelsPerSampleUV == 1)
{
surfaceInfos[btIndex].xOffset = planeOffset->iXOffset >> 2;
}
else
{
surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
}
}
}
else
{
surfaceInfos[btIndex].xOffset = (surface.YPlaneOffset.iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
surfaceInfos[btIndex].yOffset = surface.YPlaneOffset.iYOffset >> 1;
}
++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
// Local copy only: tempPlaneIndex is a by-value parameter
++tempPlaneIndex;
eStatus = MOS_STATUS_SUCCESS;
break;
case CM_ARGUMENT_SURFACE3D:
// NOTE(review): this case rebases on normalSurfaceStart while the 2D and
// buffer cases rebase on reservedSurfaceStart - confirm which base is intended
btIndex = btIndex - surfBTIInfo.normalSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
if ( btIndex >= maxEntryNum )
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
CM_ASSERTMESSAGE(
"Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
maxEntryNum, btIndex);
goto finish;
}
// Same fields as the 2D case, plus depth and slice pitch from the surface
surfaceInfos[btIndex].width = surfaceEntry->dwWidth;
surfaceInfos[btIndex].height = surfaceEntry->dwHeight;
surfaceInfos[btIndex].depth = surface.dwDepth;
surfaceInfos[btIndex].format = (DdiSurfaceFormat)tempOsFormat;
surfaceInfos[btIndex].pitch = surfaceEntry->dwPitch;
surfaceInfos[btIndex].planeIndex = tempPlaneIndex;
surfaceInfos[btIndex].slicePitch = surface.dwSlicePitch;
surfaceInfos[btIndex].surfaceBaseAddress = 0;
surfaceInfos[btIndex].tileWalk = surfaceEntry->bTileWalk;
surfaceInfos[btIndex].tiledSurface = surfaceEntry->bTiledSurface;
if (surfaceEntry->YUVPlane == MHW_U_PLANE ||
surfaceEntry->YUVPlane == MHW_V_PLANE)
{
planeOffset = (surfaceEntry->YUVPlane == MHW_U_PLANE)
? &surface.UPlaneOffset
: &surface.VPlaneOffset;
surfaceInfos[btIndex].yOffset = planeOffset->iYOffset >> 1;
surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
}
else
{
surfaceInfos[btIndex].xOffset = (surface.YPlaneOffset.iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
surfaceInfos[btIndex].yOffset = surface.YPlaneOffset.iYOffset >> 1;
}
// Local copy only: tempPlaneIndex is a by-value parameter
++tempPlaneIndex;
++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
eStatus = MOS_STATUS_SUCCESS;
break;
default:
// Unrecognized argument kind: eStatus keeps its initial MOS_STATUS_UNKNOWN
break;
}
finish:
return eStatus;
}
uint32_t HalCm_GetFreeBindingIndex(
PCM_HAL_STATE state,
PCM_HAL_INDEX_PARAM indexParam,
uint32_t total)
{
CM_SURFACE_BTI_INFO surfBTIInfo;
state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
uint32_t btIndex = surfBTIInfo.normalSurfaceStart;
uint32_t unAllocated = total;
while (btIndex < 256 && unAllocated > 0)
{
uint32_t arrayIndex = btIndex >> 5;
uint32_t bitMask = (uint32_t)0x1 << (btIndex % 32);
if (indexParam->btArray[arrayIndex] & bitMask)
{
// oops, occupied
if (unAllocated != total)
{
// clear previous allocation
uint32_t allocated = total - unAllocated;
uint32_t tmpIndex = btIndex - 1;
while (allocated > 0)
{
uint32_t arrayIndex = tmpIndex >> 5;
uint32_t bitMask = 1 << (tmpIndex % 32);
indexParam->btArray[arrayIndex] &= ~bitMask;
allocated--;
tmpIndex--;
}
// reset
unAllocated = total;
}
}
else
{
indexParam->btArray[arrayIndex] |= bitMask;
unAllocated--;
}
btIndex++;
}
if (unAllocated == 0)
{
// found slot
return btIndex - total;
}
// no slot
return 0;
}
//*-----------------------------------------------------------------------------
//| Purpose: Marks every binding table index in [start, end] (both inclusive)
//|          as used in indexParam->btArray (one bit per index, 32 per word)
//| Returns: N/A
//| Note:    No range check is done here.
//|          NOTE(review): callers presumably keep `end` inside the range
//|          covered by btArray - confirm.
//*-----------------------------------------------------------------------------
void HalCm_PreSetBindingIndex(
    PCM_HAL_INDEX_PARAM     indexParam,
    uint32_t                start,
    uint32_t                end)
{
    uint32_t btIndex;
    for ( btIndex = start; btIndex <= end ; btIndex++)
    {
        uint32_t arrayIndex = btIndex >> 5;
        // Unsigned literal, as in HalCm_GetFreeBindingIndex: a signed 1 shifted
        // by 31 would hit the sign bit
        uint32_t bitMask    = (uint32_t)0x1 << (btIndex % 32);
        indexParam->btArray[arrayIndex] |= bitMask;
    }
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup 2D surface State with BTIndex
//|          If the surface is already recorded at the same binding table index
//|          (sampler index when pixelPitch is set, regular index otherwise),
//|          the recorded binding table entries are copied into the current
//|          binding table; otherwise the surface state is built, bound plane
//|          by plane and recorded for later reuse
//| Returns: MOS_STATUS_SUCCESS for CM_NULL_SURFACE, for the reuse path and when
//|          setup and binding succeed; MOS_STATUS_UNKNOWN for an invalid
//|          surface index; otherwise the status left by the failing step
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup2DSurfaceStateWithBTIndex(
    PCM_HAL_STATE                      state,
    int32_t                            bindingTable,
    uint32_t                           surfIndex,
    uint32_t                           btIndex,
    bool                               pixelPitch)
{
    PRENDERHAL_INTERFACE            renderHal       = state->renderHal;
    MOS_STATUS                      eStatus         = MOS_STATUS_UNKNOWN;
    int32_t                         nSurfaceEntries = 0;
    uint16_t                        memObjCtl       = CM_DEFAULT_CACHE_TYPE;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
    PRENDERHAL_STATE_HEAP           stateHeap;
    uint32_t                        offsetSrc;

    // A null surface needs no binding
    if (surfIndex == CM_NULL_SURFACE)
    {
        return MOS_STATUS_SUCCESS;
    }

    // check the surfIndex
    if (surfIndex >= state->cmDeviceParam.max2DSurfaceTableSize ||
        Mos_ResourceIsNull(&state->umdSurf2DTable[surfIndex].osResource) )
    {
        CM_ASSERTMESSAGE(
            "Invalid 2D surface array index '%d'", surfIndex);
        return MOS_STATUS_UNKNOWN;
    }

    // Bookkeeping record of this surface's last binding
    auto &btiRecord = state->bti2DIndexTable[ surfIndex ];

    // Check to see if surface is already assigned
    const uint32_t recordedBtIndex = pixelPitch ? btiRecord.BTI.samplerSurfIndex
                                                : btiRecord.BTI.regularSurfIndex;
    if ( btIndex == recordedBtIndex )
    {
        nSurfaceEntries = btiRecord.nPlaneNumber;
        stateHeap       = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                             ( stateHeap->iBindingTableOffset ) +                                // Moves the pointer to Base of Array of Binding Tables
                             ( bindingTable * stateHeap->iBindingTableSize ) +                   // Moves the pointer to a Particular Binding Table
                             ( btIndex * sizeof( uint32_t ) );                                   // Move the pointer to correct entry

        uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );

        // One binding table entry per plane is copied from the recorded position
        if ( pixelPitch )
        {
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, btiRecord.BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
        }
        else
        {
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, btiRecord.BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
        }

        return MOS_STATUS_SUCCESS;
    }

    // Get Details of 2D surface and fill the surface
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE2D, surfIndex, pixelPitch));

    // Setup 2D surface
    MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
    surfaceParam.Type       = renderHal->SurfaceTypeDefault;
    surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    if (!pixelPitch)
    {
        // Without pixel pitch both widths are flagged as dword widths
        surfaceParam.bWidthInDword_UV = true;
        surfaceParam.bWidthInDword_Y  = true;
    }
    surfaceParam.bRenderTarget = isRenderTarget(state, surfIndex);

    //Cache configurations
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
                  renderHal,
                  &surface,
                  &surfaceParam,
                  &nSurfaceEntries,
                  surfaceEntries,
                  nullptr));

    // Bind the surface State: one consecutive binding table index per plane
    for (int32_t plane = 0; plane < nSurfaceEntries; plane++)
    {
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
                        renderHal,
                        bindingTable,
                        btIndex + plane,
                        surfaceEntries[plane]));
    }

    btiRecord.nPlaneNumber = nSurfaceEntries;

    // Get Offset to Current Binding Table
    stateHeap = renderHal->pStateHeap;
    offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                ( stateHeap->iBindingTableOffset ) +                                // Moves the pointer to Base of Array of Binding Tables
                ( bindingTable * stateHeap->iBindingTableSize ) +                   // Moves the pointer to a Particular Binding Table
                ( btIndex * sizeof( uint32_t ) );                                   // Move the pointer to correct entry

    // Record where the entries were written so that a later call with the same
    // binding table index can copy them
    if ( pixelPitch )
    {
        btiRecord.BTI.samplerSurfIndex = btIndex;
        btiRecord.BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
    }
    else
    {
        btiRecord.BTI.regularSurfIndex = btIndex;
        btiRecord.BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose: Setup Buffer surface State with BTIndex
//|          If the buffer is already recorded at the same binding table index,
//|          the recorded binding table entry is copied into the current
//|          binding table; otherwise the buffer surface state is built, bound
//|          and recorded for later reuse
//| Returns: MOS_STATUS_SUCCESS for CM_NULL_SURFACE, for the reuse path and when
//|          setup and binding succeed; otherwise the status left by the
//|          failing step
//| Note:    pixelPitch is unused.
//|          NOTE(review): unlike the 2D variant, surfIndex is not range-checked
//|          before btiBufferIndexTable is indexed - confirm callers validate it
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_SetupBufferSurfaceStateWithBTIndex(
    PCM_HAL_STATE                      state,
    int32_t                            bindingTable,
    uint32_t                           surfIndex,
    uint32_t                           btIndex,
    bool                               pixelPitch)
{
    PRENDERHAL_INTERFACE            renderHal = state->renderHal;
    MOS_STATUS                      eStatus;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntry;
    uint16_t                        memObjCtl;
    uint32_t                        offsetSrc;
    PRENDERHAL_STATE_HEAP           stateHeap;
    UNUSED(pixelPitch);

    eStatus = MOS_STATUS_UNKNOWN;

    // A null surface needs no binding
    if (surfIndex == CM_NULL_SURFACE)
    {
        return MOS_STATUS_SUCCESS;
    }

    memObjCtl = CM_DEFAULT_CACHE_TYPE;

    // Check to see if surface is already assigned
    if ( btIndex == ( uint32_t )state->btiBufferIndexTable[ surfIndex ].BTI.regularSurfIndex )
    {
        uint32_t nSurfaceEntries = state->btiBufferIndexTable[ surfIndex ].nPlaneNumber;

        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                             ( stateHeap->iBindingTableOffset ) +                                // Moves the pointer to Base of Array of Binding Tables
                             ( bindingTable * stateHeap->iBindingTableSize ) +                   // Moves the pointer to a Particular Binding Table
                             ( btIndex * sizeof( uint32_t ) );                                   // Move the pointer to correct entry

        uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
        MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->btiBufferIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );

        return MOS_STATUS_SUCCESS;
    }

    // Get Details of Buffer surface and fill the surface
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACEBUFFER, surfIndex, 0));

    // set up buffer surface
    MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));

    // Set bRenderTarget by default
    surfaceParam.bRenderTarget = true;

    //Cache configurations
    // Applied to surfaceParam before the surface state is built from it (same
    // order as the 2D variant); applying it afterwards would have no effect
    // because surfaceParam is not read again.
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

    // Setup Buffer surface
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupBufferSurfaceState(
            renderHal,
            &surface,
            &surfaceParam,
            &surfaceEntry));

    // Bind the surface State
    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
               renderHal,
               bindingTable,
               btIndex,
               surfaceEntry));

    // Record the binding so that a later call with the same binding table
    // index can copy the entry; a buffer always occupies a single entry
    state->btiBufferIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
    state->btiBufferIndexTable[ surfIndex ].nPlaneNumber = 1;

    stateHeap = renderHal->pStateHeap;
    offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                ( stateHeap->iBindingTableOffset ) +                                // Moves the pointer to Base of Array of Binding Tables
                ( bindingTable * stateHeap->iBindingTableSize ) +                   // Moves the pointer to a Particular Binding Table
                ( btIndex * sizeof( uint32_t ) );                                   // Move the pointer to correct entry

    state->btiBufferIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//!
//! \brief    Setup 2D UP surface state at a caller-chosen binding table index
//! \details  Binds the 2D UP surface identified by surfIndex into bindingTable
//!           starting at entry btIndex. If bti2DUPIndexTable already records
//!           this surface at btIndex, the previously written binding table
//!           entries are copied into the current binding table instead of
//!           setting the surface state up again.
//! \param    PCM_HAL_STATE state
//!           [in/out] Pointer to CM_HAL_STATE Structure
//! \param    int32_t bindingTable
//!           [in] Binding table to write into
//! \param    uint32_t surfIndex
//!           [in] 2D UP surface index; CM_NULL_SURFACE is accepted and ignored
//! \param    uint32_t btIndex
//!           [in] First binding table entry used by the surface
//! \param    bool pixelPitch
//!           [in] true selects the sampler BTI bookkeeping slot, false the
//!           regular slot (which also programs the surface widths in DWORDs)
//! \return   MOS_STATUS
//!
MOS_STATUS HalCm_Setup2DSurfaceUPStateWithBTIndex(
    PCM_HAL_STATE   state,
    int32_t         bindingTable,
    uint32_t        surfIndex,
    uint32_t        btIndex,
    bool            pixelPitch)
{
    MOS_STATUS                      eStatus         = MOS_STATUS_UNKNOWN;
    PRENDERHAL_INTERFACE            renderHal       = state->renderHal;
    PRENDERHAL_STATE_HEAP           stateHeap       = nullptr;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    // Zeroed up front (as the sibling Sampler8x8/3D setup routines do) so the
    // bind loop and nPlaneNumber never see an indeterminate count.
    int32_t                         nSurfaceEntries = 0;
    int32_t                         i               = 0;
    uint16_t                        memObjCtl       = CM_DEFAULT_CACHE_TYPE;
    uint32_t                        offsetSrc       = 0;
    uint32_t                        nBTInTable      = 0;

    if (surfIndex == CM_NULL_SURFACE)
    {
        return MOS_STATUS_SUCCESS;
    }

    // Check to see if surface is already assigned
    if (pixelPitch)
    {
        nBTInTable = state->bti2DUPIndexTable[surfIndex].BTI.samplerSurfIndex;
    }
    else
    {
        nBTInTable = state->bti2DUPIndexTable[surfIndex].BTI.regularSurfIndex;
    }

    if (btIndex == nBTInTable)
    {
        // Named differently from nSurfaceEntries on purpose: the original
        // declared a second, shadowing nSurfaceEntries in this scope.
        uint32_t cachedPlaneCount = state->bti2DUPIndexTable[surfIndex].nPlaneNumber;

        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                             ( stateHeap->iBindingTableOffset ) +                                // Moves the pointer to Base of Array of Binding Tables
                             ( bindingTable * stateHeap->iBindingTableSize ) +                   // Moves the pointer to a Particular Binding Table
                             ( btIndex * sizeof( uint32_t ) );                                   // Move the pointer to correct entry

        uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );

        // Re-use the entries recorded the last time this surface was bound.
        if (pixelPitch)
        {
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * cachedPlaneCount, state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * cachedPlaneCount );
        }
        else
        {
            MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * cachedPlaneCount, state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * cachedPlaneCount );
        }

        return MOS_STATUS_SUCCESS;
    }

    // Get Details of 2DUP surface and fill the surface
    CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, CM_ARGUMENT_SURFACE2D_UP, surfIndex, pixelPitch ) );

    // Setup 2D surface
    MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
    surfaceParam.Type     = renderHal->SurfaceTypeDefault;
    surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    if ( !pixelPitch )
    {
        surfaceParam.bWidthInDword_UV = true;
        surfaceParam.bWidthInDword_Y  = true;
    }
    surfaceParam.bRenderTarget = true;

    // Cache configurations
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
        renderHal,
        &surface,
        &surfaceParam,
        &nSurfaceEntries,
        surfaceEntries,
        nullptr));

    for (i = 0; i < nSurfaceEntries; i++)
    {
        // Bind the surface State
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
            renderHal,
            bindingTable,
            btIndex + i,
            surfaceEntries[i]));
    }

    // Record where the entries were written so a later call with the same
    // btIndex can take the copy path above.
    state->bti2DUPIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;

    stateHeap = renderHal->pStateHeap;
    offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
                ( stateHeap->iBindingTableOffset ) +                                // Moves the pointer to Base of Array of Binding Tables
                ( bindingTable * stateHeap->iBindingTableSize ) +                   // Moves the pointer to a Particular Binding Table
                ( btIndex * sizeof( uint32_t ) );                                   // Move the pointer to correct entry

    if ( pixelPitch )
    {
        state->bti2DUPIndexTable[ surfIndex ].BTI.samplerSurfIndex = btIndex;
        state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
    }
    else
    {
        state->bti2DUPIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
        state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
    }

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//!
//! \brief    Setup Sampler8x8 surface state at a caller-chosen binding table index
//! \details  Validates surfIndex against the 2D surface table, sets up an
//!           advanced-type surface state for it and binds the resulting
//!           entries starting at btIndex. The binding position is recorded in
//!           bti2DIndexTable. renderHal->bEnableP010SinglePass is switched on
//!           according to the platform for the duration of the setup and is
//!           always cleared again on exit.
//! \param    PCM_HAL_STATE state
//!           [in/out] Pointer to CM_HAL_STATE Structure
//! \param    int32_t bindingTable
//!           [in] Binding table to write into
//! \param    uint32_t surfIndex
//!           [in] 2D surface index; CM_NULL_SURFACE is accepted and ignored
//! \param    uint32_t btIndex
//!           [in] First binding table entry used by the surface
//! \param    bool pixelPitch
//!           [in] Unused
//! \param    CM_HAL_KERNEL_ARG_KIND kind
//!           [in] Argument kind; CM_ARGUMENT_SURFACE_SAMPLER8X8_VA marks a VA surface
//! \param    uint32_t addressControl
//!           [in] Value stored into the surface parameters' AddressControl
//! \return   MOS_STATUS
//!
MOS_STATUS HalCm_SetupSampler8x8SurfaceStateWithBTIndex(
    PCM_HAL_STATE           state,
    int32_t                 bindingTable,
    uint32_t                surfIndex,
    uint32_t                btIndex,
    bool                    pixelPitch,
    CM_HAL_KERNEL_ARG_KIND  kind,
    uint32_t                addressControl )
{
    MOS_STATUS                      eStatus         = MOS_STATUS_UNKNOWN;
    PRENDERHAL_INTERFACE            renderHal       = state->renderHal;
    PRENDERHAL_STATE_HEAP           stateHeap       = nullptr;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[ MHW_MAX_SURFACE_PLANES ];
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    int32_t                         nSurfaceEntries = 0;
    int32_t                         plane           = 0;
    uint16_t                        memObjCtl       = CM_DEFAULT_CACHE_TYPE;
    uint32_t                        offsetSrc       = 0;

    UNUSED(pixelPitch);

    // A null surface is not an error; leave through finish so the P010 flag
    // is still cleared.
    if ( surfIndex == CM_NULL_SURFACE )
    {
        eStatus = MOS_STATUS_SUCCESS;
        goto finish;
    }

    // The index must be inside the 2D table and refer to a live resource.
    if ( surfIndex >= state->cmDeviceParam.max2DSurfaceTableSize ||
         Mos_ResourceIsNull( &state->umdSurf2DTable[ surfIndex ].osResource ) )
    {
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        CM_ASSERTMESSAGE(
            "Invalid 2D surface array index '%d'", surfIndex );
        goto finish;
    }

    // Fetch and register the Sampler8x8 surface
    CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, kind, surfIndex, 0 ) );

    // Describe the surface state to build
    MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
    surfaceParam.Type             = renderHal->SurfaceTypeAdvanced;
    surfaceParam.Boundary         = RENDERHAL_SS_BOUNDARY_ORIGINAL;
    surfaceParam.AddressControl   = addressControl;
    surfaceParam.bRenderTarget    = true;
    surfaceParam.bWidthInDword_Y  = false;
    surfaceParam.bWidthInDword_UV = false;
    surfaceParam.bVASurface       = ( kind == CM_ARGUMENT_SURFACE_SAMPLER8X8_VA ) ? 1 : 0;

    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam );

    // Only enabled while this surface is being set up; reset at finish.
    renderHal->bEnableP010SinglePass = state->cmHalInterface->IsP010SinglePassSupported();

    nSurfaceEntries = 0;
    CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupSurfaceState(
        renderHal,
        &surface,
        &surfaceParam,
        &nSurfaceEntries,
        surfaceEntries,
        nullptr ) );

    CM_ASSERT( nSurfaceEntries == 1 );

    // Bind every returned entry at consecutive binding table slots
    for ( plane = 0; plane < nSurfaceEntries; plane++ )
    {
        CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
            renderHal,
            bindingTable,
            btIndex + plane,
            surfaceEntries[ plane ] ) );
    }

    // Remember where the binding table entry lives inside the SSH buffer
    stateHeap = renderHal->pStateHeap;
    offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Base of current SSH buffer instance
                ( stateHeap->iBindingTableOffset ) +                                // Base of the array of binding tables
                ( bindingTable * stateHeap->iBindingTableSize ) +                   // Selected binding table
                ( btIndex * sizeof( uint32_t ) );                                   // Selected entry

    state->bti2DIndexTable[ surfIndex ].BTI.sampler8x8SurfIndex = btIndex;
    state->bti2DIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
    state->bti2DIndexTable[ surfIndex ].BTITableEntry.sampler8x8BtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;

    eStatus = MOS_STATUS_SUCCESS;

finish:
    renderHal->bEnableP010SinglePass = false;
    return eStatus;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Setup 3D surface State with BTIndex
//|             Binds the 3D surface surfIndex into bindingTable starting at
//|             entry btIndex. When bti3DIndexTable already records the surface
//|             at btIndex, the previously written binding table entries are
//|             copied instead of building the surface state again.
//| Input :     state        - Hal CM State
//|             bindingTable - binding table to write into
//|             surfIndex    - 3D surface index (CM_NULL_SURFACE is ignored)
//|             btIndex      - first binding table entry used by the surface
//| Returns:    Result of the operation; MOS_STATUS_INVALID_PARAMETER when
//|             surfIndex is out of range or refers to a null resource
//*-----------------------------------------------------------------------------
MOS_STATUS HalCm_Setup3DSurfaceStateWithBTIndex(
    PCM_HAL_STATE   state,
    int32_t         bindingTable,
    uint32_t        surfIndex,
    uint32_t        btIndex)
{
    PRENDERHAL_INTERFACE            renderHal       = state->renderHal;
    MOS_STATUS                      eStatus         = MOS_STATUS_UNKNOWN;
    RENDERHAL_SURFACE               surface;
    RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
    PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
    int32_t                         nSurfaceEntries = 0;
    int32_t                         i               = 0;
    uint16_t                        memObjCtl       = CM_DEFAULT_CACHE_TYPE;
    uint32_t                        offsetSrc       = 0;
    uint32_t                        nBTInTable      = 0;
    PRENDERHAL_STATE_HEAP           stateHeap       = nullptr;

    if (surfIndex == CM_NULL_SURFACE)
    {
        return MOS_STATUS_SUCCESS;
    }

    // check the surfIndex
    if (surfIndex >= state->cmDeviceParam.max3DSurfaceTableSize ||
        Mos_ResourceIsNull(&state->surf3DTable[surfIndex].osResource))
    {
        CM_ASSERTMESSAGE(
            "Invalid 3D surface array index '%d'", surfIndex);
        // The original stored INVALID_PARAMETER in eStatus and then returned
        // MOS_STATUS_UNKNOWN; report the status that was actually intended,
        // matching the Sampler8x8 setup routine.
        return MOS_STATUS_INVALID_PARAMETER;
    }

    // Check to see if surface is already assigned
    nBTInTable = state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex;
    if (btIndex == nBTInTable)
    {
        nSurfaceEntries = state->bti3DIndexTable[surfIndex].nPlaneNumber;

        stateHeap = renderHal->pStateHeap;

        // Get Offset to Current Binding Table
        uint32_t offsetDst = (stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize) +   // Points to the Base of Current SSH Buffer Instance
                             (stateHeap->iBindingTableOffset) +                                // Moves the pointer to Base of Array of Binding Tables
                             (bindingTable * stateHeap->iBindingTableSize) +                   // Moves the pointer to a Particular Binding Table
                             (btIndex * sizeof(uint32_t));                                     // Move the pointer to correct entry

        uint32_t *bindingTableEntry = (uint32_t*)(stateHeap->pSshBuffer + offsetDst);

        // Re-use the entries recorded the last time this surface was bound.
        MOS_SecureMemcpy(bindingTableEntry, sizeof(uint32_t)* nSurfaceEntries, state->bti3DIndexTable[surfIndex].BTITableEntry.regularBtiEntryPosition, sizeof(uint32_t)* nSurfaceEntries);

        return MOS_STATUS_SUCCESS;
    }

    // Get Details of 3D surface and fill the surface
    CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE3D, surfIndex, false));

    // Setup 3D surface
    MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
    surfaceParam.Type     = renderHal->SurfaceTypeDefault;
    surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;

    // Cache configurations
    state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);

    // Set bRenderTarget by default
    surfaceParam.bRenderTarget = true;

    CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
        renderHal,
        &surface,
        &surfaceParam,
        &nSurfaceEntries,
        surfaceEntries,
        nullptr));

    for (i = 0; i < nSurfaceEntries; i++)
    {
        // Bind the surface State
        CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
            renderHal,
            bindingTable,
            btIndex + i,
            surfaceEntries[i]));
    }

    // Get Offset to Current Binding Table
    stateHeap = renderHal->pStateHeap;
    offsetSrc = (stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize) +   // Points to the Base of Current SSH Buffer Instance
                (stateHeap->iBindingTableOffset) +                                // Moves the pointer to Base of Array of Binding Tables
                (bindingTable * stateHeap->iBindingTableSize) +                   // Moves the pointer to a Particular Binding Table
                (btIndex * sizeof(uint32_t));                                     // Move the pointer to correct entry

    // Record the binding so a later call with the same btIndex can take the
    // copy path above (the original assigned regularSurfIndex twice).
    state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex = btIndex;
    state->bti3DIndexTable[surfIndex].nPlaneNumber = nSurfaceEntries;
    state->bti3DIndexTable[surfIndex].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;

    eStatus = MOS_STATUS_SUCCESS;

finish:
    return eStatus;
}
//|-----------------------------------------------------------------------------
//| Purpose   : Tag-based Synchronization on Resource
//| Input     : state   - Hal CM State
//|             surface - surface whose OS resource is synchronized
//|             isWrite - Write or Read
//| Returns   : MOS_STATUS_UNKNOWN when the surface or its resource is null,
//|             MOS_STATUS_SUCCESS otherwise
//|-----------------------------------------------------------------------------
MOS_STATUS HalCm_SyncOnResource(
    PCM_HAL_STATE   state,
    PMOS_SURFACE    surface,
    bool            isWrite)
{
    PMOS_INTERFACE osInterface = state->osInterface;

    // Reject a missing surface or one without a backing resource.
    const bool usable = (surface != nullptr) && !Mos_ResourceIsNull(&surface->OsResource);
    if (!usable)
    {
        CM_ASSERTMESSAGE("Input resource is not valid.");
        return MOS_STATUS_UNKNOWN;
    }

    // Sync against the GPU context currently selected on the OS interface.
    osInterface->pfnSyncOnResource(
        osInterface,
        &(surface->OsResource),
        osInterface->CurrentGpuContextOrdinal,
        isWrite);

    // Sync Render Target with Overlay Context
    if (surface->bOverlay)
    {
        osInterface->pfnSyncOnOverlayResource(
            osInterface,
            &(surface->OsResource),
            osInterface->CurrentGpuContextOrdinal);
    }

    return MOS_STATUS_SUCCESS;
}
//!
//! \brief    Send Media Walker State
//! \details  Send MEDIA_OBJECT_WALKER command. The walker parameters are
//!           copied from the kernel parameters; the scoreboard enable always
//!           comes from the VFE scoreboard, while the scoreboard mask comes
//!           from the per-kernel thread space when one is set (non-zero
//!           threadSpaceWidth) and from the VFE scoreboard otherwise.
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE Structure
//! \param    PCM_HAL_KERNEL_PARAM kernelParam
//!           [in] Pointer to the kernel parameters holding the walker settings
//! \param    PMOS_COMMAND_BUFFER cmdBuffer
//!           [in] Pointer to Command Buffer
//! \return   MOS_STATUS
//!
MOS_STATUS HalCm_SendMediaWalkerState(
    PCM_HAL_STATE           state,
    PCM_HAL_KERNEL_PARAM    kernelParam,
    PMOS_COMMAND_BUFFER     cmdBuffer)
{
    PRENDERHAL_INTERFACE    renderHal = state->renderHal;
    MHW_WALKER_PARAMS       mediaWalkerParams;

    // NOTE(review): copies sizeof(CM_HAL_WALKER_PARAMS) bytes into an
    // MHW_WALKER_PARAMS; presumably the two layouts are kept in sync - confirm.
    MOS_SecureMemcpy(&mediaWalkerParams, sizeof(MHW_WALKER_PARAMS), &kernelParam->walkerParams, sizeof(CM_HAL_WALKER_PARAMS));

    // Scoreboard enable is the same regardless of where the mask comes from.
    mediaWalkerParams.UseScoreboard = renderHal->VfeScoreboard.ScoreboardEnable;

    // Default: no per-kernel thread space, use the per-task dependency mask.
    mediaWalkerParams.ScoreboardMask = renderHal->VfeScoreboard.ScoreboardMask;
    if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth)
    {
        // Per-kernel thread space is set, use its own dependency mask.
        mediaWalkerParams.ScoreboardMask = kernelParam->kernelThreadSpaceParam.globalDependencyMask;
    }

    return renderHal->pMhwRenderInterface->AddMediaObjectWalkerCmd(
        cmdBuffer, &mediaWalkerParams);
}
//!
//! \brief    Send GpGpu Walker State
//! \details  Send GPGPU_WALKER state built from the kernel's GPGPU walker
//!           parameters and SLM size
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE Structure
//! \param    PCM_HAL_KERNEL_PARAM kernelParam
//!           [in] Pointer to the kernel parameters holding the walker settings
//! \param    PMOS_COMMAND_BUFFER cmdBuffer
//!           [in] Pointer to Command Buffer
//! \return   MOS_STATUS
//!
MOS_STATUS HalCm_SendGpGpuWalkerState(
    PCM_HAL_STATE           state,
    PCM_HAL_KERNEL_PARAM    kernelParam,
    PMOS_COMMAND_BUFFER     cmdBuffer)
{
    MhwRenderInterface      *mhwRender = state->renderHal->pMhwRenderInterface;
    MHW_GPGPU_WALKER_PARAMS gpGpuWalkerParams;

    // Start from an all-zero parameter block: only the fields below are filled
    // in here, so any other member of the structure must not be handed to the
    // command builder with an indeterminate stack value.
    MOS_ZeroMemory(&gpGpuWalkerParams, sizeof(gpGpuWalkerParams));

    gpGpuWalkerParams.InterfaceDescriptorOffset = kernelParam->gpgpuWalkerParams.interfaceDescriptorOffset;
    gpGpuWalkerParams.GpGpuEnable               = kernelParam->gpgpuWalkerParams.gpgpuEnabled;
    gpGpuWalkerParams.GroupWidth                = kernelParam->gpgpuWalkerParams.groupWidth;
    gpGpuWalkerParams.GroupHeight               = kernelParam->gpgpuWalkerParams.groupHeight;
    gpGpuWalkerParams.GroupDepth                = kernelParam->gpgpuWalkerParams.groupDepth;
    gpGpuWalkerParams.ThreadWidth               = kernelParam->gpgpuWalkerParams.threadWidth;
    gpGpuWalkerParams.ThreadHeight              = kernelParam->gpgpuWalkerParams.threadHeight;
    gpGpuWalkerParams.ThreadDepth               = kernelParam->gpgpuWalkerParams.threadDepth;
    gpGpuWalkerParams.SLMSize                   = kernelParam->slmSize;

    return mhwRender->AddGpGpuWalkerStateCmd(cmdBuffer, &gpGpuWalkerParams);
}
//!
//! \brief    Surface Format Convert
//! \details  Convert RENDERHAL_SURFACE to MHW_VEBOX_SURFACE. dwUYoffset is
//!           only written when the source pitch is non-zero.
//! \param    PRENDERHAL_SURFACE renderHalSurface
//!           [in] Pointer to RENDERHAL_SURFACE Structure
//! \param    PMHW_VEBOX_SURFACE_PARAMS mhwVeboxSurface
//!           [out] Pointer to the MHW_VEBOX_SURFACE_PARAMS to fill
//! \return   MOS_STATUS
//!
MOS_STATUS HalCm_Convert_RENDERHAL_SURFACE_To_MHW_VEBOX_SURFACE(
    PRENDERHAL_SURFACE              renderHalSurface,
    PMHW_VEBOX_SURFACE_PARAMS       mhwVeboxSurface)
{
    MOS_STATUS      eStatus   = MOS_STATUS_SUCCESS;
    PMOS_SURFACE    osSurface = nullptr;

    // Both pointers are required; the macros leave through finish on null.
    CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHalSurface);
    CM_CHK_NULL_GOTOFINISH_MOSERROR(mhwVeboxSurface);

    osSurface = &renderHalSurface->OsSurface;

    // Straight field-for-field copies
    mhwVeboxSurface->pOsResource = &osSurface->OsResource;
    mhwVeboxSurface->rcMaxSrc    = renderHalSurface->rcMaxSrc;
    mhwVeboxSurface->TileType    = osSurface->TileType;
    mhwVeboxSurface->Format      = osSurface->Format;
    mhwVeboxSurface->dwWidth     = osSurface->dwWidth;
    mhwVeboxSurface->dwHeight    = osSurface->dwHeight;
    mhwVeboxSurface->dwPitch     = osSurface->dwPitch;

    // U plane row offset relative to the Y plane; skipped for a zero pitch
    // to avoid dividing by zero.
    if (osSurface->dwPitch > 0)
    {
        mhwVeboxSurface->dwUYoffset = ((osSurface->UPlaneOffset.iSurfaceOffset - osSurface->YPlaneOffset.iSurfaceOffset) / osSurface->dwPitch)
                                      + osSurface->UPlaneOffset.iYOffset;
    }

finish:
    return eStatus;
}
//!
//! \brief    Set Vtune Profiling Flag
//! \details  Turn the Vtune profiling flag on or off by storing it in
//!           state->vtuneProfilerOn
//! \param    PCM_HAL_STATE state
//!           [in/out] Pointer to CM_HAL_STATE Structure
//! \param    bool vtuneOn
//!           [in] New value of the flag
//! \return   MOS_STATUS_SUCCESS
//!
MOS_STATUS HalCm_SetVtuneProfilingFlag(
    PCM_HAL_STATE   state,
    bool            vtuneOn)
{
    state->vtuneProfilerOn = vtuneOn;

    return MOS_STATUS_SUCCESS;
}
//*-----------------------------------------------------------------------------
//| Purpose:    Get the offset for the Task Sync Location given the task ID,
//|             i.e. taskId multiplied by the platform's time stamp resource size
//| Returns:    Sync Location
//*-----------------------------------------------------------------------------
int32_t HalCm_GetTaskSyncLocation(
    PCM_HAL_STATE   state,
    int32_t         taskId)     // [in] Task ID
{
    // Each task owns one time-stamp-resource-sized slot.
    return static_cast<int32_t>(taskId * state->cmHalInterface->GetTimeStampResourceSize());
}
//!
//! \brief    Convert CM L3 settings to the legacy RenderHal L3 settings
//! \details  Value-initializes the destination and then copies every override
//!           flag and register value across field by field
//! \param    CmHalL3Settings *l3SettingsPtr
//!           [in] Source CM L3 settings
//! \param    RENDERHAL_L3_CACHE_SETTINGS *l3SettingsLegacyPtr
//!           [out] Destination RenderHal L3 settings
//! \return   void
//!
void HalCm_GetLegacyRenderHalL3Setting( CmHalL3Settings *l3SettingsPtr, RENDERHAL_L3_CACHE_SETTINGS *l3SettingsLegacyPtr )
{
    // Reset the destination so fields not listed below are cleared.
    *l3SettingsLegacyPtr = {};

    const CmHalL3Settings       &cmSettings = *l3SettingsPtr;
    RENDERHAL_L3_CACHE_SETTINGS &legacy     = *l3SettingsLegacyPtr;

    // General switches
    legacy.bOverride         = cmSettings.overrideSettings;
    legacy.bEnableSLM        = cmSettings.enableSlm;
    legacy.bL3CachingEnabled = cmSettings.l3CachingEnabled;

    // Per-register override flags
    legacy.bCntlRegOverride  = cmSettings.cntlRegOverride;
    legacy.bCntlReg2Override = cmSettings.cntlReg2Override;
    legacy.bCntlReg3Override = cmSettings.cntlReg3Override;
    legacy.bSqcReg1Override  = cmSettings.sqcReg1Override;
    legacy.bSqcReg4Override  = cmSettings.sqcReg4Override;
    legacy.bLra1RegOverride  = cmSettings.lra1RegOverride;

    // Register values
    legacy.dwCntlReg  = cmSettings.cntlReg;
    legacy.dwCntlReg2 = cmSettings.cntlReg2;
    legacy.dwCntlReg3 = cmSettings.cntlReg3;
    legacy.dwSqcReg1  = cmSettings.sqcReg1;
    legacy.dwSqcReg4  = cmSettings.sqcReg4;
    legacy.dwLra1Reg  = cmSettings.lra1Reg;
}
//!
//! \brief    Convert GPU time stamp ticks to nanoseconds
//! \details  Uses state->tsFrequency as ticks per second. When that value is
//!           zero the platform's default conversion is used instead.
//! \param    PCM_HAL_STATE state
//!           [in] Pointer to CM_HAL_STATE Structure
//! \param    uint64_t ticks
//!           [in] Tick count to convert
//! \return   uint64_t nanoseconds, rounded down
//!
uint64_t HalCm_ConvertTicksToNanoSeconds(
    PCM_HAL_STATE   state,
    uint64_t        ticks)
{
    const uint64_t nanosPerSecond = 1000000000;

    if (state->tsFrequency == 0)
    {
        // if KMD doesn't report a valid value, fall back to default configs
        return state->cmHalInterface->ConverTicksToNanoSecondsDefault(ticks);
    }

    // (ticks * 1e9) / frequency wraps around uint64_t once ticks exceeds
    // roughly 1.8e10. Converting whole seconds and the sub-second remainder
    // separately yields exactly the same floor value without the oversized
    // intermediate product.
    const uint64_t frequency      = state->tsFrequency;
    const uint64_t wholeSeconds   = ticks / frequency;
    const uint64_t remainderTicks = ticks % frequency;

    return (wholeSeconds * nanosPerSecond) + ((remainderTicks * nanosPerSecond) / frequency);
}
//!
//! \brief    Check GPU context
//! \details  Check if the GPU context is valid for CM layer. Accepted
//!           contexts are RENDER3, RENDER4, CM_COMPUTE and VEBOX; anything
//!           else raises an assert message.
//! \param    MOS_GPU_CONTEXT gpuContext
//!           [in] GPU Context ordinal
//! \return   true/false
//!
bool HalCm_IsValidGpuContext(
    MOS_GPU_CONTEXT         gpuContext)
{
    const bool validForCm = (gpuContext == MOS_GPU_CONTEXT_RENDER3)
                         || (gpuContext == MOS_GPU_CONTEXT_RENDER4)
                         || (gpuContext == MOS_GPU_CONTEXT_CM_COMPUTE)
                         || (gpuContext == MOS_GPU_CONTEXT_VEBOX);

    if (!validForCm)
    {
        CM_ASSERTMESSAGE("Invalid GPU context for CM.");
    }

    return validForCm;
}