blob: d4fcf4ffe1f55dd172237505987ac38e38d77089 [file] [log] [blame]
/*
* Copyright (c) 2017-2019, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file codechal_vdenc_vp9_g12.cpp
//! \brief VP9 VDENC encoder for GEN12.
//!
#include "codechal_vdenc_vp9_g12.h"
#include "codechal_kernel_header_g12.h"
#include "codechal_kernel_hme_g12.h"
#include "codeckrnheader.h"
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
#include "igcodeckrn_g12.h"
#endif
#include "mhw_vdbox_hcp_g12_X.h"
#include "mhw_vdbox_vdenc_g12_X.h"
#include "mhw_vdbox_g12_X.h"
#include "mhw_vdbox_vdenc_hwcmd_g12_X.h"
#include "mhw_mi_g12_X.h"
#include "mhw_render_g12_X.h"
#include "codechal_mmc_encode_vp9_g12.h"
const uint32_t CodechalVdencVp9StateG12::meCurbeInit[48] =
{
0x00000000, 0x00200010, 0x00003939, 0x77a43000, 0x00000000, 0x28300000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};
MOS_STATUS CodechalVdencVp9StateG12::UserFeatureKeyReport()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::UserFeatureKeyReport());
#if (_DEBUG || _RELEASE_INTERNAL)
CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_VDBOX_NUM_ID, m_numPipe, m_osInterface->pOsContext);
CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_VE_ID, m_useVirtualEngine, m_osInterface->pOsContext);
CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH, m_enableTileStitchByHW, m_osInterface->pOsContext);
CodecHalEncodeWriteKey(__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_SINGLE_PASS_DYS_ENABLE_ID, m_singlePassDys, m_osInterface->pOsContext);
#endif
return eStatus;
}
CodechalVdencVp9StateG12::CodechalVdencVp9StateG12(
CodechalHwInterface* hwInterface,
CodechalDebugInterface* debugInterface,
PCODECHAL_STANDARD_INFO standardInfo)
:CodechalVdencVp9State(hwInterface, debugInterface, standardInfo)
{
m_useCommonKernel = true;
m_isTilingSupported = true;
// We need the DYS kernel inside AllVP9Enc_CNLA0, for SHME we need kernels inside
// HME_DS_SCOREBOARD_KERNEL, so we need to allocate enough size in ISH for both.
uint8_t* binary = nullptr;
m_scalabilityState = nullptr;
uint32_t combinedKernelSize = 0;
pfnGetKernelHeaderAndSize = GetCommonKernelHeaderAndSizeG12;
m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_VP9_NUM_SYNC_TAGS;
m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_ENCODE_VP9_INIT_DSH_SIZE;
m_kuid = IDR_CODEC_AllVP9Enc;
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
if (m_useCommonKernel)
{
m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
eStatus = CodecHalGetKernelBinaryAndSize(
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
(uint8_t*)IGCODECKRN_G12,
#else
nullptr,
#endif
m_kuidCommon,
&binary,
&combinedKernelSize);
CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
m_hwInterface->GetStateHeapSettings()->dwIshSize +=
MOS_ALIGN_CEIL(combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
}
// Initialize to 0
MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer));
MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer));
MOS_ZeroMemory(&m_hcpScalabilitySyncBuffer, sizeof(m_hcpScalabilitySyncBuffer));
for (auto i = 0; i < m_numUncompressedSurface; i++)
{
MOS_ZeroMemory(&m_tileRecordBuffer[i].sResource, sizeof(m_tileRecordBuffer[i].sResource));
}
Mos_CheckVirtualEngineSupported(m_osInterface, false, true);
Mos_SetVirtualEngineSupported(m_osInterface, true);
for (auto i = 0; i < m_numUncompressedSurface; i++)
{
MOS_ZeroMemory(&m_tileStatsPakIntegrationBuffer[i].sResource, sizeof(m_tileStatsPakIntegrationBuffer[i].sResource));
}
MOS_ZeroMemory(&m_frameStatsPakIntegrationBuffer.sResource, sizeof(m_frameStatsPakIntegrationBuffer.sResource));
for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
{
for (auto j = 0; j < m_brcMaxNumPasses; j++)
{
MOS_ZeroMemory(&m_hucPakIntDmemBuffer[i][j], sizeof(m_hucPakIntDmemBuffer[i][j]));
}
}
MOS_ZeroMemory(&m_hucPakIntDummyBuffer, sizeof(m_hucPakIntDummyBuffer));
MOS_ZeroMemory(&m_hucPakIntBrcDataBuffer, sizeof(m_hucPakIntBrcDataBuffer));
for (auto i = 0; i < m_maxNumPipes; i++)
{
MOS_ZeroMemory(&m_stitchWaitSemaphoreMem[i], sizeof(m_stitchWaitSemaphoreMem[i]));
MOS_ZeroMemory(&m_hucDoneSemaphoreMem[i], sizeof(m_hucDoneSemaphoreMem[i]));
}
MOS_ZeroMemory(&m_pakIntDoneSemaphoreMem, sizeof(m_pakIntDoneSemaphoreMem));
}
CodechalVdencVp9StateG12::~CodechalVdencVp9StateG12()
{
CODECHAL_ENCODE_FUNCTION_ENTER;
if (m_scalabilityState)
{
MOS_FreeMemAndSetNull(m_scalabilityState);
}
//Note: virtual engine interface destroy is done in MOS layer
return;
}
// This is used only for DynamicScaling
MOS_STATUS CodechalVdencVp9StateG12::ExecuteDysPictureLevel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum");
auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex);
PerfTagSetting perfTag;
perfTag.Value = 0;
perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
perfTag.PictureCodingType = m_pictureCodingType;
m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
// We only need to update Huc PAK insert object and picture state for the first pass
if (IsFirstPass())
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(PakConstructPicStateBatchBuf(
&m_brcBuffers.resPicStateBrcWriteHucReadBuffer));
}
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
{
bool requestFrameTracking = false;
MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
forceWakeupParams.bMFXPowerWellControl = true;
forceWakeupParams.bMFXPowerWellControlMask = true;
forceWakeupParams.bHEVCPowerWellControl = true;
forceWakeupParams.bHEVCPowerWellControlMask = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(&cmdBuffer, &forceWakeupParams));
// Send command buffer header at the beginning (OS dependent)
// frame tracking tag is only added in the last command buffer header
requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
}
// Making sure ImgStatusCtrl is zeroed out before first PAK pass. HW supposedly does this before start of each frame. Remove this after confirming.
if (m_currPass == 0)
{
MHW_MI_LOAD_REGISTER_IMM_PARAMS miLoadRegImmParams;
MOS_ZeroMemory(&miLoadRegImmParams, sizeof(miLoadRegImmParams));
miLoadRegImmParams.dwData = 0;
miLoadRegImmParams.dwRegister = mmioRegisters->hcpVp9EncImageStatusCtrlRegOffset;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiLoadRegisterImmCmd(&cmdBuffer, &miLoadRegImmParams));
}
// Read Image status before running PAK, to get correct cumulative delta applied for final pass.
if (m_currPass != m_numPasses) // Don't read it for Repak
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadImageStatus(&cmdBuffer));
}
//updating the numberofpakpasses in encode staus buffer. should not update for repak.
if (m_currPass < m_numPasses)
{
uint32_t offset =
(m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) +
m_encodeStatusBuf.dwNumPassesOffset +
sizeof(uint32_t) * 2; // encode status doesn't start until 3rd DW
MHW_MI_STORE_DATA_PARAMS storeDataParams;
MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer;
storeDataParams.dwResourceOffset = offset;
storeDataParams.dwValue = m_currPass + 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
}
if (!m_currPass && m_osInterface->bTagResourceSync)
{
// This is a short term WA to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB
// which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning
// of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine
// as long as Dec/VP/Enc won't depend on this PAK so soon.
PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
m_osInterface,
globalGpuContextSyncTagBuffer));
CODECHAL_ENCODE_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
MHW_MI_STORE_DATA_PARAMS params;
params.pOsResource = globalGpuContextSyncTagBuffer;
params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
params.dwValue = (value > 0) ? (value - 1) : 0;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiStoreDataImmCmd(&cmdBuffer, &params));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
//Send VD_CONTROL_STATE Pipe Initialization
MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
vdCtrlParam.initialization = true;
MhwMiInterfaceG12* miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
// set HCP_PIPE_MODE_SELECT values
PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
SetHcpPipeModeSelectParams(*pipeModeSelectParams);
pipeModeSelectParams->Mode = m_mode;
pipeModeSelectParams->bStreamOutEnabled = false;
pipeModeSelectParams->bVdencEnabled = false;
pipeModeSelectParams->ChromaType = m_vp9SeqParams->SeqFlags.fields.EncodedFormat;
pipeModeSelectParams->bDynamicScalingEnabled = m_dysRefFrameFlags && !m_dysVdencMultiPassEnabled;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams));
// set HCP_SURFACE_STATE values
MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
{
MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
surfaceParams[i].Mode = m_mode;
surfaceParams[i].ucSurfaceStateId = i;
surfaceParams[i].ChromaType = m_outputChromaFormat;
switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
{
case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
{
surfaceParams[i].ucBitDepthChromaMinus8 = 2;
surfaceParams[i].ucBitDepthLumaMinus8 = 2;
break;
}
default:
{
surfaceParams[i].ucBitDepthChromaMinus8 = 0;
surfaceParams[i].ucBitDepthLumaMinus8 = 0;
break;
}
}
}
// For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
PMOS_SURFACE refSurface[3];
for (auto i = 0; i < 3; i++)
{
refSurface[i] = nullptr;
}
if (m_pictureCodingType != I_TYPE)
{
uint8_t refPicIndex;
if (m_refFrameFlags & 0x01)
{
refPicIndex = m_vp9PicParams->RefFlags.fields.LastRefIdx;
CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])));
refSurface[0] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
}
if (m_refFrameFlags & 0x02)
{
refPicIndex = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])));
refSurface[1] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
}
if (m_refFrameFlags & 0x04)
{
refPicIndex = m_vp9PicParams->RefFlags.fields.AltRefIdx;
CODECHAL_ENCODE_ASSERT((refPicIndex < CODEC_VP9_NUM_REF_FRAMES) && (!CodecHal_PictureIsInvalid(m_vp9PicParams->RefFrameList[refPicIndex])))
refSurface[2] = &(m_refList[m_vp9PicParams->RefFrameList[refPicIndex].FrameIdx]->sRefBuffer);
}
if (!refSurface[0])
{
refSurface[0] = (refSurface[1]) ? refSurface[1] : refSurface[2];
}
if (!refSurface[1])
{
refSurface[1] = (refSurface[0]) ? refSurface[0] : refSurface[2];
}
if (!refSurface[2])
{
refSurface[2] = (refSurface[0]) ? refSurface[0] : refSurface[1];
}
// Program Surface params for Last/Golen/Alt Reference surface
surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].psSurface = refSurface[0];
surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].psSurface = refSurface[1];
surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].psSurface = refSurface[2];
surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[0] ? refSurface[0]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[1] ? refSurface[1]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].dwReconSurfHeight = MOS_ALIGN_CEIL((refSurface[2] ? refSurface[2]->dwHeight : 0), CODEC_VP9_MIN_BLOCK_WIDTH);
}
// recon
surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID].psSurface = &m_reconSurface;
surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID].dwReconSurfHeight = m_rawSurfaceToPak->dwHeight;
// raw
surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].psSurface = m_rawSurfaceToPak;
surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].bDisplayFormatSwizzle = m_vp9SeqParams->SeqFlags.fields.DisplayFormatSwizzle;
surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].dwActualWidth = MOS_ALIGN_CEIL(m_oriFrameWidth, CODEC_VP9_MIN_BLOCK_WIDTH);
surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID].dwActualHeight = MOS_ALIGN_CEIL(m_oriFrameHeight, CODEC_VP9_MIN_BLOCK_WIDTH);
// Decoded picture
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
// Source input
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
// Last reference picture
if (refSurface[0])
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
}
// Golden reference picture
if (refSurface[1])
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
}
// Alt reference picture
if (refSurface[2])
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
}
// set HCP_PIPE_BUF_ADDR_STATE values
PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
if (pipeBufAddrParams)
{
pipeBufAddrParams->Mode = m_mode;
pipeBufAddrParams->psPreDeblockSurface = &m_reconSurface;
pipeBufAddrParams->psPostDeblockSurface = &m_reconSurface;
pipeBufAddrParams->psRawSurface = m_rawSurfaceToPak;
pipeBufAddrParams->presStreamOutBuffer = nullptr;
pipeBufAddrParams->presMfdDeblockingFilterRowStoreScratchBuffer =
&m_resDeblockingFilterLineBuffer;
pipeBufAddrParams->presDeblockingFilterTileRowStoreScratchBuffer =
&m_resDeblockingFilterTileLineBuffer;
pipeBufAddrParams->presDeblockingFilterColumnRowStoreScratchBuffer =
&m_resDeblockingFilterTileColumnBuffer;
pipeBufAddrParams->presMetadataLineBuffer = &m_resMetadataLineBuffer;
pipeBufAddrParams->presMetadataTileLineBuffer = &m_resMetadataTileLineBuffer;
pipeBufAddrParams->presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
pipeBufAddrParams->presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams));
#endif
//Huc is disabled for ref frame scaling, use input region
uint8_t frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
pipeBufAddrParams->presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
pipeBufAddrParams->presVp9SegmentIdBuffer = &m_resSegmentIdBuffer;
if (m_pictureCodingType != I_TYPE)
{
for (auto i = 0; i < 3; i++)
{
CODECHAL_ENCODE_CHK_NULL_RETURN(refSurface[i]);
pipeBufAddrParams->presReferences[i] = &refSurface[i]->OsResource;
}
pipeBufAddrParams->presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
}
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams));
MOS_Delete(pipeBufAddrParams);
}
// set HCP_IND_OBJ_BASE_ADDR_STATE values
MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams));
indObjBaseAddrParams.Mode = m_mode;
indObjBaseAddrParams.presMvObjectBuffer = &m_resMbCodeSurface;
indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset;
indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset;
indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer;
indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound;
indObjBaseAddrParams.presProbabilityDeltaBuffer = &m_resProbabilityDeltaBuffer;
indObjBaseAddrParams.dwProbabilityDeltaSize = 29 * CODECHAL_CACHELINE_SIZE;
indObjBaseAddrParams.presCompressedHeaderBuffer = &m_resCompressedHeaderBuffer;
indObjBaseAddrParams.dwCompressedHeaderSize = 32 * CODECHAL_CACHELINE_SIZE;
indObjBaseAddrParams.presProbabilityCounterBuffer = &m_resProbabilityCounterBuffer;
indObjBaseAddrParams.dwProbabilityCounterSize = 193 * CODECHAL_CACHELINE_SIZE;
indObjBaseAddrParams.presTileRecordBuffer = &m_resTileRecordStrmOutBuffer;
indObjBaseAddrParams.dwTileRecordSize = m_picSizeInSb * CODECHAL_CACHELINE_SIZE;
indObjBaseAddrParams.presCuStatsBuffer = &m_resCuStatsStrmOutBuffer;
indObjBaseAddrParams.dwCuStatsSize = MOS_ALIGN_CEIL(m_picSizeInSb * 64 * 8, CODECHAL_CACHELINE_SIZE);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
// Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
MHW_BATCH_BUFFER secondLevelBatchBuffer;
MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
secondLevelBatchBuffer.dwOffset = (m_numPasses > 0) ? CODECHAL_ENCODE_VP9_PIC_STATE_BUFFER_SIZE_PER_PASS * (m_currPass % m_numPasses) : 0;
secondLevelBatchBuffer.bSecondLevel = true;
//As Huc is disabled for Ref frame scaling, use the ReadBuffer
secondLevelBatchBuffer.OsResource = m_brcBuffers.resPicStateBrcWriteHucReadBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
&cmdBuffer,
&secondLevelBatchBuffer));
// HCP_VP9_SEGMENT_STATE
uint8_t segmentCount = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
MOS_ZeroMemory(&segmentState, sizeof(segmentState));
segmentState.Mode = m_mode;
segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
segmentState.ucQPIndexLumaAC = m_vp9PicParams->LumaACQIndex;
// For BRC with segmentation, seg state commands for PAK are copied from BRC seg state buffer
// For CQP or BRC with no segmentation, PAK still needs seg state commands and driver prepares those commands.
segmentState.pbSegStateBufferPtr = nullptr; // Set this to nullptr, for commands to be prepared by driver
segmentState.pcucLfQpLookup = &LF_VALUE_QP_LOOKUP[0];
for (uint8_t i = 0; i < segmentCount; i++)
{
segmentState.ucCurrentSegmentId = i;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpVp9SegmentStateCmd(&cmdBuffer, nullptr, &segmentState));
}
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ExecuteDysSliceLevel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(m_nalUnitParams);
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if (!m_singleTaskPhaseSupported)
{
PerfTagSetting perfTag;
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
}
MHW_BATCH_BUFFER secondLevelBatchBuffer;
MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
secondLevelBatchBuffer.dwOffset = 0;
secondLevelBatchBuffer.bSecondLevel = true;
if (!m_hucEnabled)
{
secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
}
else
{
secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
&cmdBuffer,
&secondLevelBatchBuffer));
// Setup Tile level PAK commands
CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9StateG12::SetTileData());
CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[0]));
//Reset Frame Tracking header for this submission as this is not the last submission
bool isFrameTrackingHeaderSet = cmdBuffer.Attributes.bEnableMediaFrameTracking;
cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(MHW_BATCH_BUFFER));
secondLevelBatchBuffer.OsResource = m_resMbCodeSurface;
secondLevelBatchBuffer.dwOffset = 0;
secondLevelBatchBuffer.bSecondLevel = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &secondLevelBatchBuffer));
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
// MFXPipeDone should not be set for tail insertion
vdPipelineFlushParams.Flags.bWaitDoneMFX =
(m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
vdPipelineFlushParams.Flags.bFlushHEVC = 1;
vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
if (!m_scalableMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
}
if (m_currPass >= (m_numPasses - 1)) // Last pass and the one before last
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
std::string currPassName = "PAK_PASS_DYS" + std::to_string((int)m_currPass);
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
CODECHAL_NUM_MEDIA_STATES,
currPassName.data())));
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
if (m_waitForEnc &&
!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
{
MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
syncParams.GpuContext = m_videoContext;
syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
m_waitForEnc = false;
}
if (m_currPass >= (m_numPasses - 1)) // Last pass and the one before last
{
bool renderFlags;
renderFlags = m_videoContextUsesNullHw;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
cmdBuffer.Attributes.bEnableMediaFrameTracking = isFrameTrackingHeaderSet;
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
CODECHAL_DEBUG_TOOL(
if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
//CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
//m_debugInterface->DumpBuffer(
// (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
// CodechalDbgAttr::attrOutput,
// "SegMap_Out",
// CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
// 0,
// CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
} if (m_mmcState) {
m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
});
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::InitKernelStates()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
m_kernelBase = (uint8_t*)IGCODECKRN_G12;
#endif
// KUID for HME + DS + SW SCOREBOARD Kernel
m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL;
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
// DYS
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateDys());
// SHME
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
#endif
return eStatus;
}
uint32_t CodechalVdencVp9StateG12::GetMaxBtCount()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
uint32_t maxBtCount = 0;
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
if (m_hmeSupported)
{
uint32_t scalingBtCount = 0;
uint32_t numKernelsToLoad = m_interlacedFieldDisabled ? 1 : CODEC_NUM_FIELDS_PER_FRAME;
uint16_t btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();
for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
{
scalingBtCount += MOS_ALIGN_CEIL(
m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
btIdxAlignment);
}
uint32_t meBtCount = 0;
// 4xME + Streamin kernel btcount
meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_VDENC].KernelParams.iBTCount, btIdxAlignment);
//16xME streamin kernel count added to ME count and scaling kernel 16x added to scaling count
if (m_16xMeSupported)
{
meBtCount += MOS_ALIGN_CEIL(m_meKernelStates[CODECHAL_ENCODE_ME_IDX_P].KernelParams.iBTCount, btIdxAlignment);
for (uint32_t krnStateIdx = 0; krnStateIdx < numKernelsToLoad; krnStateIdx++)
{
scalingBtCount += MOS_ALIGN_CEIL(
m_scaling4xKernelStates[krnStateIdx].KernelParams.iBTCount,
btIdxAlignment);
}
}
maxBtCount = scalingBtCount + meBtCount;
}
#endif
return maxBtCount;
}
// DYS kernel state init
MOS_STATUS CodechalVdencVp9StateG12::InitKernelStateDys()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
uint32_t combinedKernelSize = 0;
uint8_t* binary = nullptr;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
(uint8_t*)IGCODECKRN_G12,
m_kuidCommon,
&binary,
&combinedKernelSize));
uint32_t kernelSize = combinedKernelSize;
CODECHAL_KERNEL_HEADER currKrnHeader;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
binary,
ENC_DYS,
0,
&currKrnHeader,
&kernelSize));
PMHW_KERNEL_STATE kernelState = &m_dysKernelState;
kernelState->KernelParams.iBTCount = MOS_ALIGN_CEIL(m_dysNumSurfaces, m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment());
kernelState->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(m_dysStaticDataSize, m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;// just assign this to 16, the block resolution for the kernel is decided when its launched depending on the std.
kernelState->KernelParams.iIdCount = 1;
kernelState->KernelParams.iSamplerCount = 1;
kernelState->KernelParams.iSamplerLength = m_stateHeapInterface->pStateHeapInterface->GetSizeofSamplerStateAvs();
kernelState->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
kernelState->dwSamplerOffset = MOS_ALIGN_CEIL(kernelState->dwCurbeOffset + kernelState->KernelParams.iCurbeLength, MHW_SAMPLER_STATE_AVS_ALIGN_G9);
kernelState->KernelParams.pBinary =
binary +
(currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
kernelState->KernelParams.iSize = kernelSize;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
m_stateHeapInterface,
kernelState->KernelParams.iBTCount,
&kernelState->dwSshSize,
&kernelState->dwBindingTableSize));
m_dysDshSize = kernelState->dwSamplerOffset +
MOS_ALIGN_CEIL(kernelState->KernelParams.iSamplerLength * kernelState->KernelParams.iSamplerCount, MHW_SAMPLER_STATE_AVS_ALIGN);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelState));
#endif
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetupSegmentationStreamIn()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (!m_segmentMapProvided && !m_hmeEnabled) // If we're not going to use the streamin surface leave now
{
return eStatus;
}
MOS_LOCK_PARAMS lockFlagsWriteOnly;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
MOS_LOCK_PARAMS lockFlagsReadOnly;
MOS_ZeroMemory(&lockFlagsReadOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsReadOnly.ReadOnly = 1;
mhw_vdbox_vdenc_g12_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *
streamIn = (mhw_vdbox_vdenc_g12_X::VDENC_HEVC_VP9_STREAMIN_STATE_CMD *)m_osInterface->pfnLockResource(
m_osInterface,
&m_resVdencStreamInBuffer[m_currRecycledBufIdx],
&lockFlagsWriteOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(streamIn);
// align to cache line size is OK since streamin state is padded to cacheline size - HW uses cacheline size to read, not command size
uint32_t blockWidth = MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
uint32_t blockHeight = MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
MOS_ZeroMemory(streamIn, streamInSize);
// If segment map isn't provided then we unlock surface and exit function here.
// Reason why check isn't done before function call is to take advantage of the fact that
// we need the surface locked here if seg map is provided and we want it 0'd either way.
// This saves us from doing 2 locks on this buffer per frame.
if (!m_segmentMapProvided)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
m_osInterface,
&m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
return eStatus;
}
char *data = (char *)m_osInterface->pfnLockResource(
m_osInterface,
&m_mbSegmentMapSurface.OsResource,
&lockFlagsReadOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
// Rasterization is done within a tile and then for each tile within the frame in raster order.
if (m_isTilingSupported)
{
uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
uint32_t numTiles = numTileColumns * numTileRows;
uint32_t currTileStartX64Aligned = 0, dwCurrTileStartY64Aligned = 0; //Set tile Y coordinate 0
m_32BlocksRasterized = 0; //Count of rasterized blocks for this frame
uint32_t tileX = 0;
uint32_t tileY = 0;
for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
{
tileX = tileIdx % numTileColumns; //Current tile column position
tileY = tileIdx / numTileColumns; //Current tile row position
currTileStartX64Aligned = ((tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
dwCurrTileStartY64Aligned = ((tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) *
CODEC_VP9_SUPER_BLOCK_WIDTH) -
currTileStartX64Aligned;
uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) *
CODEC_VP9_SUPER_BLOCK_HEIGHT) -
dwCurrTileStartY64Aligned;
// last tile col raw width and raw height not necessarily 64 aligned, use this length to duplicate values from segmap for empty padding blocks in last tiles.
uint32_t lastTileColWidth = (tileX == (numTileColumns - 1)) ? (m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (m_frameHeight - dwCurrTileStartY64Aligned) : tileHeight64Aligned;
uint32_t tileWidth = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
// Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
// which was processed from this frame or previous,
// or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
if (!m_mapBuffer ||
tileHeight != m_segStreamInHeight ||
tileWidth != m_segStreamInWidth ||
numTileColumns != m_tileParams[tileIdx].NumOfTileColumnsInFrame ||
m_tileParams[tileIdx].NumOfTilesInFrame != numTiles)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(tileHeight,
tileWidth,
dwCurrTileStartY64Aligned,
currTileStartX64Aligned));
}
m_tileParams[tileIdx].NumOfTileColumnsInFrame = numTileColumns;
m_tileParams[tileIdx].NumOfTilesInFrame = numTiles;
}
}
uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
if (GetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
{
//application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer
//driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
dwPitch = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
}
// set seg ID's of streamin states
for (uint32_t i = 0; i < blockHeight * blockWidth; ++i)
{
uint32_t addrOffset = CalculateBufferOffset(
m_mapBuffer[i],
m_frameWidth,
m_vp9PicParams->PicFlags.fields.seg_id_block_size,
dwPitch);
uint32_t segId = *(data + addrOffset);
streamIn[i].DW7.SegidEnable = 1;
streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
// TU functions copied from there.
streamIn[i].DW0.Maxtusize = 3;
streamIn[i].DW0.Maxcusize = 3;
// For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
if ((i % 4) == 3 && m_pictureCodingType == P_TYPE)
{
if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
{
streamIn[i - 3].DW0.Maxcusize = streamIn[i - 2].DW0.Maxcusize = streamIn[i - 1].DW0.Maxcusize = streamIn[i].DW0.Maxcusize = 2;
}
}
streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
switch (m_vp9SeqParams->TargetUsage)
{
case 1: // Quality mode
case 2:
case 4: // Normal mode
streamIn[i].DW6.Nummergecandidatecu8X8 = 1;
streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
streamIn[i].DW6.Nummergecandidatecu32X32 = 3;
streamIn[i].DW6.Nummergecandidatecu64X64 = 4;
break;
case 7: // Speed mode
streamIn[i].DW0.Numimepredictors = CODECHAL_VDENC_NUMIMEPREDICTORS_SPEED;
streamIn[i].DW6.Nummergecandidatecu8X8 = 0;
streamIn[i].DW6.Nummergecandidatecu16X16 = 2;
streamIn[i].DW6.Nummergecandidatecu32X32 = 2;
streamIn[i].DW6.Nummergecandidatecu64X64 = 2;
break;
default:
MHW_ASSERTMESSAGE("Invalid TU provided!");
return MOS_STATUS_INVALID_PARAMETER;
}
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
m_osInterface,
&m_mbSegmentMapSurface.OsResource));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
m_osInterface,
&m_resVdencStreamInBuffer[m_currRecycledBufIdx]));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::GetSystemPipeNumberCommon()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
MOS_STATUS statusKey = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
MOS_USER_FEATURE_VALUE_DATA userFeatureData;
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
statusKey = MOS_UserFeature_ReadValue_ID(
NULL,
__MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY,
&userFeatureData,
m_osInterface->pOsContext);
bool disableScalability = m_hwInterface->IsDisableScalability();
if (statusKey == MOS_STATUS_SUCCESS)
{
disableScalability = userFeatureData.i32Data ? true : false;
}
MEDIA_SYSTEM_INFO *gtSystemInfo = m_gtSystemInfo;
if (gtSystemInfo && disableScalability == false)
{
// Both VE mode and media solo mode should be able to get the VDBOX number via the same interface
m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled);
}
else
{
m_numVdbox = 1;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::InitKernelStateMe()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderEngineInterface->GetHwCaps());
uint32_t combinedKernelSize = 0;
uint8_t* binary = nullptr;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
m_kernelBase,
m_kuidCommon,
&binary,
&combinedKernelSize));
for (uint32_t krnStateIdx = 0; krnStateIdx < 2; krnStateIdx++)
{
CODECHAL_KERNEL_HEADER currKrnHeader;
PMHW_KERNEL_STATE kernelStatePtr = &m_meKernelStates[krnStateIdx];
uint32_t kernelSize = combinedKernelSize;
EncOperation encOperation = (krnStateIdx > 0 && m_vdencEnabled) ?
(m_useNonLegacyStreamin ? VDENC_STREAMIN_HEVC : VDENC_ME) : ENC_ME;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommonKernelHeaderAndSizeG12(
binary,
encOperation,
(encOperation != ENC_ME) ? 0 : krnStateIdx,
&currKrnHeader,
&kernelSize));
kernelStatePtr->KernelParams.iBTCount = CODECHAL_ENCODE_ME_NUM_SURFACES_G12;
kernelStatePtr->KernelParams.iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
kernelStatePtr->KernelParams.iCurbeLength = sizeof(MeCurbe);
kernelStatePtr->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
kernelStatePtr->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
kernelStatePtr->KernelParams.iIdCount = 1;
kernelStatePtr->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
kernelStatePtr->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
kernelStatePtr->KernelParams.iSize = kernelSize;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
m_stateHeapInterface,
kernelStatePtr->KernelParams.iBTCount,
&kernelStatePtr->dwSshSize,
&kernelStatePtr->dwBindingTableSize));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));
if (m_noMeKernelForPFrame)
{
m_meKernelStates[1] = m_meKernelStates[0];
break;
}
}
// Until a better way can be found, maintain old binding table structures
MeKernelBindingTable* bindingTable = &m_meBindingTable;
bindingTable->dwMEMVDataSurface = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G12;
bindingTable->dw16xMEMVDataSurface = CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G12;
bindingTable->dw32xMEMVDataSurface = CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G12;
bindingTable->dwMEDist = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G12;
bindingTable->dwMEBRCDist = CODECHAL_ENCODE_ME_BRC_DISTORTION_G12;
bindingTable->dwMECurrForFwdRef = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G12;
bindingTable->dwMEFwdRefPicIdx[0] = CODECHAL_ENCODE_ME_FWD_REF_IDX0_G12;
bindingTable->dwMEFwdRefPicIdx[1] = CODECHAL_ENCODE_ME_FWD_REF_IDX1_G12;
bindingTable->dwMEFwdRefPicIdx[2] = CODECHAL_ENCODE_ME_FWD_REF_IDX2_G12;
bindingTable->dwMEFwdRefPicIdx[3] = CODECHAL_ENCODE_ME_FWD_REF_IDX3_G12;
bindingTable->dwMEFwdRefPicIdx[4] = CODECHAL_ENCODE_ME_FWD_REF_IDX4_G12;
bindingTable->dwMEFwdRefPicIdx[5] = CODECHAL_ENCODE_ME_FWD_REF_IDX5_G12;
bindingTable->dwMEFwdRefPicIdx[6] = CODECHAL_ENCODE_ME_FWD_REF_IDX6_G12;
bindingTable->dwMEFwdRefPicIdx[7] = CODECHAL_ENCODE_ME_FWD_REF_IDX7_G12;
bindingTable->dwMECurrForBwdRef = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G12;
bindingTable->dwMEBwdRefPicIdx[0] = CODECHAL_ENCODE_ME_BWD_REF_IDX0_G12;
bindingTable->dwMEBwdRefPicIdx[1] = CODECHAL_ENCODE_ME_BWD_REF_IDX1_G12;
bindingTable->dwVdencStreamInSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G12;
bindingTable->dwVdencStreamInInputSurface = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G12;
#endif
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetCurbeMe(
MeCurbeParams* params)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_CHK_NULL_RETURN(params);
CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
CODECHAL_ENCODE_ASSERT(params->TargetUsage <= NUM_TARGET_USAGE_MODES);
uint32_t scaleFactor = 0;
bool useMvFromPrevStep = false, writeDistortions = false;
uint8_t mvShiftFactor = 0, prevMvReadPosFactor = 0;
bool framePicture = CodecHal_PictureIsFrame(params->CurrOriginalPic);
char qpPrimeY = (params->pic_init_qp_minus26 + 26) + params->slice_qp_delta;
switch (params->hmeLvl)
{
case HME_LEVEL_32x:
useMvFromPrevStep = m_hmeFirstStep;
writeDistortions = false;
scaleFactor = SCALE_FACTOR_32x;
mvShiftFactor = m_mvShiftFactor32x;
break;
case HME_LEVEL_16x:
useMvFromPrevStep = (params->b32xMeEnabled) ? m_hmeFollowingStep : m_hmeFirstStep;
writeDistortions = false;
scaleFactor = SCALE_FACTOR_16x;
mvShiftFactor = m_mvShiftFactor16x;
prevMvReadPosFactor = m_prevMvReadPosition16x;
break;
case HME_LEVEL_4x:
useMvFromPrevStep = (params->b16xMeEnabled) ? m_hmeFollowingStep : m_hmeFirstStep;
writeDistortions = true;
scaleFactor = SCALE_FACTOR_4x;
mvShiftFactor = m_mvShiftFactor4x;
prevMvReadPosFactor = m_prevMvReadPosition4x;
break;
default:
return MOS_STATUS_INVALID_PARAMETER;
}
MeCurbe cmd;
CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(
&cmd,
sizeof(MeCurbe),
meCurbeInit,
sizeof(MeCurbe)));
cmd.DW3.SubPelMode = 3;
if (m_fieldScalingOutputInterleaved)
{
cmd.DW3.SrcAccess =
cmd.DW3.RefAccess = CodecHal_PictureIsField(params->CurrOriginalPic) ? 1 : 0;
cmd.DW7.SrcFieldPolarity = CodecHal_PictureIsBottomField(params->CurrOriginalPic) ? 1 : 0;
}
cmd.DW4.PictureHeightMinus1 = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scaleFactor) - 1;
cmd.DW4.PictureWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth / scaleFactor);
cmd.DW5.QpPrimeY = qpPrimeY;
cmd.DW6.WriteDistortions = writeDistortions;
cmd.DW6.UseMvFromPrevStep = useMvFromPrevStep;
cmd.DW6.SuperCombineDist = m_superCombineDistGeneric[params->TargetUsage];
cmd.DW6.MaxVmvR = (framePicture) ?
params->MaxMvLen * 4 : (params->MaxMvLen >> 1) * 4;
if (m_pictureCodingType == B_TYPE)
{
// This field is irrelevant since we are not using the bi-direct search.
cmd.DW1.BiWeight = 32;
cmd.DW13.NumRefIdxL1MinusOne = params->num_ref_idx_l1_active_minus1;
}
if (m_pictureCodingType == P_TYPE ||
m_pictureCodingType == B_TYPE)
{
if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
{
cmd.DW30.ActualMBHeight = m_frameHeight;
cmd.DW30.ActualMBWidth = m_frameWidth;
}
else if (m_vdencEnabled && m_16xMeSupported)
{
cmd.DW30.ActualMBHeight = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
cmd.DW30.ActualMBWidth = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameWidth);
}
cmd.DW13.NumRefIdxL0MinusOne =
params->num_ref_idx_l0_active_minus1;
}
cmd.DW13.RefStreaminCost = 5;
// This flag is to indicate the ROI source type instead of indicating ROI is enabled or not
cmd.DW13.ROIEnable = 0;
if (!framePicture)
{
if (m_pictureCodingType != I_TYPE)
{
cmd.DW14.List0RefID0FieldParity = params->List0RefID0FieldParity;
cmd.DW14.List0RefID1FieldParity = params->List0RefID1FieldParity;
cmd.DW14.List0RefID2FieldParity = params->List0RefID2FieldParity;
cmd.DW14.List0RefID3FieldParity = params->List0RefID3FieldParity;
cmd.DW14.List0RefID4FieldParity = params->List0RefID4FieldParity;
cmd.DW14.List0RefID5FieldParity = params->List0RefID5FieldParity;
cmd.DW14.List0RefID6FieldParity = params->List0RefID6FieldParity;
cmd.DW14.List0RefID7FieldParity = params->List0RefID7FieldParity;
}
if (m_pictureCodingType == B_TYPE)
{
cmd.DW14.List1RefID0FieldParity = params->List1RefID0FieldParity;
cmd.DW14.List1RefID1FieldParity = params->List1RefID1FieldParity;
}
}
cmd.DW15.MvShiftFactor = mvShiftFactor;
cmd.DW15.PrevMvReadPosFactor = prevMvReadPosFactor;
// r3 & r4
uint8_t targetUsage = params->TargetUsage;
uint8_t meMethod = 0;
if (m_pictureCodingType == B_TYPE)
{
meMethod = params->pBMEMethodTable ?
params->pBMEMethodTable[targetUsage]
: m_bMeMethodGeneric[targetUsage];
}
else
{
meMethod = params->pMEMethodTable ?
params->pMEMethodTable[targetUsage]
: m_meMethodGeneric[targetUsage];
}
uint8_t tableIdx = (m_pictureCodingType == B_TYPE) ? 1 : 0;
eStatus = MOS_SecureMemcpy(&(cmd.SPDelta), 14 * sizeof(uint32_t), m_encodeSearchPath[tableIdx][meMethod], 14 * sizeof(uint32_t));
if (eStatus != MOS_STATUS_SUCCESS)
{
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory.");
return eStatus;
}
// Non legacy stream in is for hevc vp9 streamin kernel
if (params->hmeLvl == HME_LEVEL_4x && m_useNonLegacyStreamin)
{
//StreamIn CURBE
cmd.DW6.LCUSize = 1;//Only LCU64 supported by the VDEnc HW
cmd.DW6.InputStreamInSurfaceEnable = params->segmapProvided;
cmd.DW31.MaxCuSize = 3;
cmd.DW31.MaxTuSize = 3;
cmd.DW31.NumImePredictors = CODECHAL_VDENC_NUMIMEPREDICTORS;
switch (params->TargetUsage)
{
case 1: // Quality mode
case 2:
case 4: // Normal mode
cmd.DW36.NumMergeCandidateCu64x64 = 4;
cmd.DW36.NumMergeCandidateCu32x32 = 3;
cmd.DW36.NumMergeCandidateCu16x16 = 2;
cmd.DW36.NumMergeCandidateCu8x8 = 1;
break;
case 7: // Speed mode
cmd.DW36.NumMergeCandidateCu64x64 = 2;
cmd.DW36.NumMergeCandidateCu32x32 = 2;
cmd.DW36.NumMergeCandidateCu16x16 = 2;
cmd.DW36.NumMergeCandidateCu8x8 = 0;
cmd.DW31.NumImePredictors = CODECHAL_VDENC_NUMIMEPREDICTORS_SPEED;
break;
default:
MHW_ASSERTMESSAGE("Invalid TU provided!");
return MOS_STATUS_INVALID_PARAMETER;
}
}
// r5
cmd.DW40._4xMeMvOutputDataSurfIndex = CODECHAL_ENCODE_ME_MV_DATA_SURFACE_G12;
cmd.DW41._16xOr32xMeMvInputDataSurfIndex = (params->hmeLvl == HME_LEVEL_32x) ?
CODECHAL_ENCODE_32xME_MV_DATA_SURFACE_G12 : CODECHAL_ENCODE_16xME_MV_DATA_SURFACE_G12;
cmd.DW42._4xMeOutputDistSurfIndex = CODECHAL_ENCODE_ME_DISTORTION_SURFACE_G12;
cmd.DW43._4xMeOutputBrcDistSurfIndex = CODECHAL_ENCODE_ME_BRC_DISTORTION_G12;
cmd.DW44.VMEFwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_FWD_REF_G12;
cmd.DW45.VMEBwdInterPredictionSurfIndex = CODECHAL_ENCODE_ME_CURR_FOR_BWD_REF_G12;
cmd.DW46.VDEncStreamInOutputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_OUTPUT_G12;
cmd.DW47.VDEncStreamInInputSurfIndex = CODECHAL_ENCODE_ME_VDENC_STREAMIN_INPUT_G12;
CODECHAL_ENCODE_CHK_STATUS_RETURN(params->pKernelState->m_dshRegion.AddData(
&cmd,
params->pKernelState->dwCurbeOffset,
sizeof(cmd)));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SendMeSurfaces(
PMOS_COMMAND_BUFFER cmdBuffer,
MeSurfaceParams* params)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
CODECHAL_ENCODE_CHK_NULL_RETURN(params);
CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);
CODECHAL_ENCODE_CHK_NULL_RETURN(params->pCurrOriginalPic);
CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps4xMeMvDataBuffer);
CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeDistortionBuffer);
if (!params->bVdencStreamInEnabled)
{
CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeBrcDistortionBuffer);
}
else
{
CODECHAL_ENCODE_CHK_NULL_RETURN(params->psMeVdencStreamInBuffer);
}
CODECHAL_MEDIA_STATE_TYPE encMediaStateType = (params->b32xMeInUse) ? CODECHAL_MEDIA_STATE_32X_ME :
params->b16xMeInUse ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
if (params->bVdencStreamInEnabled && encMediaStateType == CODECHAL_MEDIA_STATE_4X_ME)
{
encMediaStateType = CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
}
CODECHAL_ENCODE_CHK_NULL_RETURN(params->pMeBindingTable);
MeKernelBindingTable* meBindingTable = params->pMeBindingTable;
bool isFieldPicture = CodecHal_PictureIsField(*(params->pCurrOriginalPic)) ? 1 : 0;
bool isBottomField = CodecHal_PictureIsBottomField(*(params->pCurrOriginalPic)) ? 1 : 0;
uint8_t currVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
((isBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
PMOS_SURFACE currScaledSurface = nullptr, meMvDataBuffer = nullptr;
uint32_t meMvBottomFieldOffset = 0, currScaledBottomFieldOffset = 0;
if (params->b32xMeInUse)
{
CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps32xMeMvDataBuffer);
currScaledSurface = m_trackedBuf->Get32xDsSurface(CODEC_CURR_TRACKED_BUFFER);
meMvDataBuffer = params->ps32xMeMvDataBuffer;
meMvBottomFieldOffset = params->dw32xMeMvBottomFieldOffset;
currScaledBottomFieldOffset = params->dw32xScaledBottomFieldOffset;
}
else if (params->b16xMeInUse)
{
CODECHAL_ENCODE_CHK_NULL_RETURN(params->ps16xMeMvDataBuffer);
currScaledSurface = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
meMvDataBuffer = params->ps16xMeMvDataBuffer;
meMvBottomFieldOffset = params->dw16xMeMvBottomFieldOffset;
currScaledBottomFieldOffset = params->dw16xScaledBottomFieldOffset;
}
else
{
currScaledSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
meMvDataBuffer = params->ps4xMeMvDataBuffer;
meMvBottomFieldOffset = params->dw4xMeMvBottomFieldOffset;
currScaledBottomFieldOffset = params->dw4xScaledBottomFieldOffset;
}
// Reference height and width information should be taken from the current scaled surface rather
// than from the reference scaled surface in the case of PAFF.
uint32_t width = MOS_ALIGN_CEIL(params->dwDownscaledWidthInMb * 32, 64);
uint32_t height = params->dwDownscaledHeightInMb * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER;
// Force the values
meMvDataBuffer->dwWidth = width;
meMvDataBuffer->dwHeight = height;
meMvDataBuffer->dwPitch = width;
CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bIs2DSurface = true;
surfaceParams.bMediaBlockRW = true;
surfaceParams.psSurface = meMvDataBuffer;
surfaceParams.dwOffset = meMvBottomFieldOffset;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMEMVDataSurface;
surfaceParams.bIsWritable = true;
surfaceParams.bRenderTarget = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
if (params->b16xMeInUse && params->b32xMeEnabled)
{
// Pass 32x MV to 16x ME operation
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bIs2DSurface = true;
surfaceParams.bMediaBlockRW = true;
surfaceParams.psSurface = params->ps32xMeMvDataBuffer;
surfaceParams.dwOffset =
isBottomField ? params->dw32xMeMvBottomFieldOffset : 0;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dw32xMEMVDataSurface;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
else if (!params->b32xMeInUse && params->b16xMeEnabled)
{
// Pass 16x MV to 4x ME operation
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bIs2DSurface = true;
surfaceParams.bMediaBlockRW = true;
surfaceParams.psSurface = params->ps16xMeMvDataBuffer;
surfaceParams.dwOffset =
isBottomField ? params->dw16xMeMvBottomFieldOffset : 0;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dw16xMEMVDataSurface;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
// Insert Distortion buffers only for 4xMe case
if (!params->b32xMeInUse && !params->b16xMeInUse)
{
if (!params->bVdencStreamInEnabled)
{
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bIs2DSurface = true;
surfaceParams.bMediaBlockRW = true;
surfaceParams.psSurface = params->psMeBrcDistortionBuffer;
surfaceParams.dwOffset = params->dwMeBrcDistortionBottomFieldOffset;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBRCDist;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
surfaceParams.bIsWritable = true;
surfaceParams.bRenderTarget = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bIs2DSurface = true;
surfaceParams.bMediaBlockRW = true;
surfaceParams.psSurface = params->psMeDistortionBuffer;
surfaceParams.dwOffset = params->dwMeDistortionBottomFieldOffset;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMEDist;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value;
surfaceParams.bIsWritable = true;
surfaceParams.bRenderTarget = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
// Setup references 1...n
// LIST 0 references
CODEC_PICTURE refPic;
bool isRefFieldPicture = false, isRefBottomField = false;
uint8_t refPicIdx = 0;
if (params->pL0RefFrameList)
{
for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL0ActiveMinus1; refIdx++)
{
refPic = params->pL0RefFrameList[refIdx];
if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
{
if (refIdx == 0)
{
// Current Picture Y - VME
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bUseAdvState = true;
surfaceParams.psSurface = currScaledSurface;
surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForFwdRef;
surfaceParams.ucVDirection = currVDirection;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
uint32_t refScaledBottomFieldOffset = 0;
MOS_SURFACE *refScaledSurface;
if (params->b32xMeInUse)
{
refScaledSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
}
else if (params->b16xMeInUse)
{
refScaledSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
}
else
{
refScaledSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
}
refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
// L0 Reference Picture Y - VME
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bUseAdvState = true;
surfaceParams.psSurface = refScaledSurface;
surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx];
surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME :
((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
surfaceParams.dwBindingTableOffset = meBindingTable->dwMEFwdRefPicIdx[refIdx] + 1;
surfaceParams.ucVDirection = !isFieldPicture ? CODECHAL_VDIRECTION_FRAME :
((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
}
}
// Setup references 1...n
// LIST 1 references
if (params->pL1RefFrameList)
{
for (uint8_t refIdx = 0; refIdx <= params->dwNumRefIdxL1ActiveMinus1; refIdx++)
{
refPic = params->pL1RefFrameList[refIdx];
if (!CodecHal_PictureIsInvalid(refPic) && params->pPicIdx[refPic.FrameIdx].bValid)
{
if (refIdx == 0)
{
// Current Picture Y - VME
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bUseAdvState = true;
surfaceParams.psSurface = currScaledSurface;
surfaceParams.dwOffset = isBottomField ? currScaledBottomFieldOffset : 0;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMECurrForBwdRef;
surfaceParams.ucVDirection = currVDirection;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
isRefFieldPicture = CodecHal_PictureIsField(refPic) ? 1 : 0;
isRefBottomField = (CodecHal_PictureIsBottomField(refPic)) ? 1 : 0;
refPicIdx = params->pPicIdx[refPic.FrameIdx].ucPicIdx;
uint8_t scaledIdx = params->ppRefList[refPicIdx]->ucScalingIdx;
uint32_t refScaledBottomFieldOffset = 0;
MOS_SURFACE *refScaledSurface;
if (params->b32xMeInUse)
{
refScaledSurface = m_trackedBuf->Get32xDsSurface(scaledIdx);
}
else if (params->b16xMeInUse)
{
refScaledSurface = m_trackedBuf->Get16xDsSurface(scaledIdx);
}
else
{
refScaledSurface = m_trackedBuf->Get4xDsSurface(scaledIdx);
}
refScaledBottomFieldOffset = isRefBottomField ? currScaledBottomFieldOffset : 0;
// L1 Reference Picture Y - VME
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.bUseAdvState = true;
surfaceParams.psSurface = refScaledSurface;
surfaceParams.dwOffset = isRefBottomField ? refScaledBottomFieldOffset : 0;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value;
surfaceParams.dwBindingTableOffset = meBindingTable->dwMEBwdRefPicIdx[refIdx];
surfaceParams.ucVDirection = (!isFieldPicture) ? CODECHAL_VDIRECTION_FRAME :
((isRefBottomField) ? CODECHAL_VDIRECTION_BOT_FIELD : CODECHAL_VDIRECTION_TOP_FIELD);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
}
}
if (encMediaStateType == CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN)
{
// Output buffer
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
surfaceParams.bIs2DSurface = false;
surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInSurface;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
surfaceParams.bIsWritable = true;
surfaceParams.bRenderTarget = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
// Input buffer (for AVC case we only read the surface and update data)
MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
surfaceParams.dwSize = params->dwVDEncStreamInSurfaceSize;
surfaceParams.bIs2DSurface = false;
surfaceParams.presBuffer = params->psMeVdencStreamInBuffer;
surfaceParams.dwBindingTableOffset = meBindingTable->dwVdencStreamInInputSurface;
surfaceParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;
surfaceParams.bIsWritable = true;
surfaceParams.bRenderTarget = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceParams,
params->pKernelState));
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::InitInterface()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
uint8_t* binary = nullptr;
uint32_t kernelSize = 0;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
m_kernelBase,
m_kuidCommon,
&binary,
&kernelSize));
GetHwInterface()->GetStateHeapSettings()->dwIshSize +=
MOS_ALIGN_CEIL(kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetMeSurfaceParams(MeSurfaceParams *meSurfaceParams)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
meSurfaceParams->bMbaff = false;
meSurfaceParams->b4xMeDistortionBufferSupported = true;
meSurfaceParams->dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
meSurfaceParams->dwNumRefIdxL1ActiveMinus1 = 0;
MOS_ZeroMemory(&m_refPicList0, sizeof(m_refPicList0));
if (m_lastRefPic)
{
m_refPicList0[0].FrameIdx = m_vp9PicParams->RefFlags.fields.LastRefIdx;
m_refPicList0[0].PicFlags = PICTURE_FRAME;
}
if (m_goldenRefPic)
{
m_refPicList0[1].FrameIdx = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
m_refPicList0[1].PicFlags = PICTURE_FRAME;
}
if (m_altRefPic)
{
m_refPicList0[2].FrameIdx = m_vp9PicParams->RefFlags.fields.AltRefIdx;
m_refPicList0[2].PicFlags = PICTURE_FRAME;
}
meSurfaceParams->pL0RefFrameList = &(m_refPicList0[0]);
meSurfaceParams->ppRefList = &m_refList[0];
meSurfaceParams->pPicIdx = &m_picIdx[0];
meSurfaceParams->pCurrOriginalPic = &m_currOriginalPic;
meSurfaceParams->ps4xMeMvDataBuffer = &m_4xMeMvDataBuffer;
meSurfaceParams->ps16xMeMvDataBuffer = &m_16xMeMvDataBuffer;
meSurfaceParams->psMeDistortionBuffer = &m_4xMeDistortionBuffer;
meSurfaceParams->dwVerticalLineStride = m_verticalLineStride;
meSurfaceParams->dwVerticalLineStrideOffset = m_verticalLineStrideOffset;
meSurfaceParams->b32xMeEnabled = m_32xMeSupported;
meSurfaceParams->b16xMeEnabled = m_16xMeEnabled;
meSurfaceParams->pMeBindingTable = &m_meBindingTable;
meSurfaceParams->bVdencStreamInEnabled = true;
meSurfaceParams->psMeVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
meSurfaceParams->dwVDEncStreamInSurfaceSize = MOS_BYTES_TO_DWORDS((MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
(MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
CODECHAL_CACHELINE_SIZE);
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetMeCurbeParams(MeCurbeParams *meParams)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
meParams->b16xMeEnabled = m_16xMeEnabled;
meParams->b32xMeEnabled = m_32xMeSupported;
meParams->TargetUsage = TU_QUALITY;
meParams->MaxMvLen = m_hmeMaxMvLength;
meParams->CurrOriginalPic.FrameIdx = m_vp9PicParams->CurrOriginalPic.FrameIdx;
meParams->CurrOriginalPic.PicEntry = m_vp9PicParams->CurrOriginalPic.PicEntry;
meParams->CurrOriginalPic.PicFlags = m_vp9PicParams->CurrOriginalPic.PicFlags;
meParams->pic_init_qp_minus26 = m_vp9PicParams->LumaACQIndex - 26;
meParams->num_ref_idx_l0_active_minus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
meParams->num_ref_idx_l1_active_minus1 = 0;
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ExecuteMeKernel(
MeCurbeParams *meParams,
MeSurfaceParams *meSurfaceParams,
HmeLevel hmeLevel)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(meParams);
CODECHAL_ENCODE_CHK_NULL_RETURN(meSurfaceParams);
PerfTagSetting perfTag;
perfTag.Value = 0;
perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
perfTag.CallType = m_singleTaskPhaseSupported ? CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL : CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL;
perfTag.PictureCodingType = m_pictureCodingType;
m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
// Each ME kernel buffer counts as a separate perf task
m_osInterface->pfnResetPerfBufferID(m_osInterface);
CODECHAL_MEDIA_STATE_TYPE encFunctionType = (hmeLevel == HME_LEVEL_32x) ? CODECHAL_MEDIA_STATE_32X_ME :
(hmeLevel == HME_LEVEL_16x) ? CODECHAL_MEDIA_STATE_16X_ME : CODECHAL_MEDIA_STATE_4X_ME;
bool vdencMeInUse = false;
if (m_vdencEnabled && (encFunctionType == CODECHAL_MEDIA_STATE_4X_ME))
{
vdencMeInUse = true;
// Non legacy stream in is for hevc vp9 streamin kernel
encFunctionType = m_useNonLegacyStreamin ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_ME_VDENC_STREAMIN;
}
uint32_t krnStateIdx = vdencMeInUse ?
CODECHAL_ENCODE_ME_IDX_VDENC :
((m_pictureCodingType == P_TYPE) ? CODECHAL_ENCODE_ME_IDX_P : CODECHAL_ENCODE_ME_IDX_B);
PMHW_KERNEL_STATE kernelState = &m_meKernelStates[krnStateIdx];
// If Single Task Phase is not enabled, use BT count for the kernel state.
if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
{
uint32_t maxBtCount = m_singleTaskPhaseSupported ?
m_maxBtCount : kernelState->KernelParams.iBTCount;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
m_stateHeapInterface,
maxBtCount));
m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
m_stateHeapInterface,
kernelState,
false,
0,
false,
m_storeData));
MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
MOS_ZeroMemory(&idParams, sizeof(idParams));
idParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
m_stateHeapInterface,
1,
&idParams));
// Setup Additional MeParams (Most of them set up in codec specific function, so don't zero out here)
meParams->hmeLvl = hmeLevel;
meParams->pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMe(meParams));
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_DSH_TYPE,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
encFunctionType,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_ISH_TYPE,
kernelState));
)
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
}
SendKernelCmdsParams sendKernelCmdsParams;
sendKernelCmdsParams = SendKernelCmdsParams();
sendKernelCmdsParams.EncFunctionType = encFunctionType;
sendKernelCmdsParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
// Add binding table
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
m_stateHeapInterface,
kernelState));
// Setup Additional ME surface params (Most of them set up in codec specific function, so don't zero out here)
meSurfaceParams->dwDownscaledWidthInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledWidthInMb32x :
(hmeLevel == HME_LEVEL_16x) ? m_downscaledWidthInMb16x : m_downscaledWidthInMb4x;
meSurfaceParams->dwDownscaledHeightInMb = (hmeLevel == HME_LEVEL_32x) ? m_downscaledFrameFieldHeightInMb32x :
(hmeLevel == HME_LEVEL_16x) ? m_downscaledFrameFieldHeightInMb16x : m_downscaledFrameFieldHeightInMb4x;
meSurfaceParams->b32xMeInUse = (hmeLevel == HME_LEVEL_32x) ? true : false;
meSurfaceParams->b16xMeInUse = (hmeLevel == HME_LEVEL_16x) ? true : false;
meSurfaceParams->pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(&cmdBuffer, meSurfaceParams));
// Dump SSH for ME kernel
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_SSH_TYPE,
kernelState)));
/* zero out the mv data memory and me distortion buffer for the driver ULT
kernel only writes out this data used for current frame, in some cases the the data used for
previous frames would be left in the buffer (for example, the L1 mv for B frame would still show
in the P frame mv data buffer */
// Zeroing out the buffers has perf impact, so zero it out only when dumps are actually enabled
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_NULL_RETURN(m_debugInterface);
uint8_t* data = NULL;
uint32_t size = 0;
bool driverMeDumpEnabled = m_debugInterface->DumpIsEnabled(CodechalDbgAttr::attrOutput, encFunctionType);
if (driverMeDumpEnabled)
{
MOS_LOCK_PARAMS lockFlags;
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;
switch (hmeLevel)
{
case HME_LEVEL_32x:
data = (uint8_t*)m_osInterface->pfnLockResource(
m_osInterface,
&meSurfaceParams->ps32xMeMvDataBuffer->OsResource,
&lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
size = MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) *
(m_downscaledHeightInMb32x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
MOS_ZeroMemory(data, size);
m_osInterface->pfnUnlockResource(
m_osInterface,
&meSurfaceParams->ps32xMeMvDataBuffer->OsResource);
break;
case HME_LEVEL_16x:
data = (uint8_t*)m_osInterface->pfnLockResource(
m_osInterface,
&meSurfaceParams->ps16xMeMvDataBuffer->OsResource,
&lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
size = MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) *
(m_downscaledHeightInMb16x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
MOS_ZeroMemory(data, size);
m_osInterface->pfnUnlockResource(
m_osInterface,
&meSurfaceParams->ps16xMeMvDataBuffer->OsResource);
break;
case HME_LEVEL_4x:
if (!m_vdencEnabled)
{
data = (uint8_t*)m_osInterface->pfnLockResource(
m_osInterface,
&meSurfaceParams->ps4xMeMvDataBuffer->OsResource,
&lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
size = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) *
(m_downscaledHeightInMb4x * 2 * 4 * CODECHAL_ENCODE_ME_DATA_SIZE_MULTIPLIER);
MOS_ZeroMemory(data, size);
m_osInterface->pfnUnlockResource(
m_osInterface,
&meSurfaceParams->ps4xMeMvDataBuffer->OsResource);
}
break;
default:
return MOS_STATUS_INVALID_PARAMETER;
}
// zeroing out ME dist buffer
if (meSurfaceParams->b4xMeDistortionBufferSupported)
{
data = (uint8_t*)m_osInterface->pfnLockResource(
m_osInterface, &meSurfaceParams->psMeDistortionBuffer->OsResource, &lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
size = meSurfaceParams->psMeDistortionBuffer->dwHeight * meSurfaceParams->psMeDistortionBuffer->dwPitch;
MOS_ZeroMemory(data, size);
m_osInterface->pfnUnlockResource(
m_osInterface,
&meSurfaceParams->psMeDistortionBuffer->OsResource);
}
}
);
uint32_t scalingFactor = (hmeLevel == HME_LEVEL_32x) ? SCALE_FACTOR_32x :
(hmeLevel == HME_LEVEL_16x) ? SCALE_FACTOR_16x : SCALE_FACTOR_4x;
uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth / scalingFactor);
uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight / scalingFactor);
CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
walkerCodecParams.WalkerMode = m_walkerMode;
walkerCodecParams.dwResolutionX = resolutionX;
walkerCodecParams.dwResolutionY = resolutionY;
walkerCodecParams.bNoDependency = true;
walkerCodecParams.bMbaff = meSurfaceParams->bMbaff;
walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
walkerCodecParams.ucGroupId = m_groupId;
MHW_WALKER_PARAMS walkerParams;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
m_hwInterface,
&walkerParams,
&walkerCodecParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
&cmdBuffer,
&walkerParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
m_stateHeapInterface,
kernelState));
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
m_stateHeapInterface));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
encFunctionType,
nullptr)));
m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase);
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
m_lastTaskInPhase = false;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ExecuteKernelFunctions()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
#if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE)
uint32_t dumpFormat = 0;
CODECHAL_DEBUG_TOOL(
// CodecHal_DbgMapSurfaceFormatToDumpFormat(m_rawSurfaceToEnc->Format, &dumpFormat);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
m_rawSurfaceToEnc,
CodechalDbgAttr::attrEncodeRawInputSurface,
"SrcSurf"));
if (m_lastRefPic)
{
// CodecHal_DbgMapSurfaceFormatToDumpFormat(m_lastRefPic->Format, &dumpFormat);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
m_lastRefPic,
CodechalDbgAttr::attrReferenceSurfaces,
"LastRefSurface"));
}
if (m_goldenRefPic)
{
// CodecHal_DbgMapSurfaceFormatToDumpFormat(m_goldenRefPic->Format, &dumpFormat);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
m_goldenRefPic,
CodechalDbgAttr::attrReferenceSurfaces,
"GoldenRefSurface"));
}
if (m_altRefPic)
{
// CodecHal_DbgMapSurfaceFormatToDumpFormat(m_altRefPic->Format, &dumpFormat);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
m_altRefPic,
CodechalDbgAttr::attrReferenceSurfaces,
"_AltRefSurface"));
}
);
m_setRequestedEUSlices = ((m_frameHeight * m_frameWidth) >= m_ssdResolutionThreshold &&
m_targetUsage <= m_ssdTargetUsageThreshold) ? true : false;
m_hwInterface->m_numRequestedEuSlices = (m_setRequestedEUSlices) ?
m_sliceShutdownRequestState : m_sliceShutdownDefaultState;
// While this streamin isn't a kernel function, we 0 the surface here which is needed before HME kernel
SetupSegmentationStreamIn();
if (m_16xMeSupported)
{
//4x Downscaling
CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
cscScalingKernelParams.bLastTaskInPhaseCSC =
cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled);
cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled);
cscScalingKernelParams.bLastTaskInPhase32xDS = !m_hmeEnabled;
m_firstTaskInPhase = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams));
}
if (m_16xMeEnabled)
{
//Initialize the ME struct for HME kernel calls
MeCurbeParams meParams;
MOS_ZeroMemory(&meParams, sizeof(MeCurbeParams));
SetMeCurbeParams(&meParams);
MeSurfaceParams meSurfaceParams;
MOS_ZeroMemory(&meSurfaceParams, sizeof(MeSurfaceParams));
SetMeSurfaceParams(&meSurfaceParams);
// P_HME kernel (16x HME)
m_lastTaskInPhase = false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_16x));
//StreamIn kernel, 4xME
m_lastTaskInPhase = true;
meParams.segmapProvided = m_segmentMapProvided;
CODECHAL_ENCODE_CHK_STATUS_RETURN(ExecuteMeKernel(&meParams, &meSurfaceParams, HME_LEVEL_4x));
}
if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
{
MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
syncParams.GpuContext = m_renderContext;
syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
m_waitForEnc = true;
}
#endif
return eStatus;
}
static void StatusReportCleanup(
EncodeStatusReport* encodeStatusReport,
HCPPakHWTileSizeRecord_G12* tileStatusReport,
CODECHAL_ENCODE_BUFFER* tileSizeStreamoutBuffer,
PMOS_INTERFACE osInterface,
uint8_t* tempBsBuffer,
uint8_t* bitstream)
{
if (tempBsBuffer)
{
MOS_FreeMemory(tempBsBuffer);
}
if (bitstream)
{
osInterface->pfnUnlockResource(osInterface, &encodeStatusReport->pCurrRefList->resBitstreamBuffer);
}
if (tileStatusReport)
{
// clean-up the tile status report buffer
if (encodeStatusReport->CodecStatus == CODECHAL_STATUS_SUCCESSFUL)
{
for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
{
MOS_ZeroMemory(&tileStatusReport[i], sizeof(tileStatusReport[i]));
}
}
osInterface->pfnUnlockResource(osInterface, &tileSizeStreamoutBuffer->sResource);
}
}
MOS_STATUS CodechalVdencVp9StateG12::GetStatusReport(
EncodeStatus* encodeStatus,
EncodeStatusReport* encodeStatusReport)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus);
CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport);
if (encodeStatusReport->UsedVdBoxNumber == 1)
{
encodeStatusReport->bitstreamSize = encodeStatus->dwMFCBitstreamByteCountPerFrame + encodeStatus->dwHeaderBytesInserted;
encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses;
encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
return eStatus;
}
// Tile record always in m_tileRecordBuffer even in scala mode
PCODECHAL_ENCODE_BUFFER presTileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx];
MOS_LOCK_PARAMS lockFlags;
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
HCPPakHWTileSizeRecord_G12* tileStatusReport = (HCPPakHWTileSizeRecord_G12*)m_osInterface->pfnLockResource(
m_osInterface,
&presTileSizeStatusReport->sResource,
&lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport);
encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL;
encodeStatusReport->PanicMode = false;
encodeStatusReport->AverageQp = 0;
encodeStatusReport->QpY = 0;
encodeStatusReport->SuggestedQpYDelta = 0;
encodeStatusReport->NumberPasses = 1;
encodeStatusReport->bitstreamSize = 0;
encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0;
double sum_qp = 0.0;
uint32_t totalCU = 0;
for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
{
if (tileStatusReport[i].Length == 0)
{
encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE;
StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
return eStatus;
}
encodeStatusReport->bitstreamSize += tileStatusReport[i].Length;
totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1);
sum_qp += tileStatusReport[i].Hcp_Qp_Status_Count;
}
encodeStatusReport->QpY = encodeStatusReport->AverageQp =
(uint8_t)((sum_qp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU
if (m_enableTileStitchByHW)
{
StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, nullptr, nullptr);
return eStatus;
}
uint8_t* bufPtr = (uint8_t*)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize);
uint8_t* tempBsBuffer = bufPtr;
CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer);
CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList);
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.ReadOnly = 1;
uint8_t* bitstream = (uint8_t*)m_osInterface->pfnLockResource(
m_osInterface,
&currRefList.resBitstreamBuffer,
&lockFlags);
if (bitstream == nullptr)
{
MOS_SafeFreeMemory(tempBsBuffer);
CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr);
}
for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++)
{
uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
uint32_t len = tileStatusReport[i].Length;
if (offset + len >= m_bitstreamUpperBound)
{
eStatus = MOS_STATUS_INVALID_FILE_SIZE;
CODECHAL_ENCODE_ASSERTMESSAGE("Error: Tile offset and length add up to more than bitstream upper bound");
encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR;
encodeStatusReport->bitstreamSize = 0;
StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
return eStatus;
}
MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
bufPtr += len;
}
MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize);
MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize],
m_bitstreamUpperBound - encodeStatusReport->bitstreamSize);
StatusReportCleanup(encodeStatusReport, tileStatusReport, presTileSizeStatusReport, m_osInterface, tempBsBuffer, bitstream);
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::DecideEncodingPipeNumber()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
m_numPipe = m_numVdbox;
uint8_t num_tile_columns = (1 << m_vp9PicParams->log2_tile_columns);
if (num_tile_columns > m_numPipe)
{
m_numPipe = 1;
}
if (num_tile_columns < m_numPipe)
{
if (num_tile_columns >= 1 && num_tile_columns <= 4)
{
m_numPipe = num_tile_columns;
}
else
{
m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode
}
}
if (m_numPipe == 0 || m_numPipe > CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE)
{
m_numPipe = 1;
}
if (m_numPipe > 1)
{
m_scalableMode = true; // KMD VE is now enabled by default. Mediasolo can also use the VE interface.
}
else
{
m_scalableMode = false;
}
if (m_scalabilityState)
{
// Create/ re-use a GPU context with 2 pipes
m_scalabilityState->ucScalablePipeNum = m_numPipe;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::PlatformCapabilityCheck()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber());
if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState,
(PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
}
if (m_numPipe > 1)
{
m_singleTaskPhaseSupported = m_singleTaskPhaseSupportedInPak = false;
}
//so far only validate Tiling for VDEnc VP9
uint8_t col = (1 << (m_vp9PicParams->log2_tile_columns));
uint8_t row = (1 << (m_vp9PicParams->log2_tile_rows));
// Handling invalid tiling and scalability cases. When NumTilingColumn does not match NumPipe fall back to single pipe mode
if (m_numPipe > 1 && (col != m_numPipe))
{
if ((col == 1) || (row == 1))
{
m_numPipe = 1; // number of tile columns cannot be greater than number of pipes (VDBOX), run in single pipe mode
m_scalableMode = false;
}
else
{
CODECHAL_ENCODE_ASSERTMESSAGE("Number of tile columns cannot be greater than number of pipes (VDBOX) when number of rows > 1");
return MOS_STATUS_INVALID_PARAMETER;
}
}
//num columns must be either 2 or 4 for scalability mode, H/W limitation
if ((m_numPipe > 1) && (m_numPipe != 2) && (m_numPipe != 4))
{
CODECHAL_ENCODE_ASSERTMESSAGE("Num pipes must be either 2 or 4 for scalability mode, H/W limitation");
return MOS_STATUS_INVALID_PARAMETER;
}
// Tile width needs to be minimum size 256, error out if less
if ((col != 1) && ((m_vp9PicParams->SrcFrameWidthMinus1 + 1) < col * CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH))
{
CODECHAL_ENCODE_ASSERTMESSAGE("Incorrect number of columns input parameter, Tile width is < 256");
return MOS_STATUS_INVALID_PARAMETER;
}
if (row > 4)
{
CODECHAL_ENCODE_ASSERTMESSAGE("Max number of rows cannot exceeds 4 by VP9 Spec.");
return MOS_STATUS_INVALID_PARAMETER;
}
// number of tiles for this frame
m_numberTilesInFrame = col * row;
m_numUsedVdbox = m_numPipe;
if (!m_newSeq)
{
// If there is no new SEQ header, then the number of passes is decided here.
// Otherwise, it is done in SetSequenceStructs. For example, BRC setting may be changed.
m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
}
//Last place where scalable mode is decided
if(m_frameNum == 0)
{
m_lastFrameScalableMode = m_scalableMode;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetGpuCtxCreatOption()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
{
CodechalEncoderState::SetGpuCtxCreatOption();
}
else
{
m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED);
CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ConstructParmsForGpuCtxCreation(
m_scalabilityState,
(PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt));
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetAndPopulateVEHintParams(
PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (!MOS_VE_SUPPORTED(m_osInterface))
{
return eStatus;
}
CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms;
MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS));
if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface))
{
scalSetParms.bNeedSyncWithPrevious = true;
}
int32_t currentPass = GetCurrentPass();
uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
// Scalable mode only
if (m_scalableMode)
{
for (auto i = 0; i < m_numPipe; i++)
{
scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex].OsResource;
}
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms));
CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetTileData()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 *tileCodingParams = m_tileParams;
tileCodingParams->Mode = CODECHAL_ENCODE_MODE_VP9;
uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
uint32_t numTiles = numTileRows * numTileColumns;
uint32_t bitstreamSizePerTile = m_bitstreamUpperBound / (numTiles * CODECHAL_CACHELINE_SIZE);
uint32_t numLcusInTiles = 0, numCuRecord = 64;
uint32_t cuLevelStreamoutOffset = 0, sliceSizeStreamoutOffset = 0, bitstreamByteOffset = 0, sseRowstoreOffset = 0;
for (uint32_t tileCntr = 0; tileCntr < numTiles; tileCntr++)
{
uint32_t tileX, tileY, tileStartSbX, tileStartSbY, tileWidthInSb, tileHeightInSb, lastTileColWidth, lastTileRowHeight, numLcuInTile;
bool isLastTileCol, isLastTileRow;
tileX = tileCntr % numTileColumns;
tileY = tileCntr / numTileColumns;
isLastTileCol = ((numTileColumns - 1) == tileX);
isLastTileRow = ((numTileRows - 1) == tileY);
tileStartSbX = (tileX * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns;
tileStartSbY = (tileY * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows;
tileWidthInSb = (isLastTileCol ? m_picWidthInSb : (((tileX + 1) * m_picWidthInSb) >> m_vp9PicParams->log2_tile_columns)) - tileStartSbX;
tileHeightInSb = (isLastTileRow ? m_picHeightInSb : (((tileY + 1) * m_picHeightInSb) >> m_vp9PicParams->log2_tile_rows)) - tileStartSbY;
lastTileColWidth = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameWidthMinus1 + 1 - tileStartSbX * CODEC_VP9_SUPER_BLOCK_WIDTH), CODEC_VP9_MIN_BLOCK_WIDTH) / CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
lastTileRowHeight = (MOS_ALIGN_CEIL((m_vp9PicParams->SrcFrameHeightMinus1 + 1 - tileStartSbY * CODEC_VP9_SUPER_BLOCK_HEIGHT), CODEC_VP9_MIN_BLOCK_HEIGHT) / CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
numLcuInTile = tileWidthInSb * tileHeightInSb;
tileCodingParams[tileCntr].NumberOfActiveBePipes = m_numPipe;
tileCodingParams[tileCntr].NumOfTilesInFrame = numTiles;
tileCodingParams[tileCntr].NumOfTileColumnsInFrame = numTileColumns;
tileCodingParams[tileCntr].TileStartLCUX = tileStartSbX;
tileCodingParams[tileCntr].TileStartLCUY = tileStartSbY;
tileCodingParams[tileCntr].IsLastTileofColumn = isLastTileRow;
tileCodingParams[tileCntr].IsLastTileofRow = isLastTileCol;
tileCodingParams[tileCntr].TileWidthInMinCbMinus1 = isLastTileCol ? lastTileColWidth : (tileWidthInSb * CODEC_VP9_MIN_BLOCK_WIDTH) - 1;
tileCodingParams[tileCntr].TileHeightInMinCbMinus1 = isLastTileRow ? lastTileRowHeight : (tileHeightInSb * CODEC_VP9_MIN_BLOCK_HEIGHT) - 1;
if (m_scalableMode)
{
sseRowstoreOffset = (tileStartSbX + (3 * tileX)) << 5;
tileCodingParams[tileCntr].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * numLcusInTiles) * 64),
CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE;
tileCodingParams[tileCntr].presHcpSyncBuffer = &m_hcpScalabilitySyncBuffer.sResource;
tileCodingParams[tileCntr].SliceSizeStreamoutOffset = sliceSizeStreamoutOffset;
tileCodingParams[tileCntr].SseRowstoreOffset = sseRowstoreOffset;
tileCodingParams[tileCntr].BitstreamByteOffset = bitstreamByteOffset;
tileCodingParams[tileCntr].CuLevelStreamoutOffset = cuLevelStreamoutOffset;
cuLevelStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
sliceSizeStreamoutOffset += (tileCodingParams[tileCntr].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[tileCntr].TileHeightInMinCbMinus1 + 1);
sseRowstoreOffset += (numLcuInTile * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE;
bitstreamByteOffset += bitstreamSizePerTile;
numLcusInTiles += numLcuInTile;
tileCodingParams[tileCntr].TileSizeStreamoutOffset = (tileCntr*m_hcpInterface->GetPakHWTileSizeRecordSize() + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
//DW5
const uint32_t frameStatsStreamoutSize = m_brcPakStatsBufSize;
tileCodingParams[tileCntr].PakTileStatisticsOffset = (tileCntr*frameStatsStreamoutSize + CODECHAL_CACHELINE_SIZE - 1) / CODECHAL_CACHELINE_SIZE;
//DW12
tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = ((tileCntr * m_probabilityCounterBufferSize) + (CODECHAL_CACHELINE_SIZE - 1)) / CODECHAL_CACHELINE_SIZE;
}
else
{
tileCodingParams[tileCntr].CuRecordOffset = 0;
tileCodingParams[tileCntr].presHcpSyncBuffer = nullptr;
tileCodingParams[tileCntr].SliceSizeStreamoutOffset = 0;
tileCodingParams[tileCntr].SseRowstoreOffset = 0;
tileCodingParams[tileCntr].BitstreamByteOffset = 0;
tileCodingParams[tileCntr].CuLevelStreamoutOffset = 0;
tileCodingParams[tileCntr].TileSizeStreamoutOffset = 0;
//DW5
tileCodingParams[tileCntr].PakTileStatisticsOffset = 0;
//DW12
tileCodingParams[tileCntr].Vp9ProbabilityCounterStreamoutOffset = 0;
}
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetTileCommands(
PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
MHW_VDBOX_VDENC_WALKER_STATE_PARAMS_G12 vdencWalkerStateParams;
vdencWalkerStateParams.Mode = CODECHAL_ENCODE_MODE_VP9;
vdencWalkerStateParams.pVp9EncPicParams = m_vp9PicParams;
vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
// MFXPipeDone should not be set for tail insertion
vdPipelineFlushParams.Flags.bWaitDoneMFX =
(m_lastPicInStream || m_lastPicInSeq) ? 0 : 1;
vdPipelineFlushParams.Flags.bWaitDoneVDENC = 1;
vdPipelineFlushParams.Flags.bFlushVDENC = 1;
vdPipelineFlushParams.Flags.bFlushHEVC = 1;
vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
if (IsFirstPipe() && IsFirstPass())
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData());
}
MHW_VDBOX_VDENC_WEIGHT_OFFSET_PARAMS vdencWeightOffsetParams;
uint32_t numTileColumns = (1 << m_vp9PicParams->log2_tile_columns);
uint32_t numTileRows = (1 << m_vp9PicParams->log2_tile_rows);
int currentPipe = GetCurrentPipe();
for (uint32_t tileRow = 0, tileIdx = 0; tileRow < numTileRows; tileRow++)
{
for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++, tileIdx++)
{
if (m_numPipe > 1)
{
if (tileCol != currentPipe)
{
continue;
}
}
if (m_scalableMode)
{
MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
//in scalability mode
MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
vdCtrlParam.scalableModePipeLock = true;
MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(cmdBuffer, &vdCtrlParam));
}
// HCP_TILE_CODING commmand
CODECHAL_ENCODE_CHK_STATUS_RETURN(static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(cmdBuffer, &m_tileParams[tileIdx]));
MOS_ZeroMemory(&vdencWeightOffsetParams, sizeof(vdencWeightOffsetParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWeightsOffsetsStateCmd(cmdBuffer, nullptr, &vdencWeightOffsetParams));
vdencWalkerStateParams.pTileCodingParams = &m_tileParams[tileIdx];
vdencWalkerStateParams.dwTileId = tileIdx;
switch (m_numPipe)
{
case 0:
case 1:
vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
break;
case 2:
vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
break;
case 4:
vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
break;
default:
vdencWalkerStateParams.dwNumberOfPipes = VDENC_PIPE_INVALID;
CODECHAL_ENCODE_ASSERTMESSAGE("Num Pipes invalid");
return eStatus;
break;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencWalkerStateCmd(cmdBuffer, &vdencWalkerStateParams));
if (m_scalableMode)
{
MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
vdCtrlParam.scalableModePipeUnlock = true;
MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(cmdBuffer, &vdCtrlParam));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipelineFlushParams));
// Send MI_FLUSH command
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
flushDwParams.bVideoPipelineCacheInvalidate = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
}
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ExecuteTileLevel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
int currentPipe = GetCurrentPipe();
int currentPass = GetCurrentPass();
if (currentPipe < 0 || currentPass < 0)
{
return MOS_STATUS_INVALID_PARAMETER;
}
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if (IsFirstPipe())
{
MHW_BATCH_BUFFER secondLevelBatchBuffer;
MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
secondLevelBatchBuffer.dwOffset = 0;
secondLevelBatchBuffer.bSecondLevel = true;
if (!m_hucEnabled)
{
secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
}
else
{
secondLevelBatchBuffer.OsResource = m_resHucPakInsertUncompressedHeaderWriteBuffer;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
&cmdBuffer,
&secondLevelBatchBuffer));
}
// Setup Tile level PAK commands
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileCommands(&cmdBuffer));
MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
vdCtrlParam.memoryImplicitFlush = true;
MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
// Send VD_PIPELINE_FLUSH command
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams;
MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams));
// MFXPipeDone should not be set for tail insertion
vdPipelineFlushParams.Flags.bWaitDoneMFX = 1;
vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1;
vdPipelineFlushParams.Flags.bFlushHEVC = 1;
vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams));
// Send MI_FLUSH command
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
flushDwParams.bVideoPipelineCacheInvalidate = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
if (IsFirstPipe())
{
if (m_numPipe > 1 && m_enableTileStitchByHW)
{
for (auto i = 1; i < m_numPipe; i++)
{
if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource) && m_hucEnabled)
{
// This semaphore waits for all pipes except pipe 1 vdenc+pak to finish processing before stitching bitstream
SendHWWaitCommand(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, (currentPass + 1));
SetSemaphoreMem(&m_stitchWaitSemaphoreMem[i].sResource, &cmdBuffer, 0); // Reset above semaphore
}
}
}
// PAK integration kernel to integrate stats for next HUC pass
if (m_scalableMode && m_hucEnabled && m_isTilingSupported && IsFirstPipe())
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9PakInt(&cmdBuffer));
// Signal pak int done semaphore here for next pass to proceed
if (!IsLastPass())
{
SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, (currentPass + 1));
}
if (m_enableTileStitchByHW)
{
// 2nd level BB buffer for stitching cmd
// current location to add cmds in 2nd level batch buffer
m_HucStitchCmdBatchBuffer.iCurrent = 0;
// reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
m_HucStitchCmdBatchBuffer.dwOffset = 0;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer));
// This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false));
}
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
if (!m_scalableMode) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer));
}
}
else // 2nd Pipe
{
// Signal stitch command to proceed because vdenc+pak is done in this pipe and we can stitch bs
if (m_hucEnabled && m_isTilingSupported && !Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[currentPipe].sResource))
{
SetSemaphoreMem(&m_stitchWaitSemaphoreMem[currentPipe].sResource, &cmdBuffer, (currentPass + 1));
}
}
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
if (m_singleTaskPhaseSupported && m_hucEnabled && IsLastPass())
{
m_lastTaskInPhase = true; //HPU singletask phase mode only
}
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase || m_scalableMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass);
if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
{
// Added extra symbol into log to avoid log's file overwrite on the next pass
// For DYS Mutlipass mode next pass should run with "m_currPass = 0" again
// See ExecutePictureLevel() function for all details
currPassName.append("_0");
}
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
CODECHAL_NUM_MEDIA_STATES,
currPassName.data())));
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
if (IsLastPipe())
{
if (m_hucEnabled)
{
// We save the index of the 2nd level batch buffer in case there is a pass that needs the last SLBB
m_lastVdencPictureState2ndLevelBBIndex = m_vdencPictureState2ndLevelBBIndex;
}
m_vdencPictureState2ndLevelBBIndex = (m_vdencPictureState2ndLevelBBIndex + 1) % CODECHAL_VP9_ENCODE_RECYCLED_BUFFER_NUM;
}
if (IsFirstPipe() &&
m_waitForEnc &&
IsFirstPass() &&
!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
{
MOS_SYNC_PARAMS syncParams;
syncParams = g_cInitSyncParams;
syncParams.GpuContext = m_videoContext;
syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams));
m_waitForEnc = false;
}
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
bool renderFlags = m_videoContextUsesNullHw;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
m_lastTaskInPhase = false;
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
m_resVdencPakObjCmdStreamOutBuffer,
CodechalDbgAttr::attrPakObjStreamout,
currPassName.data(),
m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE,
0,
CODECHAL_NUM_MEDIA_STATES));
if (m_vp9PicParams->PicFlags.fields.segmentation_enabled) {
//CodecHal_DbgDumpEncodeVp9SegmentStreamout(m_debugInterface, m_encoder);
//m_debugInterface->DumpBuffer(
// (PCODECHAL_ENCODE_VP9_STATE)pvStandardState.resVdencSegmentMapStreamOut,
// CodechalDbgAttr::attrOutput,
// "SegMap_Out",
// CODECHAL_CACHELINE_SIZE * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameHeight, 64) * MOS_ROUNDUP_DIVIDE(pEncoder->dwFrameWidth, 64),
// 0,
// CODECHAL_MEDIA_STATE_VP9_PAK_LUMA_RECON);
}
if (m_mmcState && !m_mmcUserFeatureUpdated) {
m_mmcState->UpdateUserFeatureKey(&m_reconSurface);
m_mmcUserFeatureUpdated = true;
});
}
// Reset parameters for next PAK execution
if (IsLastPipe() && IsLastPass())
{
if (m_vp9PicParams->PicFlags.fields.super_frame && m_tsEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructSuperFrame());
}
if ((currentPipe == 0) &&
m_signalEnc &&
!Mos_ResourceIsNull(&m_resSyncObjectVideoContextInUse))
{
// signal semaphore
MOS_SYNC_PARAMS syncParams;
syncParams = g_cInitSyncParams;
syncParams.GpuContext = m_videoContext;
syncParams.presSyncResource = &m_resSyncObjectVideoContextInUse;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
m_semaphoreObjCount++;
}
m_prevFrameInfo.KeyFrame = !m_vp9PicParams->PicFlags.fields.frame_type;
m_prevFrameInfo.IntraOnly = (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME) || m_vp9PicParams->PicFlags.fields.intra_only;
m_prevFrameInfo.ShowFrame = m_vp9PicParams->PicFlags.fields.show_frame;
m_prevFrameInfo.FrameWidth = m_oriFrameWidth;
m_prevFrameInfo.FrameHeight = m_oriFrameHeight;
m_currMvTemporalBufferIndex ^= 0x01;
m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx] = m_vp9PicParams->PicFlags.fields.frame_type;
m_prevFrameSegEnabled = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
// Reset parameters for next PAK execution
if (!m_singleTaskPhaseSupported)
{
m_osInterface->pfnResetPerfBufferID(m_osInterface);
}
m_newPpsHeader = 0;
m_newSeqHeader = 0;
m_frameNum++;
//Save the last frame's scalable mode flag to prevent switching buffers when doing next pass
m_lastFrameScalableMode = m_scalableMode;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ExecuteSliceLevel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
return ExecuteTileLevel();
}
void CodechalVdencVp9StateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS& pipeModeSelectParams)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CodechalVdencVp9State::SetHcpPipeModeSelectParams(pipeModeSelectParams);
MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12& pipeModeSelectParamsG12 = static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12&>(pipeModeSelectParams);
pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
pipeModeSelectParamsG12.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
pipeModeSelectParamsG12.bDynamicScalingEnabled = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
if (m_scalableMode)
{
// Running in the multiple VDBOX mode
if (IsFirstPipe())
{
pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
}
else
{
if (IsLastPipe())
{
pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
}
else
{
pipeModeSelectParamsG12.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
}
}
pipeModeSelectParamsG12.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
}
return;
}
void CodechalVdencVp9StateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS& indObjBaseAddrParams)
{
CodechalVdencVp9State::SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBBIndex];
bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource);
if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
{
// overwrite presProbabilityCounterBuffer and it's params for scalable mode
indObjBaseAddrParams.presProbabilityCounterBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
indObjBaseAddrParams.dwProbabilityCounterOffset = m_tileStatsOffset.counterBuffer;
indObjBaseAddrParams.dwProbabilityCounterSize = m_statsSize.counterBuffer;
}
// Need to use presPakTileSizeStasBuffer instead of presTileRecordBuffer, so setting to null
indObjBaseAddrParams.presTileRecordBuffer = nullptr;
indObjBaseAddrParams.dwTileRecordSize = 0;
indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer? &tileRecordBuffer->sResource : nullptr;
indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer? ((m_statsSize.tileSizeRecord) * GetNumTilesInFrame()) : 0;
indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer? m_tileStatsOffset.tileSizeRecord: 0;
}
MOS_STATUS CodechalVdencVp9StateG12::VerifyCommandBufferSize()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (UseLegacyCommandBuffer()) // legacy mode & resize CommandBuffer Size for every BRC pass
{
if (!m_singleTaskPhaseSupported)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
}
else // virtual engine
{
uint32_t requestedSize =
m_pictureStatesSize +
m_picturePatchListSize +
m_extraPictureStatesSize +
(m_sliceStatesSize * m_numSlices);
requestedSize += requestedSize*m_numPassesInOnePipe;
if (m_hucEnabled && m_brcEnabled)
{
requestedSize += m_brcMaxNumPasses*(m_defaultHucCmdsSize + m_defaultHucPatchListSize);
}
// Running in the multiple VDBOX mode
int currentPipe = GetCurrentPipe();
int currentPass = GetCurrentPass();
if (currentPipe < 0 || currentPipe >= m_numPipe)
{
return MOS_STATUS_INVALID_PARAMETER;
}
if (currentPass < 0 || currentPass >= m_brcMaxNumPasses)
{
return MOS_STATUS_INVALID_PARAMETER;
}
if (IsFirstPipe() && m_osInterface->bUsesPatchList)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
PMOS_COMMAND_BUFFER cmdBuffer;
if (m_osInterface->phasedSubmission)
{
m_osInterface->pfnVerifyCommandBufferSize(m_osInterface, requestedSize, 0);
return eStatus;
}
else
{
cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][(uint32_t)currentPipe][passIndex];
}
if (Mos_ResourceIsNull(&cmdBuffer->OsResource) ||
m_sizeOfVEBatchBuffer < requestedSize)
{
MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = requestedSize;
allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX";
if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
{
if (cmdBuffer->pCmdBase)
{
m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
}
m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
}
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&cmdBuffer->OsResource);
CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
m_sizeOfVEBatchBuffer = requestedSize;
}
if (cmdBuffer->pCmdBase == 0)
{
MOS_LOCK_PARAMS lockParams;
MOS_ZeroMemory(&lockParams, sizeof(lockParams));
lockParams.WriteOnly = true;
cmdBuffer->pCmdPtr = cmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &cmdBuffer->OsResource, &lockParams);
cmdBuffer->iRemaining = m_sizeOfVEBatchBuffer;
cmdBuffer->iOffset = 0;
if (cmdBuffer->pCmdBase == nullptr)
{
return MOS_STATUS_NULL_POINTER;
}
}
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::GetCommandBuffer(
PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
if (UseLegacyCommandBuffer()) // legacy mode
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0));
}
else // virtual engine
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0));
if (m_osInterface->phasedSubmission)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, GetCurrentPipe() + 1));
CodecHalEncodeScalability_EncodePhaseToSubmissionType(IsFirstPipe(), cmdBuffer);
if (IsLastPipe())
{
cmdBuffer->iSubmissionType |= SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE;
}
}
else
{
int currentPipe = GetCurrentPipe();
int currentPass = GetCurrentPass();
if (currentPipe < 0 || currentPipe >= m_numPipe)
{
return MOS_STATUS_INVALID_PARAMETER;
}
uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
*cmdBuffer = m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex];
}
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ReturnCommandBuffer(
PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
if (UseLegacyCommandBuffer()) // legacy mode
{
m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0);
}
else // virtual engine
{
if (m_osInterface->phasedSubmission)
{
m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, GetCurrentPipe() + 1);
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
}
else
{
int currentPipe = GetCurrentPipe();
int currentPass = GetCurrentPass();
if (currentPipe < 0 || currentPipe >= m_numPipe)
{
return MOS_STATUS_INVALID_PARAMETER;
}
if (eStatus == MOS_STATUS_SUCCESS)
{
uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
m_veBatchBuffer[m_virtualEngineBBIndex][currentPipe][passIndex] = *cmdBuffer;
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0);
}
}
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SubmitCommandBuffer(
PMOS_COMMAND_BUFFER cmdBuffer,
bool nullRendering)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
if (UseLegacyCommandBuffer()) // legacy mode
{
if (!IsRenderContext()) // Set VE Hints for video contexts only
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, nullRendering));
}
else // virtual engine
{
if (!IsLastPipe())
{
return eStatus;
}
if (m_osInterface->phasedSubmission)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, nullRendering));
}
else
{
int currentPass = GetCurrentPass();
for (auto i = 0; i < m_numPipe; i++)
{
uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass;
PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBBIndex][i][passIndex];
if (cmdBuffer->pCmdBase)
{
m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
}
cmdBuffer->pCmdBase = 0;
cmdBuffer->iOffset = cmdBuffer->iRemaining = 0;
}
if (eStatus == MOS_STATUS_SUCCESS)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, nullRendering));
}
}
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SendPrologWithFrameTracking(
PMOS_COMMAND_BUFFER cmdBuffer,
bool frameTrackingRequested,
MHW_MI_MMIOREGISTERS *mmioRegister)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
if (IsRenderContext()) //Render context only
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister));
return eStatus;
}
else // Legacy mode or virtual engine
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext));
#endif
if (!IsLastPipe())
{
return eStatus;
}
PMOS_COMMAND_BUFFER commandBufferInUse;
if (m_realCmdBuffer.pCmdBase)
{
commandBufferInUse = &m_realCmdBuffer; //virtual engine mode
}
else
{
if (cmdBuffer && cmdBuffer->pCmdBase)
{
commandBufferInUse = cmdBuffer; //legacy mode
}
else
{
eStatus = MOS_STATUS_INVALID_PARAMETER;
return eStatus;
}
}
commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode;
commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices;
commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices;
commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus;
commandBufferInUse->Attributes.bValidPowerGatingRequest = true;
if (frameTrackingRequested && m_frameTrackingEnabled)
{
commandBufferInUse->Attributes.bEnableMediaFrameTracking = true;
commandBufferInUse->Attributes.resMediaFrameTrackingSurface =
&m_encodeStatusBuf.resStatusBuffer;
commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData;
// Set media frame tracking address offset(the offset from the encoder status buffer page)
commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0;
}
MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface();
genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface();
genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
genericPrologParams.dwStoreDataValue = m_storeData - 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams));
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetSemaphoreMem(
PMOS_RESOURCE semaphoreMem,
PMOS_COMMAND_BUFFER cmdBuffer,
uint32_t value)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
MHW_MI_STORE_DATA_PARAMS storeDataParams;
MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
storeDataParams.pOsResource = semaphoreMem;
storeDataParams.dwResourceOffset = 0;
storeDataParams.dwValue = value;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
cmdBuffer,
&storeDataParams));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SendHWWaitCommand(
PMOS_RESOURCE semaphoreMem,
PMOS_COMMAND_BUFFER cmdBuffer,
uint32_t value)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(semaphoreMem);
MHW_MI_SEMAPHORE_WAIT_PARAMS semaphoreWaitParams;
MOS_ZeroMemory(&semaphoreWaitParams, sizeof(semaphoreWaitParams));
semaphoreWaitParams.presSemaphoreMem = semaphoreMem;
semaphoreWaitParams.bPollingWaitMode = true;
semaphoreWaitParams.dwSemaphoreData = value;
semaphoreWaitParams.CompareOperation = MHW_MI_SAD_EQUAL_SDD;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiSemaphoreWaitCmd(cmdBuffer, &semaphoreWaitParams));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetDmemHuCPakInt()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
uint8_t currPass = (uint8_t)GetCurrentPass();
MOS_LOCK_PARAMS lockFlags;
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;
// All bytes in below dmem for fields not used by VP9 to be set to 0xFF.
HucPakIntDmem* dmem = (HucPakIntDmem*)m_osInterface->pfnLockResource(
m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass], &lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
MOS_ZeroMemory(dmem, sizeof(HucPakIntDmem));
// CODECHAL_VDENC_VP9_PAK_INT_DMEM_OFFSETS_SIZE size of offsets in the CODECHAL_VDENC_VP9_HUC_PAK_INT_DMEM struct.
// Reset offsets to 0xFFFFFFFF as unavailable
memset(dmem, 0xFF, m_pakIntDmemOffsetsSize);
dmem->totalSizeInCommandBuffer = GetNumTilesInFrame() * CODECHAL_CACHELINE_SIZE;
dmem->offsetInCommandBuffer = 0xFFFF; // Not used for VP9, all bytes in dmem for fields not used are 0xFF
dmem->picWidthInPixel = (uint16_t)m_frameWidth;
dmem->picHeightInPixel = (uint16_t)m_frameHeight;
dmem->totalNumberOfPaks = m_numPipe;
dmem->codec = m_pakIntVp9CodecId;
dmem->maxPass = m_brcMaxNumPasses; // Only VDEnc CQP and BRC
dmem->currentPass = currPass + 1;
dmem->lastTileBSStartInBytes = m_tileParams[GetNumTilesInFrame() - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8;
dmem->picStateStartInBytes = 0xFFFF;
if (m_enableTileStitchByHW)
{
dmem->StitchEnable = true;
dmem->StitchCommandOffset = 0;
dmem->BBEndforStitch = HUC_BATCH_BUFFER_END;
}
// Offset 0 is for region 1 - output of integrated frame stats from PAK integration kernel
dmem->tileSizeRecordOffset[0] = m_frameStatsOffset.tileSizeRecord;
dmem->vdencStatOffset[0] = m_frameStatsOffset.vdencStats;
dmem->vp9PakStatOffset[0] = m_frameStatsOffset.pakStats;
dmem->vp9CounterBufferOffset[0] = m_frameStatsOffset.counterBuffer;
//Offset 1 - 4 is for region 0 - Input to PAK integration kernel for all tile statistics per pipe
for (auto i = 1; i <= m_numPipe; i++)
{
dmem->numTiles[i - 1] = (GetNumTilesInFrame()) / m_numPipe;
dmem->tileSizeRecordOffset[i] = m_tileStatsOffset.tileSizeRecord + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.tileSizeRecord);
dmem->vdencStatOffset[i] = m_tileStatsOffset.vdencStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.vdencStats);
dmem->vp9PakStatOffset[i] = m_tileStatsOffset.pakStats + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.pakStats);
dmem->vp9CounterBufferOffset[i] = m_tileStatsOffset.counterBuffer + ((i - 1)*(dmem->numTiles[i - 1])*m_statsSize.counterBuffer);
}
m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][currPass]);
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetSequenceStructs()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
#ifdef LINUX
MOS_SURFACE rawSurface;
PCODEC_VP9_ENCODE_SEQUENCE_PARAMS seqParams = (PCODEC_VP9_ENCODE_SEQUENCE_PARAMS)m_encodeParams.pSeqParams;
rawSurface = *(m_encodeParams.psRawSurface);
if (rawSurface.OsResource.Format == Format_A8R8G8B8 ||
rawSurface.OsResource.Format == Format_B10G10R10A2)
{
seqParams->SeqFlags.fields.DisplayFormatSwizzle = 1;
}
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetSequenceStructs());
// All pipe need to go through the picture-level and slice-level commands
m_numPassesInOnePipe = m_numPasses;
m_numPasses = (m_numPasses + 1) * m_numPipe - 1;
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetPictureStructs()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::SetPictureStructs());
m_virtualEngineBBIndex = m_currOriginalPic.FrameIdx;
m_picWidthInMinBlk =
MOS_ALIGN_CEIL(m_oriFrameWidth, CODEC_VP9_MIN_BLOCK_WIDTH);
m_picHeightInMinBlk =
MOS_ALIGN_CEIL(m_oriFrameHeight, CODEC_VP9_MIN_BLOCK_WIDTH);
// When buffers start recycling , we need to know the index of last buffer for next frame.
if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
{
if (!m_hucEnabled)
{
m_numPassesInOnePipe = (m_dysRefFrameFlags != DYS_REF_NONE);
}
if (m_vdencBrcEnabled)
{
//Reduce per pipe passes by 1, as m_numPassesInOnePipe == 1 becomes m_numPassesInOnePipe = 0 for Huc to run
m_dysBrc = true;
m_numPassesInOnePipe = (m_numPassesInOnePipe > 0 ) ? m_numPassesInOnePipe - 1 : m_numPassesInOnePipe;
}
else
{
m_dysCqp = true;
}
m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
}
// This is BRC DYS SinglePass case
// Actually, repak is disabled
if (m_vdencBrcEnabled && (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
{
m_dysBrc = true;
m_numPassesInOnePipe = 1;
m_numPasses = (m_numPassesInOnePipe + 1) * m_numPipe - 1;
}
if (!m_vdencBrcEnabled && (m_dysRefFrameFlags != DYS_REF_NONE))
{
m_dysCqp = true;
}
#ifdef _MMC_SUPPORTED
//WA to clear CCS by VE resolve
if (MEDIA_IS_WA(m_waTable, Wa_1408785368))
{
bool clearccswa = false;
MOS_SURFACE surfaceDetails = {};
m_osInterface->pfnGetResourceInfo(m_osInterface, &m_reconSurface.OsResource, &surfaceDetails);
// Restore CCS if the surface's width/height is not aligned with that of current frame due to resolution change
if ((m_frameNum != 0) &&
((surfaceDetails.dwWidth != m_picWidthInMinBlk) ||
(surfaceDetails.dwHeight != m_picHeightInMinBlk)))
{
clearccswa = true;
}
if (clearccswa && m_mmcState && m_mmcState->IsMmcEnabled())
{
m_osInterface->pfnDecompResource(m_osInterface, &m_reconSurface.OsResource);
m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
}
}
#endif
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ExecutePictureLevel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize());
PerfTagSetting perfTag;
perfTag.Value = 0;
perfTag.Mode = (uint16_t)m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
perfTag.CallType = CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE;
perfTag.PictureCodingType = m_pictureCodingType;
m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
if ((m_dysRefFrameFlags == DYS_REF_NONE) && m_pakOnlyModeEnabledForLastPass)
{
//This flag sets pak-only mode in slbb for RePak pass. In single-pass mode, this flag should be disabled.
m_vdencPakonlyMultipassEnabled = ((m_numPasses > 0) && (IsLastPass())) ? true : false;
}
// Scalable Mode header
if (m_scalableMode)
{
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
forceWakeupParams.bMFXPowerWellControl = true;
forceWakeupParams.bMFXPowerWellControlMask = true;
forceWakeupParams.bHEVCPowerWellControl = true;
forceWakeupParams.bHEVCPowerWellControlMask = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(
&cmdBuffer,
&forceWakeupParams));
bool requestFrameTracking = m_singleTaskPhaseSupported ? IsFirstPass() : IsLastPass();
// In scalable mode, command buffer header is sent on last pipe only
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
}
// for VDENC dynamic scaling, here are the steps we need to process
// 1. Use PAK to down scale the reference picture (PASS 0)
// 2. Run VDENC to stream out PakObjCmd (PASS 0)
// 3. Run VDENC (with PAK only multi pass enabled) to stream in PakObjCmd from previous pass (PASS 0)
// 4. Repak (PASS 1) - it is only for CQP mode
// 5. Extra note: Repak is disabled for BRC Dynamic scaling single pass mode
if (m_dysRefFrameFlags != DYS_REF_NONE)
{
if (m_currPass == 0)
{
// Turn off scalability and Tiling for Dynamic scaling pass 0 for reference scaling
uint8_t logTileRows = m_vp9PicParams->log2_tile_rows;
uint8_t logTileColumns = m_vp9PicParams->log2_tile_columns;
bool scalableMode = m_scalableMode;
uint8_t numPipe = m_numPipe;
m_vp9PicParams->log2_tile_rows = 0;
m_vp9PicParams->log2_tile_columns = 0;
m_scalableMode = false;
m_numPipe = 1;
// Execute Reference scaling pass
CODECHAL_ENCODE_CHK_STATUS_RETURN(DysRefFrames());
// Restore scalability and Tiling status for subsequent passes
m_vp9PicParams->log2_tile_rows = logTileRows;
m_vp9PicParams->log2_tile_columns = logTileColumns;
m_scalableMode = scalableMode;
m_numPipe = numPipe;
if (m_dysVdencMultiPassEnabled)
{
m_vdencPakObjCmdStreamOutEnabled = true;
m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
// enable single task phase here since we need to combine the pakobj streamout and pakonly pass into one batch buffer
m_singleTaskPhaseSupported = true;
m_firstTaskInPhase = true;
if (Mos_ResourceIsNull(&m_resVdencDysPictureState2NdLevelBatchBuffer))
{
MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = m_vdencPicStateSecondLevelBatchBufferSize;
allocParamsForBufferLinear.pBufName = "VDEnc DYS Picture Second Level Batch Buffer";
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_resVdencDysPictureState2NdLevelBatchBuffer);
if (eStatus != MOS_STATUS_SUCCESS)
{
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate VDEnc DYS Picture Second Level Batch Buffer.");
return eStatus;
}
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
else
{
m_hucEnabled = m_dysHucEnabled; // recover huc state
}
}
else if (m_currPass == 1 && m_dysVdencMultiPassEnabled)
{
m_hucEnabled = m_dysHucEnabled; // recover huc state
m_vdencPakonlyMultipassEnabled = true;
m_dysRefFrameFlags = DYS_REF_NONE;
m_currPass = 0; // reset ucCurrPass = 0 to run the Huc
m_lastTaskInPhase = false;
}
}
else
{
if (!(IsLastPass()))
{
m_vdencPakObjCmdStreamOutEnabled = true;
m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
}
else
{
m_vdencPakObjCmdStreamOutEnabled = false;
}
}
if (m_isTilingSupported)
{
MOS_LOCK_PARAMS lockFlagsWriteOnly;
uint8_t* tileStatsData = nullptr;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBBIndex].sResource))
{
// Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
auto size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
allocParamsForBufferLinear.dwBytes = size;
allocParamsForBufferLinear.pBufName = "Tile Record Buffer";
CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_tileRecordBuffer[m_virtualEngineBBIndex].sResource));
m_tileRecordBuffer[m_virtualEngineBBIndex].dwSize = size;
auto tileRecordData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
MOS_ZeroMemory(tileRecordData, allocParamsForBufferLinear.dwBytes);
m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource);
}
}
if (m_isTilingSupported && m_scalableMode && m_hucEnabled && IsFirstPipe() && IsFirstPass())
{
// Max row is 4 by VP9 Spec
uint32_t m_maxScalableModeRows = 4;
uint32_t m_maxScalableModeTiles = m_numVdbox * m_maxScalableModeRows;
// Fill Pak integration kernel input tile stats structure
MOS_ZeroMemory(&m_tileStatsOffset, sizeof(StatsInfo));
// TileSizeRecord has to be 4k aligned
m_tileStatsOffset.tileSizeRecord = 0;
// VdencStats has to be 4k aligned
m_tileStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_tileStatsOffset.tileSizeRecord + (m_maxScalableModeTiles * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
// VP9PAKStats has to be 64 byte aligned
m_tileStatsOffset.pakStats = MOS_ALIGN_CEIL((m_tileStatsOffset.vdencStats + (m_maxScalableModeTiles * m_statsSize.vdencStats)), CODECHAL_PAGE_SIZE);
// VP9CounterBuffer has to be 4k aligned
m_tileStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_tileStatsOffset.pakStats + (m_maxScalableModeTiles * m_statsSize.pakStats)), CODECHAL_PAGE_SIZE);
MOS_LOCK_PARAMS lockFlagsWriteOnly;
uint8_t* tileStatsData = nullptr;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
if (Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource))
{
// Allocate Tile Stats Buffer for PAK integration and to be used everywhere for tile stats
MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL((m_tileStatsOffset.counterBuffer + (m_maxScalableModeTiles * m_statsSize.counterBuffer)), CODECHAL_PAGE_SIZE);
allocParamsForBufferLinear.pBufName = "GEN12 Tile Level Statistics Buffer";
m_tileStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource));
m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].dwSize = allocParamsForBufferLinear.dwBytes;
tileStatsData = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource, &lockFlagsWriteOnly);
MOS_ZeroMemory(tileStatsData, allocParamsForBufferLinear.dwBytes);
m_osInterface->pfnUnlockResource(m_osInterface, &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource);
}
}
if (IsFirstPass())
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPakInsertObjBatchBuf(&m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx]));
}
int currPass = GetCurrentPass();
if ((m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencDysPictureState2NdLevelBatchBuffer));
}
else
{
if (IsFirstPipe())
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ConstructPicStateBatchBuf(&m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex]));
}
if (!m_scalableMode)
{
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams;
MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS));
forceWakeupParams.bMFXPowerWellControl = true;
forceWakeupParams.bMFXPowerWellControlMask = true;
forceWakeupParams.bHEVCPowerWellControl = true;
forceWakeupParams.bHEVCPowerWellControlMask = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd(&cmdBuffer, &forceWakeupParams));
ReturnCommandBuffer(&cmdBuffer);
}
}
if (m_vdencBrcEnabled && IsFirstPipe())
{
// Invoke BRC init/reset FW
if (m_brcInit || m_brcReset)
{
if (!m_singleTaskPhaseSupported)
{
//Reset earlier set PAK perf tag
m_osInterface->pfnResetPerfBufferID(m_osInterface);
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcInitReset());
m_brcInit = m_brcReset = false;
}
// For multipass and singlepass+RePAK we call BRC update for all passes except last pass (RePAK)
// For single pass w/o RePAK (1 total pass) we call BRC update on one and only pass
if (!IsLastPass() || (m_currPass == 0 && m_numPasses == 0))
{
bool origFrameTrackingHeader = false;
bool origSingleTaskPhase = m_singleTaskPhaseSupported;
// If this is the case of Dynamic Scaling + BRC Pass 0' VDENC + Pak pass
// Disable SingleTaskPhase before running 1st BRC update
// To run HPU0 on the next pass i.e Pak only pass, we make Pass 1 as Pass 0 in which case the
// BRC dmem buffer( resVdencBrcUpdateDmemBuffer[0] ) will get overridden if we do not submit BRC command now.
if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
{
//Reset Frame Tracking Header for this submission
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
origFrameTrackingHeader = cmdBuffer.Attributes.bEnableMediaFrameTracking;
cmdBuffer.Attributes.bEnableMediaFrameTracking = false;
ReturnCommandBuffer(&cmdBuffer);
m_singleTaskPhaseSupported = false;
}
if (!m_singleTaskPhaseSupported)
{
//Reset performance buffer used for BRC init
m_osInterface->pfnResetPerfBufferID(m_osInterface);
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCBrcUpdate());
//Restore Original Frame Tracking Header
if (m_dysBrc && m_dysRefFrameFlags != DYS_REF_NONE)
{
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
cmdBuffer.Attributes.bEnableMediaFrameTracking = origFrameTrackingHeader;
ReturnCommandBuffer(&cmdBuffer);
}
//Restore the original state of SingleTaskPhaseSupported flag
m_singleTaskPhaseSupported = origSingleTaskPhase;
}
}
// run HuC_VP9Prob first pass (it runs in parallel with ENC)
if (m_hucEnabled)
{
if (IsFirstPipe() && (IsFirstPass() || IsLastPass() || m_vdencBrcEnabled)) // Before the first PAK pass and for RePak pass
{
if (!m_singleTaskPhaseSupported)
{
//Reset earlier set PAK perf tag
m_osInterface->pfnResetPerfBufferID(m_osInterface);
// Add Hpu tag here after updated
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU);
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(HuCVp9Prob());
if (!m_singleTaskPhaseSupported)
{
//reset performance buffer used for HPU update
m_osInterface->pfnResetPerfBufferID(m_osInterface);
}
}
}
else
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(RefreshFrameInternalBuffers());
}
// set HCP_SURFACE_STATE values
MHW_VDBOX_SURFACE_PARAMS surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID + 1];
for (uint8_t i = 0; i <= CODECHAL_HCP_ALTREF_SURFACE_ID; i++)
{
MOS_ZeroMemory(&surfaceParams[i], sizeof(surfaceParams[i]));
surfaceParams[i].Mode = m_mode;
surfaceParams[i].ucSurfaceStateId = i;
surfaceParams[i].ChromaType = m_outputChromaFormat;
surfaceParams[i].bSrc8Pak10Mode = (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth) && (!m_vp9SeqParams->SeqFlags.fields.SourceBitDepth);
switch (m_vp9SeqParams->SeqFlags.fields.EncodedBitDepth)
{
case VP9_ENCODED_BIT_DEPTH_10: //10 bit encoding
{
surfaceParams[i].ucBitDepthChromaMinus8 = 2;
surfaceParams[i].ucBitDepthLumaMinus8 = 2;
break;
}
default:
{
surfaceParams[i].ucBitDepthChromaMinus8 = 0;
surfaceParams[i].ucBitDepthLumaMinus8 = 0;
break;
}
}
}
// For PAK engine, we do NOT use scaled reference images even if dynamic scaling is enabled
PMOS_SURFACE refSurface[3], refSurfaceNonScaled[3], dsRefSurface4x[3], dsRefSurface8x[3];
for (auto i = 0; i < 3; i++)
{
refSurface[i] = refSurfaceNonScaled[i] = dsRefSurface4x[i] = dsRefSurface8x[i] = nullptr;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpSrcSurfaceParams(surfaceParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if (!m_singleTaskPhaseSupported)
{
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE);
}
// Non scalable mode header
if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
{
// Send command buffer header at the beginning (OS dependent)
// frame tracking tag is only added in the last command buffer header
bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
}
// Place hw semaphore on all other pipe to wait for first pipe HUC to finish.
int currPipe = GetCurrentPipe();
if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
{
if (!IsFirstPipe())
{
if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[currPipe].sResource))
{
// On second pipe, wait here for huc to finish on first pipe
SendHWWaitCommand(&m_hucDoneSemaphoreMem[currPipe].sResource, &cmdBuffer, (currPass + 1));
SetSemaphoreMem(&m_hucDoneSemaphoreMem[currPipe].sResource, &cmdBuffer, 0);
}
}
}
// Repak conditional batch buffer end based on repak flag written by Huc to HUC_STATUS regster
if (m_hucEnabled && (m_numPasses > 0) && IsLastPass())
{
// Insert conditional batch buffer end
// Bit 30 has been added as a success condition, therefore this needs to be masked to only check 31 for RePAK
// or else if HuC decides not to do RePAK for conditional RePAK yet terminates successfully RePAK will still happen.
// Success = bit 30 set to 1, Do RePAK = bit 31 set to 1, value is always 0; if 0 < memory, continue
MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams;
MOS_ZeroMemory(
&miConditionalBatchBufferEndParams,
sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
miConditionalBatchBufferEndParams.presSemaphoreBuffer =
&m_resHucPakMmioBuffer;
// Make the DisableCompareMask 0, so that the HW will do AND operation on DW0 with Mask DW1, refer to HuCVp9Prob() for the settings
// and compare the result against the Semaphore data which in our case dwValue = 0.
// If result > dwValue then continue execution otherwise terminate the batch buffer
miConditionalBatchBufferEndParams.bDisableCompareMask = false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
&cmdBuffer,
&miConditionalBatchBufferEndParams));
}
if (IsFirstPipe())
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES));
}
//Send VD_CONTROL_STATE Pipe Initialization
MHW_MI_VD_CONTROL_STATE_PARAMS vdCtrlParam;
MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
vdCtrlParam.initialization = true;
MhwMiInterfaceG12 *miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
// set HCP_PIPE_MODE_SELECT values
PMHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams = nullptr;
pipeModeSelectParams = m_vdencInterface->CreateMhwVdboxPipeModeSelectParams();
SetHcpPipeModeSelectParams(*pipeModeSelectParams);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, false));
// Decoded picture
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
// Source input
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
// Last reference picture
if (refSurface[0])
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
}
// Golden reference picture
if (refSurface[1])
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
}
// Alt reference picture
if (refSurface[2])
{
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceState(&surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
}
// set HCP_PIPE_BUF_ADDR_STATE values
PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams = nullptr;
pipeBufAddrParams = CreateHcpPipeBufAddrParams(pipeBufAddrParams);
if (pipeBufAddrParams)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetHcpPipeBufAddrParams(*pipeBufAddrParams, refSurface, refSurfaceNonScaled, dsRefSurface4x, dsRefSurface8x));
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams));
}
// set HCP_IND_OBJ_BASE_ADDR_STATE values
MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams;
SetHcpIndObjBaseAddrParams(indObjBaseAddrParams);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams));
// Send VD_CONTROL_STATE Pipe Initialization
MOS_ZeroMemory(&vdCtrlParam, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS));
vdCtrlParam.vdencEnabled = true;
vdCtrlParam.vdencInitialization = true;
miInterfaceG12 = static_cast <MhwMiInterfaceG12 *>(m_miInterface);
CODECHAL_ENCODE_CHK_STATUS_RETURN((miInterfaceG12)->AddMiVdControlStateCmd(&cmdBuffer, &vdCtrlParam));
// Change ref surfaces to scaled for VDENC for DYS
if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
{
surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID].psSurface = refSurface[0];
surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID].psSurface = refSurface[1];
surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID].psSurface = refSurface[2];
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeModeSelectCmd(&cmdBuffer, pipeModeSelectParams));
if (pipeModeSelectParams)
{
MOS_Delete(pipeModeSelectParams);
pipeModeSelectParams = nullptr;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencSrcSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID]));
if (m_pictureCodingType == I_TYPE)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID]));
}
else
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_LAST_SURFACE_ID]));
if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
{
if (m_refFrameFlags & 0x02)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_GOLDEN_SURFACE_ID]));
}
if (m_refFrameFlags & 0x04)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencRefSurfaceStateCmd(&cmdBuffer, &surfaceParams[CODECHAL_HCP_ALTREF_SURFACE_ID]));
}
}
}
MHW_VDBOX_SURFACE_PARAMS dsSurfaceParams[2]; // 8x and 4x DS surfaces
SetHcpDsSurfaceParams(&dsSurfaceParams[0]);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencDsRefSurfaceStateCmd(&cmdBuffer, &dsSurfaceParams[0], 2));
if (pipeBufAddrParams)
{
pipeBufAddrParams->presVdencTileRowStoreBuffer = &m_vdencTileRowStoreBuffer;
pipeBufAddrParams->presVdencCumulativeCuCountStreamoutSurface = &m_vdencCumulativeCuCountStreamoutSurface;
pipeBufAddrParams->bDynamicScalingEnable = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
pipeBufAddrParams->pRawSurfParam = &surfaceParams[CODECHAL_HCP_SRC_SURFACE_ID];
pipeBufAddrParams->pDecodedReconParam = &surfaceParams[CODECHAL_HCP_DECODED_SURFACE_ID];
#ifdef _MMC_SUPPORTED
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(pipeBufAddrParams));
#endif
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencPipeBufAddrCmd(&cmdBuffer, pipeBufAddrParams));
MOS_Delete(pipeBufAddrParams);
pipeBufAddrParams = nullptr;
}
MHW_BATCH_BUFFER secondLevelBatchBuffer;
MOS_ZeroMemory(&secondLevelBatchBuffer, sizeof(secondLevelBatchBuffer));
secondLevelBatchBuffer.dwOffset = 0;
secondLevelBatchBuffer.bSecondLevel = true;
if (m_hucEnabled)
{
secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferWrite[0];
}
else
{
if (m_dysRefFrameFlags != DYS_REF_NONE && m_dysVdencMultiPassEnabled)
{
secondLevelBatchBuffer.OsResource = m_resVdencDysPictureState2NdLevelBatchBuffer;
}
else
{
secondLevelBatchBuffer.OsResource = m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
}
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(
&cmdBuffer,
&secondLevelBatchBuffer));
CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS& pipeBufAddrParams,
PMOS_SURFACE* refSurface,
PMOS_SURFACE* refSurfaceNonScaled,
PMOS_SURFACE* dsRefSurface4x,
PMOS_SURFACE* dsRefSurface8x)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
pipeBufAddrParams = {};
pipeBufAddrParams.Mode = m_mode;
pipeBufAddrParams.psPreDeblockSurface = &m_reconSurface;
pipeBufAddrParams.psPostDeblockSurface = &m_reconSurface;
pipeBufAddrParams.psRawSurface = m_rawSurfaceToPak;
pipeBufAddrParams.presMfdDeblockingFilterRowStoreScratchBuffer =
&m_resDeblockingFilterLineBuffer;
pipeBufAddrParams.presDeblockingFilterTileRowStoreScratchBuffer =
&m_resDeblockingFilterTileLineBuffer;
pipeBufAddrParams.presDeblockingFilterColumnRowStoreScratchBuffer =
&m_resDeblockingFilterTileColumnBuffer;
pipeBufAddrParams.presMetadataLineBuffer = &m_resMetadataLineBuffer;
pipeBufAddrParams.presMetadataTileLineBuffer = &m_resMetadataTileLineBuffer;
pipeBufAddrParams.presMetadataTileColumnBuffer = &m_resMetadataTileColumnBuffer;
pipeBufAddrParams.presCurMvTempBuffer = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex);
pipeBufAddrParams.bDynamicScalingEnable = (m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled;
if (m_mmcState && m_mmcState->IsMmcEnabled() && m_reconSurface.bCompressible)
{
pipeBufAddrParams.PreDeblockSurfMmcState = MOS_MEMCOMP_HORIZONTAL;
pipeBufAddrParams.PostDeblockSurfMmcState = pipeBufAddrParams.PreDeblockSurfMmcState;
}
else
{
pipeBufAddrParams.PreDeblockSurfMmcState = MOS_MEMCOMP_DISABLED;
}
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetPipeBufAddr(&pipeBufAddrParams));
// Huc first pass doesn't write probabilities to output prob region but only updates to the input region. HuC run before repak writes to the ouput region.
uint8_t frameCtxIdx = 0;
if (m_hucEnabled && IsLastPass())
{
pipeBufAddrParams.presVp9ProbBuffer = &m_resHucProbOutputBuffer;
}
else
{
frameCtxIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
CODECHAL_ENCODE_ASSERT(frameCtxIdx < CODEC_VP9_NUM_CONTEXTS);
pipeBufAddrParams.presVp9ProbBuffer = &m_resProbBuffer[frameCtxIdx];
}
pipeBufAddrParams.presVp9SegmentIdBuffer = &m_resSegmentIdBuffer;
pipeBufAddrParams.presHvdTileRowStoreBuffer = &m_resHvcTileRowstoreBuffer;
pipeBufAddrParams.ps4xDsSurface = m_trackedBuf->Get4xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
pipeBufAddrParams.ps8xDsSurface = m_trackedBuf->Get8xDsReconSurface(CODEC_CURR_TRACKED_BUFFER);
pipeBufAddrParams.presVdencIntraRowStoreScratchBuffer = &m_resVdencIntraRowStoreScratchBuffer;
pipeBufAddrParams.dwNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
{
pipeBufAddrParams.presVdencStreamOutBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource;
pipeBufAddrParams.dwVdencStatsStreamOutOffset = m_tileStatsOffset.vdencStats;
}
else
{
pipeBufAddrParams.presVdencStreamOutBuffer = &m_resVdencBrcStatsBuffer;
pipeBufAddrParams.dwVdencStatsStreamOutOffset = 0;
}
pipeBufAddrParams.presStreamOutBuffer = nullptr;
if (m_scalableMode && m_hucEnabled && m_isTilingSupported)
{
PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex];
bool useTileStatisticsBuffer = tileStatisticsBuffer && !Mos_ResourceIsNull(&tileStatisticsBuffer->sResource);
// the new framestats streamout will now be the tile level stats buffer because each pak is spewing out tile level stats
pipeBufAddrParams.presFrameStatStreamOutBuffer = useTileStatisticsBuffer ? &tileStatisticsBuffer->sResource : nullptr;
pipeBufAddrParams.dwFrameStatStreamOutOffset = useTileStatisticsBuffer ? m_tileStatsOffset.pakStats : 0;
//Main Frame Stats are integrated by PAK integration kernel
}
else
{
pipeBufAddrParams.presFrameStatStreamOutBuffer = &m_resFrameStatStreamOutBuffer;
pipeBufAddrParams.dwFrameStatStreamOutOffset = 0;
}
pipeBufAddrParams.presSseSrcPixelRowStoreBuffer = &m_resSseSrcPixelRowStoreBuffer;
pipeBufAddrParams.presVdencStreamInBuffer = &m_resVdencStreamInBuffer[m_currRecycledBufIdx];
pipeBufAddrParams.presSegmentMapStreamOut = &m_resVdencSegmentMapStreamOut;
pipeBufAddrParams.presPakCuLevelStreamoutBuffer =
Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;
if (m_dysRefFrameFlags != DYS_REF_NONE)
{
pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer =
(m_vdencPakObjCmdStreamOutEnabled) ? m_resVdencPakObjCmdStreamOutBuffer : nullptr;
}
else
{
pipeBufAddrParams.presVdencPakObjCmdStreamOutBuffer = m_resVdencPakObjCmdStreamOutBuffer = &m_resMbCodeSurface;
}
if (m_pictureCodingType != I_TYPE)
{
for (auto i = 0; i < 3; i++)
{
CODECHAL_ENCODE_CHK_NULL_RETURN(refSurface[i]);
CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface4x[i]);
CODECHAL_ENCODE_CHK_NULL_RETURN(dsRefSurface8x[i]);
if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
{
pipeBufAddrParams.presReferences[i] = &refSurfaceNonScaled[i]->OsResource;
pipeBufAddrParams.presReferences[i+4] = &refSurfaceNonScaled[i]->OsResource;
}
else
{
pipeBufAddrParams.presReferences[i] = &refSurface[i]->OsResource;
}
pipeBufAddrParams.presVdencReferences[i] = &refSurface[i]->OsResource;
pipeBufAddrParams.presVdenc4xDsSurface[i] = &dsRefSurface4x[i]->OsResource;
pipeBufAddrParams.presVdenc8xDsSurface[i] = &dsRefSurface8x[i]->OsResource;
}
if ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled)
{
pipeBufAddrParams.psFwdRefSurface0 = refSurface[0];
pipeBufAddrParams.psFwdRefSurface1 = refSurface[1];
pipeBufAddrParams.psFwdRefSurface2 = refSurface[2];
}
pipeBufAddrParams.presColMvTempBuffer[0] = m_trackedBuf->GetMvTemporalBuffer(m_currMvTemporalBufferIndex ^ 0x01);
}
return eStatus;
}
uint16_t CodechalVdencVp9StateG12::GetNumTilesInFrame()
{
return ((1 << m_vp9PicParams->log2_tile_rows) * (1 << m_vp9PicParams->log2_tile_columns));
}
MOS_STATUS CodechalVdencVp9StateG12::AllocateResources()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::AllocateResources());
// create the tile coding state parameters
CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams =
(PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12) * m_maxTileNumber));
if (m_isTilingSupported)
{
// VDENC tile row store buffer
MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_vdencTileRowStoreBuffer),
"Failed to allocate VDENC Tile Row Store Buffer");
uint32_t maxPicWidthInSb = MOS_ROUNDUP_DIVIDE(m_maxPicWidth, CODEC_VP9_SUPER_BLOCK_WIDTH);
uint32_t maxPicHeightInSb = MOS_ROUNDUP_DIVIDE(m_maxPicHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT);
//PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
uint32_t size = maxPicWidthInSb * maxPicHeightInSb * 64 * CODECHAL_CACHELINE_SIZE; // One CU has 16-byte, and there are 64 CU in one SB. But, each tile needs to be aliged to the cache line
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.dwBytes = size;
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_resPakcuLevelStreamoutData.sResource);
CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
//PAK Slice Level Streamut Data. DW60-DW62 in HCP pipe buffer address command
// one LCU has one cache line. Use CU as LCU during creation
allocParamsForBufferLinear.dwBytes = size;
allocParamsForBufferLinear.pBufName = "PAK Slice Level Streamout Data";
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_resPakSliceLevelStreamutData.sResource);
CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
//HCP scalability Sync buffer
size = CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE;
allocParamsForBufferLinear.dwBytes = size;
allocParamsForBufferLinear.pBufName = "Hcp scalability Sync buffer ";
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_hcpScalabilitySyncBuffer.sResource);
CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
m_hcpScalabilitySyncBuffer.dwSize = size;
//HCP Tile Size Streamout Buffer. Use in HCP_IND_OBJ_CMD
size = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
allocParamsForBufferLinear.dwBytes = size;
allocParamsForBufferLinear.pBufName = "HCP Tile Record Buffer";
if (m_scalableMode && m_hucEnabled)
{
//Sizes of each buffer to be loaded into the region 0 as input and 1 loaded out as output.
MOS_ZeroMemory(&m_statsSize, sizeof(StatsInfo));
m_statsSize.tileSizeRecord = m_hcpInterface->GetPakHWTileSizeRecordSize();
m_statsSize.vdencStats = m_brcStatsBufSize; // VDEnc stats size
m_statsSize.pakStats = m_brcPakStatsBufSize; // Frame stats size
m_statsSize.counterBuffer = m_probabilityCounterBufferSize;
// HUC Pak Int DMEM buffer
allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
allocParamsForBufferLinear.pBufName = "Huc Pak Int Dmem Buffer";
for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
{
for (auto j = 0; j < m_brcMaxNumPasses; j++)
{
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_hucPakIntDmemBuffer[i][j]);
CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
}
}
// HuC PAK Int Region 1 programming related stats
MOS_ZeroMemory(&m_frameStatsOffset, sizeof(StatsInfo));
m_frameStatsOffset.tileSizeRecord = 0;
m_frameStatsOffset.vdencStats = MOS_ALIGN_CEIL((m_frameStatsOffset.tileSizeRecord + (m_maxTileNumber * m_statsSize.tileSizeRecord)), CODECHAL_PAGE_SIZE);
m_frameStatsOffset.pakStats = MOS_ALIGN_CEIL((m_frameStatsOffset.vdencStats + m_statsSize.vdencStats), CODECHAL_PAGE_SIZE);
m_frameStatsOffset.counterBuffer = MOS_ALIGN_CEIL((m_frameStatsOffset.pakStats + m_statsSize.pakStats), CODECHAL_PAGE_SIZE);
// HuC PAK Int DMEM region 1 buffer allocation
allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(m_frameStatsOffset.counterBuffer + m_statsSize.counterBuffer, CODECHAL_PAGE_SIZE);
allocParamsForBufferLinear.pBufName = "PAK HUC Integrated Frame Stats Buffer";
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
m_frameStatsPakIntegrationBufferSize = allocParamsForBufferLinear.dwBytes;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_frameStatsPakIntegrationBuffer.sResource));
m_frameStatsPakIntegrationBuffer.dwSize = allocParamsForBufferLinear.dwBytes;
MOS_LOCK_PARAMS lockFlags;
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;
uint8_t* data = nullptr;
data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource, &lockFlags);
MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
m_osInterface->pfnUnlockResource(m_osInterface, &m_frameStatsPakIntegrationBuffer.sResource);
// HuC PAK Int region 7, 8
allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(64, CODECHAL_PAGE_SIZE);
allocParamsForBufferLinear.pBufName = "HUC PAK Int Dummy Buffer";
eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_hucPakIntDummyBuffer);
CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus);
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;
data = (uint8_t*)m_osInterface->pfnLockResource(
m_osInterface,
&m_hucPakIntDummyBuffer,
&lockFlags);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
MOS_ZeroMemory(
data,
allocParamsForBufferLinear.dwBytes);
m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntDummyBuffer);
// Allocate region 9 of pak integration to be fed as input to HUC BRC region 7
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
allocParamsForBufferLinear.pBufName = "GEN12 PAK Integration FrameByteCount output";
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_hucPakIntBrcDataBuffer));
MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
lockFlags.WriteOnly = 1;
data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, &m_hucPakIntBrcDataBuffer, &lockFlags);
MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes);
m_osInterface->pfnUnlockResource(m_osInterface, &m_hucPakIntBrcDataBuffer);
// Allocate Semaphore memory for HUC to signal other pipe VDENC/PAK to continue
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
allocParamsForBufferLinear.pBufName = "GEN12 HUC done Semaphore Memory";
for (auto i = 0; i < m_numPipe; i++)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_hucDoneSemaphoreMem[i].sResource));
m_hucDoneSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
}
// Allocate Semaphore memory for VDEnc/PAK on all pipes to signal stitch command to stop waiting
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
allocParamsForBufferLinear.pBufName = "GEN12 VDEnc PAK done Semaphore Memory";
for (auto i = 0; i < m_numPipe; i++)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_stitchWaitSemaphoreMem[i].sResource));
m_stitchWaitSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes;
}
// Allocate semaphore memory for HUC HPU or BRC to wait on previous pass' PAK Integration command to finish
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
allocParamsForBufferLinear.dwBytes = sizeof(uint32_t);
allocParamsForBufferLinear.pBufName = "GEN12 VDEnc PAK Int done Semaphore Memory";
CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_pakIntDoneSemaphoreMem.sResource));
m_pakIntDoneSemaphoreMem.dwSize = allocParamsForBufferLinear.dwBytes;
}
}
if (m_enableTileStitchByHW)
{
MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
allocParamsForBufferLinear.Format = Format_Buffer;
for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
{
for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; j++)
{
// HuC stitching Data buffer
allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
allocParamsForBufferLinear.pBufName = "VP9 HuC Stitch Data Buffer";
CODECHAL_ENCODE_CHK_STATUS_RETURN(
m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForBufferLinear,
&m_resHucStitchDataBuffer[i][j]));
MOS_LOCK_PARAMS lockFlagsWriteOnly;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource(
m_osInterface,
&m_resHucStitchDataBuffer[i][j],
&lockFlagsWriteOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(pData);
MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes);
m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]);
}
}
//Second level BB for huc stitching cmd
MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
m_HucStitchCmdBatchBuffer.bSecondLevel = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
m_osInterface,
&m_HucStitchCmdBatchBuffer,
nullptr,
m_hwInterface->m_HucStitchCmdBatchBufferSize));
}
uint32_t aligned_width = MOS_ALIGN_CEIL(m_frameWidth, 64);
uint32_t aligned_height = MOS_ALIGN_CEIL(m_frameHeight, 64);
uint32_t num_lcu = (aligned_width * aligned_height) / (64 * 64);
MOS_ALLOC_GFXRES_PARAMS allocParamsForSurface;
MOS_ZeroMemory(&allocParamsForSurface, sizeof(MOS_ALLOC_GFXRES_PARAMS));
allocParamsForSurface.Type = MOS_GFXRES_BUFFER;
allocParamsForSurface.TileType = MOS_TILE_LINEAR;
allocParamsForSurface.Format = Format_Buffer;
allocParamsForSurface.dwBytes = num_lcu * 4;
allocParamsForSurface.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
m_osInterface,
&allocParamsForSurface,
&m_vdencCumulativeCuCountStreamoutSurface),
"Failed to allocate VDEnc Cumulative CU Count Streamout Surface");
return eStatus;
}
void CodechalVdencVp9StateG12::FreeResources()
{
CodechalVdencVp9State::FreeResources();
MOS_FreeMemory(m_tileParams);
if (m_isTilingSupported)
{
if (!Mos_ResourceIsNull(&m_vdencTileRowStoreBuffer))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_vdencTileRowStoreBuffer);
}
if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_resPakcuLevelStreamoutData.sResource);
}
if (!Mos_ResourceIsNull(&m_resPakSliceLevelStreamutData.sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_resPakSliceLevelStreamutData.sResource);
}
// Release Hcp scalability Sync buffer
if (!Mos_ResourceIsNull(&m_hcpScalabilitySyncBuffer.sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_hcpScalabilitySyncBuffer.sResource);
}
for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++)
{
if (!Mos_ResourceIsNull(&m_tileRecordBuffer[i].sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_tileRecordBuffer[i].sResource);
}
}
for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileStatsPakIntegrationBuffer); i++)
{
if (!Mos_ResourceIsNull(&m_tileStatsPakIntegrationBuffer[i].sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_tileStatsPakIntegrationBuffer[i].sResource);
}
}
if (!Mos_ResourceIsNull(&m_frameStatsPakIntegrationBuffer.sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_frameStatsPakIntegrationBuffer.sResource);
}
if (!Mos_ResourceIsNull(&m_hucPakIntBrcDataBuffer))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_hucPakIntBrcDataBuffer);
}
for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
{
for (auto j = 0; j < m_brcMaxNumPasses; j++)
{
if (!Mos_ResourceIsNull(&m_hucPakIntDmemBuffer[i][j]))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_hucPakIntDmemBuffer[i][j]);
}
}
}
if (!Mos_ResourceIsNull(&m_hucPakIntDummyBuffer))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_hucPakIntDummyBuffer);
}
for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_stitchWaitSemaphoreMem); i++)
{
if (!Mos_ResourceIsNull(&m_stitchWaitSemaphoreMem[i].sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_stitchWaitSemaphoreMem[i].sResource);
}
}
if (!Mos_ResourceIsNull(&m_pakIntDoneSemaphoreMem.sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_pakIntDoneSemaphoreMem.sResource);
}
for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_hucDoneSemaphoreMem); i++)
{
if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[i].sResource))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_hucDoneSemaphoreMem[i].sResource);
}
}
for (auto i = 0; i < m_numUncompressedSurface; i++)
{
for (auto j = 0; j < CODECHAL_ENCODE_VP9_MAX_NUM_HCP_PIPE; j++)
{
for (auto k = 0; k < 3; k++)
{
PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k];
if (!Mos_ResourceIsNull(&cmdBuffer->OsResource))
{
if (cmdBuffer->pCmdBase)
{
m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource);
}
m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource);
}
}
}
}
}
if (m_enableTileStitchByHW)
{
for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++)
{
for (auto j = 0; j < CODECHAL_ENCODE_VP9_BRC_MAX_NUM_OF_PASSES; j++)
{
// HuC stitching Data buffer
m_osInterface->pfnFreeResource(
m_osInterface,
&m_resHucStitchDataBuffer[i][j]);
}
}
//Second level BB for huc stitching cmd
Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
}
if (!Mos_ResourceIsNull(&m_vdencCumulativeCuCountStreamoutSurface))
{
m_osInterface->pfnFreeResource(
m_osInterface,
&m_vdencCumulativeCuCountStreamoutSurface);
}
return;
}
MOS_STATUS CodechalVdencVp9StateG12::SetRowstoreCachingOffsets()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
// Add row store cache support for VDENC Gen12.
if (m_hwInterface->GetHcpInterface()->IsRowStoreCachingSupported())
{
//add row store cache support.
MHW_VDBOX_ROWSTORE_PARAMS rowstoreParams;
rowstoreParams.Mode = m_mode;
rowstoreParams.dwPicWidth = m_frameWidth;
rowstoreParams.ucChromaFormat = ToHCPChromaFormat(m_chromaFormat);
rowstoreParams.ucBitDepthMinus8 = m_bitDepth * 2; // 0(8bit) -> 0, 1(10bit)->2, 2(12bit)->4
m_hwInterface->SetRowstoreCachingOffsets(&rowstoreParams);
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::Initialize(CodechalSetting * settings)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
uint32_t maxRows = 1;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalVdencVp9State::Initialize(settings));
GetSystemPipeNumberCommon();
if (MOS_VE_SUPPORTED(m_osInterface))
{
m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE));
CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState);
//scalability initialize
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface));
}
m_adaptiveRepakSupported = true;
//This flag enables pak-only mode for RePak pass
m_pakOnlyModeEnabledForLastPass = true;
maxRows = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_HEIGHT;
//Max num of rows = 4 by VP9 Spec
maxRows = MOS_MIN(maxRows, 4);
//Max tile numbers = max of number tiles for single pipe or max muber of tiles for scalable pipes
m_maxTileNumber = MOS_MAX((MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH) / CODECHAL_ENCODE_VP9_MIN_TILE_SIZE_WIDTH), m_numVdbox * maxRows);
m_numPipe = m_numVdbox;
m_scalableMode = (m_numPipe > 1);
m_useVirtualEngine = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRowstoreCachingOffsets());
MOS_USER_FEATURE_VALUE_DATA userFeatureData;
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
MOS_STATUS eStatusKey = MOS_UserFeature_ReadValue_ID(
nullptr,
__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ENABLE_HW_STITCH,
&userFeatureData,
m_osInterface->pOsContext);
m_enableTileStitchByHW = userFeatureData.i32Data ? true : false;
userFeatureData.i32Data = 1;
userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
MOS_UserFeature_ReadValue_ID(
nullptr,
__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_HUC_ENABLE_ID,
&userFeatureData,
m_osInterface->pOsContext);
m_hucEnabled = (userFeatureData.i32Data) ? true : false;
//Enable single pass dynamic scaling by default
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
userFeatureData.i32Data = 1;
MOS_UserFeature_ReadValue_ID(
nullptr,
__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_SINGLE_PASS_DYS_ENABLE_ID,
&userFeatureData,
m_osInterface->pOsContext);
m_dysVdencMultiPassEnabled = (userFeatureData.i32Data) ? false : true;
m_singlePassDys = !m_dysVdencMultiPassEnabled;
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
userFeatureData.i32Data = 1;
MOS_UserFeature_ReadValue_ID(
nullptr,
__MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
&userFeatureData,
m_osInterface->pOsContext);
m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false;
m_singleTaskPhaseSupportedInPak = m_singleTaskPhaseSupported;
// For dynamic scaling, the SingleTaskPhaseSupported is set to true and it does not get restored
// to the original value after encoding of the frame. So need to restore to the original state
m_storeSingleTaskPhaseSupported = m_singleTaskPhaseSupported; //Save the SingleTaskPhase state here
// Multi-Pass BRC: currently disabled by default
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
MOS_UserFeature_ReadValue_ID(
nullptr,
__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_MULTIPASS_BRC_ENABLE_ID,
&userFeatureData,
m_osInterface->pOsContext);
m_multipassBrcSupported = (userFeatureData.i32Data) ? true : false;
m_vdencBrcStatsBufferSize = m_brcStatsBufSize;
m_vdencBrcPakStatsBufferSize = m_brcPakStatsBufSize;
m_brcHistoryBufferSize = m_brcHistoryBufSize;
// HME enabled by default for VP9
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
MOS_UserFeature_ReadValue_ID(
NULL,
__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_ME_ENABLE_ID,
&userFeatureData,
m_osInterface->pOsContext);
m_hmeSupported = (userFeatureData.i32Data) ? true : false;
MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
MOS_UserFeature_ReadValue_ID(
NULL,
__MEDIA_USER_FEATURE_VALUE_VP9_ENCODE_16xME_ENABLE_ID,
&userFeatureData,
m_osInterface->pOsContext);
m_16xMeSupported = (userFeatureData.i32Data) ? true : false;
// disable superHME when HME is disabled
if (m_hmeSupported == false)
{
m_16xMeSupported = false;
}
// UHME disabled
m_32xMeSupported = false;
// VP9 uses a different streamin kernel
m_useNonLegacyStreamin = true;
// Initialize kernel State
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStates());
// Get max binding table count
m_maxBtCount = GetMaxBtCount(); // Need to add the correct BTcount when HME is enabled
return eStatus;
}
/*----------------------------------------------------------------------------
| Name : GetSegmentBlockIndexInFrame
| Purpose : Returns the offset of 32x32 block in the frame based on current x,y 32 block location in current tile
|
| Returns : MOS_STATUS
\---------------------------------------------------------------------------*/
uint32_t CodechalVdencVp9StateG12::GetSegmentBlockIndexInFrame(
uint32_t frameWidth,
uint32_t curr32XInTile,
uint32_t curr32YInTile,
uint32_t currTileStartY64aligned,
uint32_t currTileStartX64aligned)
{
uint32_t frameWidthIn32 = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
uint32_t curr32XInFrame = currTileStartX64aligned / 32 + curr32XInTile;
uint32_t curr32YInFrame = currTileStartY64aligned / 32 + curr32YInTile;
uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
return curr32BlockInFrame;
}
/*----------------------------------------------------------------------------
| Name : InitZigZagToRasterLUTPerTile
| Purpose : Rasterize a tile's 32 blocks' segmap indices, add to frame mapbuffer created for these indices
|
| Returns : MOS_STATUS
\---------------------------------------------------------------------------*/
MOS_STATUS CodechalVdencVp9StateG12::InitZigZagToRasterLUTPerTile(
uint32_t tileHeight,
uint32_t tileWidth,
uint32_t currTileStartYInFrame,
uint32_t currTileStartXInFrame)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
// Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned))
// We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
// We keep this map around until sequence is finished, it's deleted at device destruction.
if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
{
if (m_mapBuffer) // free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
{
MOS_FreeMemory(m_mapBuffer);
}
// Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index.
m_mapBuffer = (uint32_t*)MOS_AllocAndZeroMemory(
(MOS_ALIGN_CEIL(m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
(MOS_ALIGN_CEIL(m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
sizeof(int32_t)); //Framewidth and height are 64 aligned already
}
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mapBuffer);
uint32_t align64Width32 = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
uint32_t align64Height32 = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
uint32_t* mapBufferZigZagPerTile = (uint32_t*)MOS_AllocAndZeroMemory(align64Width32*align64Height32 * sizeof(uint32_t));
CODECHAL_ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
m_segStreamInHeight = m_frameHeight;
m_segStreamInWidth = m_frameWidth;
uint32_t dwCount32 = 0; //Number of 32 by 32 blocks that will be processed here
for (uint32_t curr32YInTile = 0; curr32YInTile< align64Height32; curr32YInTile++)
{
for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
{
mapBufferZigZagPerTile[dwCount32++] = GetSegmentBlockIndexInFrame(
m_frameWidth,
curr32XInTile,
curr32YInTile,
currTileStartYInFrame,
currTileStartXInFrame);
}
}
// mapBufferZigZagPerTile ---> m_mapBuffer
// | a b c d ... ---> | a b W X c d Y Z ....
// | W X Y Z ...
uint32_t num32blocks = align64Width32 * align64Height32;
uint32_t tileOffsetIndex = m_32BlocksRasterized;
for (uint32_t i = 0, dwRasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
{
for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
{
m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
}
for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
{
m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[dwRasterCount++];
m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[dwRasterCount++];
}
}
if (mapBufferZigZagPerTile) // free per tile map buffer as it has been rasterized and copied into the mapbuffer
{
MOS_FreeMemory(mapBufferZigZagPerTile);
}
// ^ Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
if (width32 != align64Width32) // replicate last column
{
for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
{
m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 1];
m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
}
}
uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
if (height32 != align64Height32) // replicate last row
{
for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
{
m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 2];
m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
}
}
//Index offset to be added to the buffer for the next tile depending on how many blocks were rasterized already in this tile
m_32BlocksRasterized += dwCount32;
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::CalculateVdencPictureStateCommandSize()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams;
uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
stateCmdSizeParams.bHucDummyStream = true;
m_hwInterface->GetHxxStateCommandSize(
CODECHAL_ENCODE_MODE_VP9,
&vdencPictureStatesSize,
&vdencPicturePatchListSize,
&stateCmdSizeParams);
m_defaultPictureStatesSize += vdencPictureStatesSize;
m_defaultPicturePatchListSize += vdencPicturePatchListSize;
m_hwInterface->GetVdencStateCommandsDataSize(
CODECHAL_ENCODE_MODE_VP9,
&vdencPictureStatesSize,
&vdencPicturePatchListSize);
m_defaultPictureStatesSize += vdencPictureStatesSize;
m_defaultPicturePatchListSize += vdencPicturePatchListSize;
return eStatus;
}
PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS CodechalVdencVp9StateG12::CreateHcpPipeBufAddrParams(PMHW_VDBOX_PIPE_BUF_ADDR_PARAMS pipeBufAddrParams)
{
pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12);
return pipeBufAddrParams;
}
MOS_STATUS CodechalVdencVp9StateG12::UpdateCmdBufAttribute(
PMOS_COMMAND_BUFFER cmdBuffer,
bool renderEngineInUse)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
// should not be there. Will remove it in the next change
CODECHAL_ENCODE_FUNCTION_ENTER;
if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe)
{
PMOS_CMD_BUF_ATTRI_VE attriExt =
(PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe);
memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE));
attriExt->bUseVirtualEngineHint =
attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::AddMediaVfeCmd(
PMOS_COMMAND_BUFFER cmdBuffer,
SendKernelCmdsParams *params)
{
CODECHAL_ENCODE_CHK_NULL_RETURN(params);
MHW_VFE_PARAMS_G12 vfeParams = {};
vfeParams.pKernelState = params->pKernelState;
vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads;
vfeParams.bFusedEuDispatch = false; // legacy mode
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
return MOS_STATUS_SUCCESS;
}
MOS_STATUS CodechalVdencVp9StateG12::HuCVp9PakInt(
PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (!IsFirstPipe())
{
return eStatus;
}
CODECHAL_DEBUG_TOOL(
uint32_t hucRegionSize[16] = { 0 };
const char* hucRegionName[16] = { "\0" };
hucRegionName[0] = "_MultiPakStreamout_input";
hucRegionSize[0] = m_tileStatsPakIntegrationBufferSize;
hucRegionName[1] = "_IntegratedStreamout_output";
hucRegionSize[1] = m_frameStatsPakIntegrationBufferSize;
hucRegionName[4] = "_HCPPICSTATEInputDummy";
hucRegionSize[4] = sizeof(m_hucPakIntDummyBuffer);
hucRegionName[5] = "_HCPPICSTATEInputDummy";
hucRegionSize[5] = sizeof(m_hucPakIntDummyBuffer);
hucRegionName[6] = "_HCPPICSTATEInputDummy";
hucRegionSize[6] = sizeof(m_hucPakIntDummyBuffer);
hucRegionName[7] = "_HCPPICSTATEInputDummy";
hucRegionSize[7] = sizeof(m_hucPakIntDummyBuffer);
hucRegionName[8] = "_HucStitchDataBuffer";
hucRegionSize[8] = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
hucRegionName[9] = "_BrcDataOutputBuffer"; // This is the pak MMIO region 7 , not 4, of BRC update
hucRegionSize[9] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
hucRegionName[15] = "_TileRecordBuffer";
hucRegionSize[15] = m_maxTileNumber * MOS_ALIGN_CEIL(m_hcpInterface->GetPakHWTileSizeRecordSize(), CODECHAL_CACHELINE_SIZE);
)
MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
MOS_ZeroMemory(&imemParams, sizeof(imemParams));
imemParams.dwKernelDescriptor = m_vdboxHucPakIntegrationKernelDescriptor;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(cmdBuffer, &imemParams));
// pipe mode select
MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
pipeModeSelectParams.Mode = m_mode;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakInt());
MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
dmemParams.presHucDataSource = &m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()];
dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntDmem), CODECHAL_CACHELINE_SIZE);
dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(cmdBuffer, &dmemParams));
if (m_enableTileStitchByHW)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
}
MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
MOS_ZeroMemory(&virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS));
virtualAddrParams.regionParams[0].presRegion = &m_tileStatsPakIntegrationBuffer[m_virtualEngineBBIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc
virtualAddrParams.regionParams[0].dwOffset = 0;
virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource; // Region 1 - HuC Frame statistics output
virtualAddrParams.regionParams[1].isWritable = true;
virtualAddrParams.regionParams[4].presRegion = &m_hucPakIntDummyBuffer; // Region 4 - Not used for VP9
virtualAddrParams.regionParams[5].presRegion = &m_hucPakIntDummyBuffer; // Region 5 - Not used for VP9
virtualAddrParams.regionParams[5].isWritable = true;
virtualAddrParams.regionParams[6].presRegion = &m_hucPakIntDummyBuffer; // Region 6 - Not used for VP9
virtualAddrParams.regionParams[6].isWritable = true;
virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntDummyBuffer; // Region 7 - Not used for VP9
if (m_enableTileStitchByHW)
{
virtualAddrParams.regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][GetCurrentPass()]; // Region 8 - data buffer read by HUC for stitching cmd generation
virtualAddrParams.regionParams[8].isWritable = true;
}
virtualAddrParams.regionParams[9].presRegion = &m_hucPakIntBrcDataBuffer; // Region 9 - HuC outputs BRC data
virtualAddrParams.regionParams[9].isWritable = true;
if (m_enableTileStitchByHW)
{
virtualAddrParams.regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc
virtualAddrParams.regionParams[10].isWritable = true;
}
virtualAddrParams.regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer
virtualAddrParams.regionParams[15].dwOffset = 0;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(cmdBuffer, true));
// wait Huc completion (use HEVC bit for now)
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
vdPipeFlushParams.Flags.bFlushHEVC = 1;
vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams));
// Flush the engine to ensure memory written out
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
flushDwParams.bVideoPipelineCacheInvalidate = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams));
auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, cmdBuffer, false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(cmdBuffer));
CODECHAL_DEBUG_TOOL(
// Dump input Pak Integration buffers before running HuC
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[0].presRegion,
0,
hucRegionSize[0],
0,
"_PakIntStitchBuffer",
(virtualAddrParams.regionParams[0].isWritable ? true : false),
GetCurrentPass(),
CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
m_debugInterface->DumpHucDmem(
&m_hucPakIntDmemBuffer[m_currRecycledBufIdx][GetCurrentPass()],
sizeof(HucPakIntDmem),
GetCurrentPass(),
CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
for (auto i = 0; i < 16; i++)
{
if (virtualAddrParams.regionParams[i].presRegion)
{
if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
{
continue;
}
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[i].presRegion,
virtualAddrParams.regionParams[i].dwOffset,
hucRegionSize[i],
i,
hucRegionName[i],
!virtualAddrParams.regionParams[i].isWritable,
GetCurrentPass(),
CodechalHucRegionDumpType::hucRegionDumpPakIntegrate);
}
}
)
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::ConstructPicStateBatchBuf(
PMOS_RESOURCE picStateBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(picStateBuffer);
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
{
// Send command buffer header at the beginning (OS dependent)
bool requestFrameTracking = false;
if (!m_vp9PicParams->PicFlags.fields.super_frame) {
requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
m_firstTaskInPhase = false;
}
ReturnCommandBuffer(&cmdBuffer);
MOS_LOCK_PARAMS lockFlagsWriteOnly;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
uint8_t* data = (uint8_t*)m_osInterface->pfnLockResource(m_osInterface, picStateBuffer, &lockFlagsWriteOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
MOS_COMMAND_BUFFER constructedCmdBuf;
MOS_ZeroMemory(&constructedCmdBuf, sizeof(constructedCmdBuf));
constructedCmdBuf.pCmdBase = (uint32_t *)data;
constructedCmdBuf.pCmdPtr = (uint32_t *)data;
constructedCmdBuf.iOffset = 0;
constructedCmdBuf.iRemaining = m_vdencPicStateSecondLevelBatchBufferSize;
eStatus = AddCommandsVp9(CODECHAL_CMD1, &constructedCmdBuf);
if (eStatus != MOS_STATUS_SUCCESS)
{
m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add CODECHAL_CMD1 command.");
return eStatus;
}
// HCP_VP9_PIC_STATE
MHW_VDBOX_VP9_ENCODE_PIC_STATE picState;
MOS_ZeroMemory(&picState, sizeof(picState));
picState.pVp9PicParams = m_vp9PicParams;
picState.pVp9SeqParams = m_vp9SeqParams;
picState.ppVp9RefList = &(m_refList[0]);
picState.PrevFrameParams.fields.KeyFrame = m_prevFrameInfo.KeyFrame;
picState.PrevFrameParams.fields.IntraOnly = m_prevFrameInfo.IntraOnly;
picState.PrevFrameParams.fields.Display = m_prevFrameInfo.ShowFrame;
picState.dwPrevFrmWidth = m_prevFrameInfo.FrameWidth;
picState.dwPrevFrmHeight = m_prevFrameInfo.FrameHeight;
picState.ucTxMode = m_txMode;
picState.bSSEEnable = m_vdencBrcEnabled;
picState.bUseDysRefSurface = (m_dysRefFrameFlags != DYS_REF_NONE) && m_dysVdencMultiPassEnabled;
picState.bVdencPakOnlyPassFlag = m_vdencPakonlyMultipassEnabled;
picState.uiMaxBitRate = m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
picState.uiMinBitRate = m_vp9SeqParams->MinBitRate * CODECHAL_ENCODE_BRC_KBPS;
m_hucPicStateOffset = (uint16_t)constructedCmdBuf.iOffset;
eStatus = m_hcpInterface->AddHcpVp9PicStateEncCmd(&constructedCmdBuf, nullptr, &picState);
if (eStatus != MOS_STATUS_SUCCESS)
{
m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add HCP_VP9_PIC_STATE command.");
return eStatus;
}
// HCP_VP9_SEGMENT_STATE
MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
MOS_ZeroMemory(&segmentState, sizeof(segmentState));
segmentState.Mode = m_mode;
segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
uint8_t segmentCount = (m_vp9PicParams->PicFlags.fields.segmentation_enabled) ? CODEC_VP9_MAX_SEGMENTS : 1;
for (uint8_t i = 0; i < segmentCount; i++)
{
segmentState.ucCurrentSegmentId = i;
eStatus = m_hcpInterface->AddHcpVp9SegmentStateCmd(&constructedCmdBuf, nullptr, &segmentState);
if (eStatus != MOS_STATUS_SUCCESS)
{
m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MHW_VDBOX_VP9_SEGMENT_STATE command.");
return eStatus;
}
}
// Adjust cmd buffer offset to have 8 segment state blocks
if (segmentCount < CODEC_VP9_MAX_SEGMENTS)
{
// Max 7 segments, 32 bytes each
uint8_t zeroBlock[m_segmentStateBlockSize * (CODEC_VP9_MAX_SEGMENTS - 1)];
MOS_ZeroMemory(zeroBlock, sizeof(zeroBlock));
Mhw_AddCommandCmdOrBB(&constructedCmdBuf, nullptr, zeroBlock, (CODEC_VP9_MAX_SEGMENTS - segmentCount) * m_segmentStateBlockSize);
}
m_slbbImgStateOffset = (uint16_t)constructedCmdBuf.iOffset;
eStatus = AddCommandsVp9(CODECHAL_CMD2, &constructedCmdBuf);
if (eStatus != MOS_STATUS_SUCCESS)
{
m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add CODECHAL_CMD2 command.");
return eStatus;
}
// BB_END
eStatus = m_miInterface->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr);
if (eStatus != MOS_STATUS_SUCCESS)
{
m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
CODECHAL_ENCODE_ASSERTMESSAGE("Failed to add MI Batch Buffer End command.");
return eStatus;
}
m_hucSlbbSize = (uint16_t)constructedCmdBuf.iOffset;
m_osInterface->pfnUnlockResource(m_osInterface, picStateBuffer);
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::HuCVp9Prob()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (!IsFirstPipe())
{
return eStatus;
}
CODECHAL_DEBUG_TOOL(
uint32_t hucRegionSize[16] = { 0 };
const char* hucRegionName[16] = { "\0" };
hucRegionName[0] = "_UpdatedProbBuffer"; // hucRegionName[0] is used to dump region 0 after HuC is run, which has updated probabilities. Input Region 0 is dumped separetely before HuC.
hucRegionSize[0] = 32 * CODECHAL_CACHELINE_SIZE;
hucRegionName[1] = "_CountersBuffer";
hucRegionSize[1] = 193 * CODECHAL_CACHELINE_SIZE;
hucRegionName[2] = "_ProbBuffer";
hucRegionSize[2] = 32 * CODECHAL_CACHELINE_SIZE;
hucRegionName[3] = "_ProbDeltaBuffer";
hucRegionSize[3] = 29 * CODECHAL_CACHELINE_SIZE;
hucRegionName[4] = "_UncompressedHdr";
hucRegionSize[4] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
hucRegionName[5] = "_CompressedHdr";
hucRegionSize[5] = 32 * CODECHAL_CACHELINE_SIZE;
hucRegionName[6] = "_SecondLevelBatchBuffer";
hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
hucRegionName[7] = "_SecondLevelBatchBuffer";
hucRegionSize[7] = m_vdencPicStateSecondLevelBatchBufferSize;
hucRegionName[8] = "_UncompressedHdr";
hucRegionSize[8] = CODECHAL_ENCODE_VP9_PAK_INSERT_UNCOMPRESSED_HEADER;
hucRegionName[9] = "_DefaultProbs";
hucRegionSize[9] = sizeof(Keyframe_Default_Probs) + sizeof(Inter_Default_Probs);
hucRegionName[10] = "_SuperFrameBuffer";
hucRegionSize[10] = CODECHAL_ENCODE_VP9_BRC_SUPER_FRAME_BUFFER_SIZE;
hucRegionName[11] = "_DataExtension";
hucRegionSize[11] = CODECHAL_ENCODE_VP9_VDENC_DATA_EXTENSION_SIZE;
)
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
{
bool requestFrameTracking = false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
// Send command buffer header at the beginning (OS dependent)
// frame tracking tag is only added in the last command buffer header
requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
m_firstTaskInPhase = false;
}
int currPass = GetCurrentPass();
if (m_scalableMode && m_isTilingSupported)
{
// Define huc done semaphore to be empty at the start
for (auto i = 0; i < m_numPipe; i++)
{
SetSemaphoreMem(&m_hucDoneSemaphoreMem[i].sResource, &cmdBuffer, 0);
}
// Wait here for pak int done from previous pass
if (IsLastPass())
{
SendHWWaitCommand(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, currPass);
SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, 0);
}
}
// load kernel from WOPCM into L2 storage RAM
MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
MOS_ZeroMemory(&imemParams, sizeof(imemParams));
imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencProbKernelDescriptor;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
// pipe mode select
MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
pipeModeSelectParams.Mode = m_mode;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCVp9Prob());
MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
dmemParams.presHucDataSource = &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx];
dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucProbDmem), CODECHAL_CACHELINE_SIZE);
dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
// Add Virtual addr
MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
// Input regions
virtualAddrParams.regionParams[0].presRegion = &m_resProbBuffer[m_vp9PicParams->PicFlags.fields.frame_context_idx];
virtualAddrParams.regionParams[0].isWritable = true; // Region 0 is both read and write for HuC. Has input probabilities before running HuC and updated probabilities after running HuC, which will then be input to next pass
if (m_scalableMode)
{
virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
virtualAddrParams.regionParams[1].dwOffset = m_frameStatsOffset.counterBuffer;
}
else
{
virtualAddrParams.regionParams[1].presRegion = &m_resProbabilityCounterBuffer;
virtualAddrParams.regionParams[1].dwOffset = 0;
}
// If BRC enabled, BRC Pass 2 output SLBB -> input SLBB for HPU on pass 2 (HPU pass 1 and 3. BRC Update pass 1 and 2)
// BRC Pass 1 output SLBB -> input SLBB for HPU on pass 1
// If BRC not on, Driver prepared SLBB -> input to HPU on both passes
if (m_vdencBrcEnabled)
{
virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
}
else
{
virtualAddrParams.regionParams[7].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
}
virtualAddrParams.regionParams[8].presRegion = &m_resHucPakInsertUncompressedHeaderReadBuffer[m_currRecycledBufIdx];
virtualAddrParams.regionParams[9].presRegion = &m_resHucDefaultProbBuffer;
// Output regions
virtualAddrParams.regionParams[2].presRegion = &m_resHucProbOutputBuffer; // Final probability output from HuC after each pass
virtualAddrParams.regionParams[2].isWritable = true;
virtualAddrParams.regionParams[3].presRegion = &m_resProbabilityDeltaBuffer;
virtualAddrParams.regionParams[3].isWritable = true;
virtualAddrParams.regionParams[4].presRegion = &m_resHucPakInsertUncompressedHeaderWriteBuffer;
virtualAddrParams.regionParams[4].isWritable = true;
virtualAddrParams.regionParams[5].presRegion = &m_resCompressedHeaderBuffer;
virtualAddrParams.regionParams[5].isWritable = true;
virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
virtualAddrParams.regionParams[6].isWritable = true;
virtualAddrParams.regionParams[10].presRegion = &m_resBitstreamBuffer;
virtualAddrParams.regionParams[10].isWritable = true;
virtualAddrParams.regionParams[11].presRegion = &m_resVdencDataExtensionBuffer;
virtualAddrParams.regionParams[11].isWritable = true;
m_hpuVirtualAddrParams = virtualAddrParams;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
// Store HUC_STATUS2 register bit 6 before HUC_Start command
// This bit will be cleared by HW at the end of a HUC workload
// (HUC_Start command with last start bit set).
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
// wait Huc completion (use HEVC bit for now)
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
vdPipeFlushParams.Flags.bFlushHEVC = 1;
vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
// Flush the engine to ensure memory written out
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
flushDwParams.bVideoPipelineCacheInvalidate = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
// Write HUC_STATUS mask: DW1 (mask value)
MHW_MI_STORE_DATA_PARAMS storeDataParams;
MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
storeDataParams.dwResourceOffset = sizeof(uint32_t);
storeDataParams.dwValue = 1 << 31; //Repak bit for HUC is bit 31
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
// store HUC_STATUS register
MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
storeRegParams.dwOffset = 0;
storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
// In case of other pipes running other tiles, signal the vdenc/pak hw commands there to proceed because huc done
if (m_scalableMode && m_isTilingSupported)
{
for (auto i = 1; i < m_numPipe; i++)
{
if (!Mos_ResourceIsNull(&m_hucDoneSemaphoreMem[i].sResource))
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(
SetSemaphoreMem(
&m_hucDoneSemaphoreMem[i].sResource,
&cmdBuffer,
(currPass + 1))
);
}
}
}
// For superframe pass, after HuC executes, write the updated size (combined frame size) to status report
// So app knows total size instead of just the showframe size
if (m_superFrameHucPass)
{
EncodeStatusBuffer* encodeStatusBuf = &m_encodeStatusBuf;
uint32_t baseOffset =
(encodeStatusBuf->wCurrIndex * m_encodeStatusBuf.dwReportSize) +
sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
MHW_MI_COPY_MEM_MEM_PARAMS copyMemMemParams;
MOS_ZeroMemory(&copyMemMemParams, sizeof(copyMemMemParams));
copyMemMemParams.presSrc = virtualAddrParams.regionParams[11].presRegion;
copyMemMemParams.dwSrcOffset = 0; // Updated framesize is 1st DW in buffer
copyMemMemParams.presDst = &encodeStatusBuf->resStatusBuffer;
copyMemMemParams.dwDstOffset = baseOffset + encodeStatusBuf->dwBSByteCountOffset;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(
&cmdBuffer,
&copyMemMemParams));
}
if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectEndCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
// Dump input probabilites before running HuC
CODECHAL_DEBUG_TOOL(
CodechalHucRegionDumpType dumpType = m_superFrameHucPass ? CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame : CodechalHucRegionDumpType::hucRegionDumpHpu;
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[0].presRegion,
0,
hucRegionSize[0],
0,
"_ProbBuffer",
(virtualAddrParams.regionParams[0].isWritable ? true : false),
currPass,
dumpType);
)
ReturnCommandBuffer(&cmdBuffer);
// For Temporal scaling, super frame pass is initiated after the command buffer submission in ExecuteSliceLevel.
// So if Single Task Phase is enabled, then we need to explicitly submit the command buffer here to call HuC
if ((!m_singleTaskPhaseSupported && !m_scalableMode) || m_superFrameHucPass)
{
bool renderFlags = m_videoContextUsesNullHw;
CODECHAL_DEBUG_TOOL(
std::string nameCmdPass = (m_superFrameHucPass ? "HPU_SuperFramePass" : "HPU_Pass") + std::to_string(currPass);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
CODECHAL_NUM_MEDIA_STATES,
nameCmdPass.c_str()));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, m_vp9PicParams->PicFlags.fields.super_frame));
ReturnCommandBuffer(&cmdBuffer);
CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderFlags));
CODECHAL_DEBUG_TOOL(
CodechalHucRegionDumpType dumpType = m_superFrameHucPass ? CodechalHucRegionDumpType::hucRegionDumpHpuSuperFrame : CodechalHucRegionDumpType::hucRegionDumpHpu;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
&m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx],
sizeof(HucProbDmem),
currPass,
dumpType));
for (auto i = 0; i < 16; i++)
{
if (virtualAddrParams.regionParams[i].presRegion)
{
if (m_scalableMode && m_isTilingSupported && virtualAddrParams.regionParams[i].isWritable && i != 11)
{
continue;
}
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[i].presRegion,
virtualAddrParams.regionParams[i].dwOffset,
hucRegionSize[i],
i,
hucRegionName[i],
!virtualAddrParams.regionParams[i].isWritable,
currPass,
dumpType);
}
})
}
return eStatus;
}
/*----------------------------------------------------------------------------
| Name : HuCBrcInitReset
| Purpose : Start/Submit VP9 HuC BrcInit kernel to HW
|
| Returns : MOS_STATUS
\---------------------------------------------------------------------------*/
MOS_STATUS CodechalVdencVp9StateG12::HuCBrcInitReset()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
int currPass = GetCurrentPass();
CODECHAL_DEBUG_TOOL(
uint32_t hucRegionSize[16];
const char* hucRegionName[16];
hucRegionName[0] = "_BrcHistoryBuffer";
hucRegionSize[0] = m_brcHistoryBufferSize;
)
MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
#if (_DEBUG || _RELEASE_INTERNAL)
if (m_swBrcMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(false));
// Set region params for dumping only
MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
virtualAddrParams.regionParams[0].isWritable = true;
m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
&m_resVdencBrcInitDmemBuffer,
sizeof(HucBrcInitDmem),
0,
CodechalHucRegionDumpType::hucRegionDumpInit));
for (auto i = 0; i < 16; i++)
{
if (virtualAddrParams.regionParams[i].presRegion)
{
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[i].presRegion,
virtualAddrParams.regionParams[i].dwOffset,
hucRegionSize[i],
i,
hucRegionName[i],
!virtualAddrParams.regionParams[i].isWritable,
currPass,
CodechalHucRegionDumpType::hucRegionDumpInit);
}
}
)
return eStatus;
}
#endif
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if ((!m_singleTaskPhaseSupported || m_firstTaskInPhase) && !m_scalableMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
// Send command buffer header at the beginning (OS dependent)
bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
m_firstTaskInPhase = false;
}
// load kernel from WOPCM into L2 storage RAM
MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
MOS_ZeroMemory(&imemParams, sizeof(imemParams));
imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcInitKernelDescriptor;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
// pipe mode select
MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
pipeModeSelectParams.Mode = m_mode;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcInitReset());
m_inputBitsPerFrame = ((m_vp9SeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS) * 100.) / ((m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiNumerator * 100.) / m_vp9SeqParams->FrameRate[m_vp9SeqParams->NumTemporalLayersMinus1].uiDenominator);
m_curTargetFullness = m_vp9SeqParams->TargetBitRate[m_vp9SeqParams->NumTemporalLayersMinus1] * CODECHAL_ENCODE_BRC_KBPS;
// set HuC DMEM param
MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
dmemParams.presHucDataSource = &m_resVdencBrcInitDmemBuffer;
dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcInitDmem), CODECHAL_CACHELINE_SIZE);
dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
virtualAddrParams.regionParams[0].isWritable = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
// Store HUC_STATUS2 register bit 6 before HUC_Start command
// This bit will be cleared by HW at the end of a HUC workload
// (HUC_Start command with last start bit set).
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
// wait Huc completion (use HEVC bit for now)
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
vdPipeFlushParams.Flags.bFlushHEVC = 1;
vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
// Flush the engine to ensure memory written out
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
flushDwParams.bVideoPipelineCacheInvalidate = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_perfProfiler->AddPerfCollectStartCmd((void *)this, m_osInterface, m_miInterface, &cmdBuffer));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
ReturnCommandBuffer(&cmdBuffer);
if (!m_singleTaskPhaseSupported)
{
bool renderingFlags = m_videoContextUsesNullHw;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
&m_resVdencBrcInitDmemBuffer,
sizeof(HucBrcInitDmem),
0,
CodechalHucRegionDumpType::hucRegionDumpInit));
for (auto i = 0; i < 16; i++)
{
if (virtualAddrParams.regionParams[i].presRegion)
{
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[i].presRegion,
virtualAddrParams.regionParams[i].dwOffset,
hucRegionSize[i],
i,
hucRegionName[i],
!virtualAddrParams.regionParams[i].isWritable,
0,
CodechalHucRegionDumpType::hucRegionDumpInit);
}
}
)
}
return eStatus;
}
/*----------------------------------------------------------------------------
| Name : HuCBrcUpdate
| Purpose : Start/Submit VP9 HuC BrcUpdate kernel to HW
|
| Returns : MOS_STATUS
\---------------------------------------------------------------------------*/
MOS_STATUS CodechalVdencVp9StateG12::HuCBrcUpdate()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
int currPass = GetCurrentPass();
CODECHAL_DEBUG_TOOL(
uint32_t hucRegionSize[16];
const char* hucRegionName[16];
hucRegionName[0] = "_BrcHistory";
hucRegionSize[0] = m_brcHistoryBufferSize;
hucRegionName[1] = "_VDEncStats";
hucRegionSize[1] = m_vdencBrcStatsBufferSize;
hucRegionName[2] = "_PAKStats";
hucRegionSize[2] = m_vdencBrcPakStatsBufferSize;
hucRegionName[3] = "_InputSLBB";
hucRegionSize[3] = m_vdencPicStateSecondLevelBatchBufferSize;
hucRegionName[4] = "_BRCData";
hucRegionSize[4] = CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE;
hucRegionName[5] = "_ConstData";
hucRegionSize[5] = m_brcConstantSurfaceSize;
hucRegionName[6] = "_OutputSLBB";
hucRegionSize[6] = m_vdencPicStateSecondLevelBatchBufferSize;
hucRegionName[7] = "_PAKMMIO";
hucRegionSize[7] = MOS_ALIGN_CEIL(CODECHAL_ENCODE_VP9_HUC_BRC_DATA_BUFFER_SIZE, CODECHAL_PAGE_SIZE);
)
MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams;
#if (_DEBUG || _RELEASE_INTERNAL)
if (m_swBrcMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer, m_pictureCodingType));
// Set region params for dumping only
MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
virtualAddrParams.regionParams[0].isWritable = true;
virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
virtualAddrParams.regionParams[4].isWritable = true;
virtualAddrParams.regionParams[5].presRegion = &m_brcBuffers.resBrcConstantDataBuffer;
virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
virtualAddrParams.regionParams[6].isWritable = true;
virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion(// Dump history IN since it's both IN/OUT, OUT will dump at end of function, rest of buffers are IN XOR OUT (not both)
virtualAddrParams.regionParams[0].presRegion,
virtualAddrParams.regionParams[0].dwOffset,
hucRegionSize[0],
0,
hucRegionName[0],
true,
currPass,
CodechalHucRegionDumpType::hucRegionDumpUpdate));
CODECHAL_ENCODE_CHK_STATUS_RETURN(SoftwareBRC(true));
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
&m_resVdencBrcUpdateDmemBuffer[currPass],
sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
currPass,
CodechalHucRegionDumpType::hucRegionDumpUpdate));
for (auto i = 0; i < 16; i++)
{
if (virtualAddrParams.regionParams[i].presRegion)
{
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[i].presRegion,
virtualAddrParams.regionParams[i].dwOffset,
hucRegionSize[i],
i,
hucRegionName[i],
!virtualAddrParams.regionParams[i].isWritable,
currPass,
CodechalHucRegionDumpType::hucRegionDumpUpdate);
}
}
);
// We increment by the average frame value once for each frame
if (IsFirstPass())
{
m_curTargetFullness += m_inputBitsPerFrame;
}
return eStatus;
}
#endif
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer));
if ((!m_singleTaskPhaseSupported || (m_firstTaskInPhase && !m_brcInit)) && !m_scalableMode)
{
// Send command buffer header at the beginning (OS dependent)
bool requestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, requestFrameTracking));
m_firstTaskInPhase = false;
}
// For Scalability, wait here for previous pass PAK int done
if (m_scalableMode && !IsFirstPass() && m_isTilingSupported && !m_brcInit && !m_brcReset)
{
SendHWWaitCommand(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, currPass);
SetSemaphoreMem(&m_pakIntDoneSemaphoreMem.sResource, &cmdBuffer, 0);
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitBrcConstantBuffer(&m_brcBuffers.resBrcConstantDataBuffer, m_pictureCodingType));
// load kernel from WOPCM into L2 storage RAM
MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams;
MOS_ZeroMemory(&imemParams, sizeof(imemParams));
imemParams.dwKernelDescriptor = m_vdboxHucVp9VdencBrcUpdateKernelDescriptor;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucImemStateCmd(&cmdBuffer, &imemParams));
// pipe mode select
MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams;
pipeModeSelectParams.Mode = m_mode;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucPipeModeSelectCmd(&cmdBuffer, &pipeModeSelectParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCBrcUpdate());
// set HuC DMEM param
MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams;
MOS_ZeroMemory(&dmemParams, sizeof(dmemParams));
dmemParams.presHucDataSource = &m_resVdencBrcUpdateDmemBuffer[currPass];
dmemParams.dwDataLength = MOS_ALIGN_CEIL(sizeof(HucBrcUpdateDmem), CODECHAL_CACHELINE_SIZE);
dmemParams.dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; // how to set?
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucDmemStateCmd(&cmdBuffer, &dmemParams));
// Set surfaces to HuC regions
MOS_ZeroMemory(&virtualAddrParams, sizeof(virtualAddrParams));
// History Buffer - IN/OUT
virtualAddrParams.regionParams[0].presRegion = &m_brcBuffers.resBrcHistoryBuffer;
virtualAddrParams.regionParams[0].isWritable = true;
if (IsFirstPass()) //First BRC pass needs stats from last frame
{
if (m_lastFrameScalableMode) // Frame (n-1) Scalable mode output -> input for frame n, BRC pass 0
{
//VDEnc Stats Buffer - IN
virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
virtualAddrParams.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
// Frame (not PAK) Stats Buffer - IN
virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
// PAK MMIO - IN
virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
}
else
{
virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
virtualAddrParams.regionParams[1].dwOffset = 0;
virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
virtualAddrParams.regionParams[2].dwOffset = 0;
virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
}
}
else // Second BRC Update Pass
{
if (m_scalableMode)
{
// VDEnc Stats Buffer - IN
virtualAddrParams.regionParams[1].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
virtualAddrParams.regionParams[1].dwOffset = m_tileStatsOffset.vdencStats;
// Frame (not PAK) Stats Buffer - IN
virtualAddrParams.regionParams[2].presRegion = &m_frameStatsPakIntegrationBuffer.sResource;
virtualAddrParams.regionParams[2].dwOffset = m_frameStatsOffset.pakStats;
// PAK MMIO - IN
virtualAddrParams.regionParams[7].presRegion = &m_hucPakIntBrcDataBuffer;
}
else
{
virtualAddrParams.regionParams[1].presRegion = &m_resVdencBrcStatsBuffer;
virtualAddrParams.regionParams[1].dwOffset = 0;
virtualAddrParams.regionParams[2].presRegion = &m_resFrameStatStreamOutBuffer;
virtualAddrParams.regionParams[2].dwOffset = 0;
virtualAddrParams.regionParams[7].presRegion = &m_brcBuffers.resBrcBitstreamSizeBuffer;
}
}
// Input SLBB (second level batch buffer) - IN
virtualAddrParams.regionParams[3].presRegion = &m_resVdencPictureState2NdLevelBatchBufferRead[currPass][m_vdencPictureState2ndLevelBBIndex];
// BRC Data - OUT
virtualAddrParams.regionParams[4].presRegion = &m_brcBuffers.resBrcHucDataBuffer;
virtualAddrParams.regionParams[4].isWritable = true;
// Const Data - IN
virtualAddrParams.regionParams[5].presRegion = &m_brcBuffers.resBrcConstantDataBuffer;
// Output SLBB - OUT
virtualAddrParams.regionParams[6].presRegion = &m_resVdencPictureState2NdLevelBatchBufferWrite[0];
virtualAddrParams.regionParams[6].isWritable = true;
// Load HuC Regions into Cmd Buf
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucVirtualAddrStateCmd(&cmdBuffer, &virtualAddrParams));
// Store HUC_STATUS2 register bit 6 before HUC_Start command
// This bit will be cleared by HW at the end of a HUC workload
// (HUC_Start command with last start bit set).
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHuCStatus2Register(&cmdBuffer));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hucInterface->AddHucStartCmd(&cmdBuffer, true));
// wait Huc completion (use HEVC bit for now)
MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams;
MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams));
vdPipeFlushParams.Flags.bFlushHEVC = 1;
vdPipeFlushParams.Flags.bWaitDoneHEVC = 1;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipeFlushParams));
// Flush the engine to ensure memory written out
MHW_MI_FLUSH_DW_PARAMS flushDwParams;
MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams));
flushDwParams.bVideoPipelineCacheInvalidate = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams));
MHW_MI_STORE_DATA_PARAMS storeDataParams;
MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams));
storeDataParams.pOsResource = &m_resHucPakMmioBuffer;
storeDataParams.dwResourceOffset = sizeof(uint32_t);
storeDataParams.dwValue = 1 << 31;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, &storeDataParams));
MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams;
MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams));
storeRegParams.presStoreBuffer = &m_resHucPakMmioBuffer;
storeRegParams.dwOffset = 0;
storeRegParams.dwRegister = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1)->hucStatusRegOffset;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &storeRegParams));
auto mmioRegisters = m_hucInterface->GetMmioRegisters(MHW_VDBOX_NODE_1);
CODECHAL_ENCODE_CHK_STATUS_RETURN(StoreHucErrorStatus(mmioRegisters, &cmdBuffer, false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(InsertConditionalBBEndWithHucErrorStatus(&cmdBuffer));
if (!m_singleTaskPhaseSupported && (m_osInterface->bNoParsingAssistanceInKmd) && !m_scalableMode)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
ReturnCommandBuffer(&cmdBuffer);
if (!m_singleTaskPhaseSupported)
{
bool renderingFlags = m_videoContextUsesNullHw;
// Dump history input before HuC runs
CODECHAL_DEBUG_TOOL(
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[0].presRegion,
0,
hucRegionSize[0],
0,
hucRegionName[0],
true,
currPass,
CodechalHucRegionDumpType::hucRegionDumpUpdate);
);
CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, renderingFlags));
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem(
&m_resVdencBrcUpdateDmemBuffer[currPass],
sizeof(HucBrcUpdateDmem), // Change buffer and size to update dmem
currPass,
CodechalHucRegionDumpType::hucRegionDumpUpdate));
for (auto i = 0; i < 16; i++)
{
if (virtualAddrParams.regionParams[i].presRegion)
{
m_debugInterface->DumpHucRegion(
virtualAddrParams.regionParams[i].presRegion,
virtualAddrParams.regionParams[i].dwOffset,
hucRegionSize[i],
i,
hucRegionName[i],
!virtualAddrParams.regionParams[i].isWritable,
currPass,
CodechalHucRegionDumpType::hucRegionDumpUpdate);
}
}
)
}
// We increment by the average frame value once for each frame
if (IsFirstPass())
{
m_curTargetFullness += m_inputBitsPerFrame;
}
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::InitMmcState()
{
CODECHAL_ENCODE_FUNCTION_ENTER;
#ifdef _MMC_SUPPORTED
m_mmcState = MOS_New(CodechalMmcEncodeVp9G12, m_hwInterface, &m_reconSurface, &m_rawSurface);
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState);
#endif
return MOS_STATUS_SUCCESS;
}
MOS_STATUS CodechalVdencVp9StateG12::AddCommandsVp9(uint32_t commandType, PMOS_COMMAND_BUFFER cmdBuffer )
{
auto qp = m_vp9PicParams->LumaACQIndex;
auto vp9FrameType = m_vp9PicParams->PicFlags.fields.frame_type;
double QPScale = (vp9FrameType == CODEC_VP9_KEY_FRAME) ? 0.31 : 0.33;
double lambda = QPScale * CODECHAL_VP9_QUANT_AC[qp] / 8;
auto sadQpLambda = lambda * 4 + 0.5;
auto rdQpLambda = lambda * lambda *4 + 0.5;
if (commandType == CODECHAL_CMD1)
{
MHW_VDBOX_VDENC_CMD1_PARAMS cmd1Params;
MOS_ZeroMemory(&cmd1Params, sizeof(cmd1Params));
cmd1Params.Mode = CODECHAL_ENCODE_MODE_VP9;
cmd1Params.usSADQPLambda = (uint16_t)sadQpLambda;
cmd1Params.usRDQPLambda = (uint16_t)rdQpLambda;
cmd1Params.pVp9EncPicParams = m_vp9PicParams;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd1Cmd(cmdBuffer, nullptr, &cmd1Params));
}
else if (commandType == CODECHAL_CMD2)
{
PMHW_VDBOX_VDENC_CMD2_STATE cmd2Params(new MHW_VDBOX_VDENC_CMD2_STATE);
CODECHAL_ENCODE_CHK_NULL_RETURN(cmd2Params);
MHW_VDBOX_VP9_SEGMENT_STATE segmentState;
MOS_ZeroMemory(&segmentState, sizeof(segmentState));
segmentState.Mode = m_mode;
segmentState.pVp9EncodeSegmentParams = m_vp9SegmentParams;
cmd2Params->Mode = m_mode;
cmd2Params->pVp9EncPicParams = m_vp9PicParams;
cmd2Params->pVp9EncSeqParams = m_vp9SeqParams;
cmd2Params->bSegmentationEnabled = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
cmd2Params->pVp9SegmentState = &segmentState;
cmd2Params->bPrevFrameSegEnabled = m_prevFrameSegEnabled;
cmd2Params->bStreamInEnabled = m_segmentMapProvided || m_16xMeEnabled;
cmd2Params->ucNumRefIdxL0ActiveMinus1 = (m_vp9PicParams->PicFlags.fields.frame_type) ? m_numRefFrames - 1 : 0;
cmd2Params->usSADQPLambda = (uint16_t)sadQpLambda;
cmd2Params->usRDQPLambda = (uint16_t)rdQpLambda;
cmd2Params->bPakOnlyMultipassEnable = m_vdencPakonlyMultipassEnabled;
cmd2Params->bDynamicScalingEnabled = ((m_dysRefFrameFlags != DYS_REF_NONE) && !m_dysVdencMultiPassEnabled);
cmd2Params->temporalMVpEnable = m_vp9PicParams->PicFlags.fields.frame_type && !m_prevFrameInfo.KeyFrame;
if ((m_vp9PicParams->RefFlags.fields.LastRefIdx == m_vp9PicParams->RefFlags.fields.AltRefIdx
&& m_vp9PicParams->RefFlags.fields.AltRefIdx == m_vp9PicParams->RefFlags.fields.GoldenRefIdx
&& m_vp9PicParams->RefFlags.fields.GoldenRefIdx == m_vp9PicParams->RefFlags.fields.LastRefIdx)
|| (m_vp9SeqParams->TargetUsage == TU_QUALITY)) {
cmd2Params->temporalMVpEnable = 0;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdencCmd2Cmd(cmdBuffer, nullptr, cmd2Params));
}
return MOS_STATUS_SUCCESS;
}
MOS_STATUS CodechalVdencVp9StateG12::ConfigStitchDataBuffer()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
int32_t currentPass = GetCurrentPass();
MOS_LOCK_PARAMS lockFlagsWriteOnly;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
HucCommandData *hucStitchDataBuf = (HucCommandData *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly);
MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
hucStitchDataBuf->TotalCommands = 1;
hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF;
HucInputCmdG12 hucInputCmd;
MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12));
CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
hucInputCmd.CmdMode = HUC_CMD_LIST_MODE;
hucInputCmd.LengthOfTable = (uint8_t)GetNumTilesInFrame();
hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize;
PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBBIndex].sResource;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
m_osInterface,
presSrc,
false,
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
m_osInterface,
&m_resBitstreamBuffer,
true,
true));
uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer);
hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF);
hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32);
MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12));
m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]);
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::SetDmemHuCVp9Prob()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
MOS_LOCK_PARAMS lockFlagsWriteOnly;
MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS));
lockFlagsWriteOnly.WriteOnly = 1;
HucProbDmem *dmem = nullptr;
HucProbDmem *dmemTemp = nullptr;
int currPass = GetCurrentPass();
if (IsFirstPass())
{
for (auto i = 0; i < 3; i++)
{
dmem = (HucProbDmem *)m_osInterface->pfnLockResource(
m_osInterface, &m_resHucProbDmemBuffer[i][m_currRecycledBufIdx], &lockFlagsWriteOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
if (i == 0)
{
dmemTemp = dmem;
}
MOS_SecureMemcpy(dmem, sizeof(HucProbDmem), m_probDmem, sizeof(HucProbDmem));
if (i != 0)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucProbDmemBuffer[i][m_currRecycledBufIdx]));
dmem = dmemTemp;
}
}
}
else
{
dmem = (HucProbDmem *)m_osInterface->pfnLockResource(
m_osInterface, &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx], &lockFlagsWriteOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(dmem);
}
// for BRC cases, HuC needs to be called on Pass 1
if (m_superFrameHucPass)
{
dmem->HuCPassNum = CODECHAL_ENCODE_VP9_HUC_SUPERFRAME_PASS;
}
else
{
if (m_dysBrc)
{
//For BRC+Dynamic Scaling, we need to run as HUC pass 1 in the last pass since the curr_pass was changed to 0.
dmem->HuCPassNum = currPass != 0;
}
else
{
//For Non-dynamic scaling BRC cases, HuC needs to run as HuC pass one only in last pass.
dmem->HuCPassNum = ((m_vdencBrcEnabled && currPass == 1) ? 0 : (currPass != 0));
}
}
dmem->FrameWidth = m_oriFrameWidth;
dmem->FrameHeight = m_oriFrameHeight;
for (auto i = 0; i < CODEC_VP9_MAX_SEGMENTS; i++)
{
dmem->SegmentRef[i] = (m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentReferenceEnabled == true) ? m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentReference : CODECHAL_ENCODE_VP9_REF_SEGMENT_DISABLED;
dmem->SegmentSkip[i] = m_vp9SegmentParams->SegData[i].SegmentFlags.fields.SegmentSkipped;
}
if (m_vp9PicParams->PicFlags.fields.frame_type == CODEC_VP9_KEY_FRAME && m_currPass == 0)
{
for (auto i = 1; i < CODEC_VP9_NUM_CONTEXTS; i++)
{
uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(
m_osInterface,
&m_resProbBuffer[i],
&lockFlagsWriteOnly);
CODECHAL_ENCODE_CHK_NULL_RETURN(data);
ContextBufferInit(data, 0);
CtxBufDiffInit(data, 0);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(
m_osInterface,
&m_resProbBuffer[i]));
}
}
// in multipasses, only delta seg qp (SegCodeAbs = 0) is supported, confirmed by the arch team
dmem->SegCodeAbs = 0;
dmem->SegTemporalUpdate = m_vp9PicParams->PicFlags.fields.segmentation_temporal_update;
dmem->LastRefIndex = m_vp9PicParams->RefFlags.fields.LastRefIdx;
dmem->GoldenRefIndex = m_vp9PicParams->RefFlags.fields.GoldenRefIdx;
dmem->AltRefIndex = m_vp9PicParams->RefFlags.fields.AltRefIdx;
dmem->RefreshFrameFlags = m_vp9PicParams->RefFlags.fields.refresh_frame_flags;
dmem->RefFrameFlags = m_refFrameFlags;
dmem->ContextFrameTypes = m_contextFrameTypes[m_vp9PicParams->PicFlags.fields.frame_context_idx];
dmem->FrameToShow = GetReferenceBufferSlotIndex(dmem->RefreshFrameFlags);
dmem->FrameCtrl.FrameType = m_vp9PicParams->PicFlags.fields.frame_type;
dmem->FrameCtrl.ShowFrame = m_vp9PicParams->PicFlags.fields.show_frame;
dmem->FrameCtrl.ErrorResilientMode = m_vp9PicParams->PicFlags.fields.error_resilient_mode;
dmem->FrameCtrl.IntraOnly = m_vp9PicParams->PicFlags.fields.intra_only;
dmem->FrameCtrl.ContextReset = m_vp9PicParams->PicFlags.fields.reset_frame_context;
dmem->FrameCtrl.LastRefFrameBias = m_vp9PicParams->RefFlags.fields.LastRefSignBias;
dmem->FrameCtrl.GoldenRefFrameBias = m_vp9PicParams->RefFlags.fields.GoldenRefSignBias;
dmem->FrameCtrl.AltRefFrameBias = m_vp9PicParams->RefFlags.fields.AltRefSignBias;
dmem->FrameCtrl.AllowHighPrecisionMv = m_vp9PicParams->PicFlags.fields.allow_high_precision_mv;
dmem->FrameCtrl.McompFilterMode = m_vp9PicParams->PicFlags.fields.mcomp_filter_type;
dmem->FrameCtrl.TxMode = m_txMode;
dmem->FrameCtrl.RefreshFrameContext = m_vp9PicParams->PicFlags.fields.refresh_frame_context;
dmem->FrameCtrl.FrameParallelDecode = m_vp9PicParams->PicFlags.fields.frame_parallel_decoding_mode;
dmem->FrameCtrl.CompPredMode = m_vp9PicParams->PicFlags.fields.comp_prediction_mode;
dmem->FrameCtrl.FrameContextIdx = m_vp9PicParams->PicFlags.fields.frame_context_idx;
dmem->FrameCtrl.SharpnessLevel = m_vp9PicParams->sharpness_level;
dmem->FrameCtrl.SegOn = m_vp9PicParams->PicFlags.fields.segmentation_enabled;
dmem->FrameCtrl.SegMapUpdate = m_vp9PicParams->PicFlags.fields.segmentation_update_map;
dmem->FrameCtrl.SegUpdateData = m_vp9PicParams->PicFlags.fields.seg_update_data;
dmem->StreamInSegEnable = (uint8_t)m_segmentMapProvided;
dmem->StreamInEnable = (uint8_t)m_segmentMapProvided; // Currently unused, if used may || with HME enabled
dmem->FrameCtrl.log2TileRows = m_vp9PicParams->log2_tile_rows;
dmem->FrameCtrl.log2TileCols = m_vp9PicParams->log2_tile_columns;
dmem->PrevFrameInfo = m_prevFrameInfo;
// For DyS CQP or BRC case, there is no Repak on last pass. So Repak flag is disabled here.
// We also disable repak pass in TU7 speed mode usage for performance reasons.
dmem->RePak = (m_numPasses > 0 && IsLastPass() && !(m_dysCqp || m_dysBrc) && (m_vp9SeqParams->TargetUsage != TU_PERFORMANCE));
if (dmem->RePak && m_adaptiveRepakSupported)
{
MOS_SecureMemcpy(dmem->RePakThreshold, sizeof(uint32_t) * CODEC_VP9_QINDEX_RANGE, m_rePakThreshold, sizeof(uint32_t) * CODEC_VP9_QINDEX_RANGE);
}
dmem->LFLevelBitOffset = m_vp9PicParams->BitOffsetForLFLevel;
dmem->QIndexBitOffset = m_vp9PicParams->BitOffsetForQIndex;
dmem->SegBitOffset = m_vp9PicParams->BitOffsetForSegmentation + 1; // exclude segment_enable bit
dmem->SegLengthInBits = m_vp9PicParams->BitSizeForSegmentation - 1; // exclude segment_enable bit
dmem->UnCompHdrTotalLengthInBits = m_vp9PicParams->BitOffsetForFirstPartitionSize + 16;
dmem->PicStateOffset = m_hucPicStateOffset;
dmem->SLBBSize = m_hucSlbbSize;
dmem->IVFHeaderSize = (m_frameNum == 0) ? 44 : 12;
dmem->VDEncImgStateOffset = m_slbbImgStateOffset;
dmem->PakOnlyEnable = ((dmem->RePak) && m_vdencPakonlyMultipassEnabled) ? 1 : 0;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucProbDmemBuffer[currPass][m_currRecycledBufIdx]));
return eStatus;
}
MOS_STATUS CodechalVdencVp9StateG12::InsertConditionalBBEndWithHucErrorStatus(PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS miEnhancedConditionalBatchBufferEndParams;
MOS_ZeroMemory(
&miEnhancedConditionalBatchBufferEndParams,
sizeof(MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
miEnhancedConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_resHucErrorStatusBuffer;
miEnhancedConditionalBatchBufferEndParams.dwParamsType = MHW_MI_ENHANCED_CONDITIONAL_BATCH_BUFFER_END_PARAMS::ENHANCED_PARAMS;
miEnhancedConditionalBatchBufferEndParams.enableEndCurrentBatchBuffLevel = false;
miEnhancedConditionalBatchBufferEndParams.compareOperation = MAD_EQUAL_IDD;
miEnhancedConditionalBatchBufferEndParams.bDisableCompareMask = false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd(
cmdBuffer,
(PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)(&miEnhancedConditionalBatchBufferEndParams)));
return eStatus;
}