| /* |
| * Copyright (c) 2017-2019, Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| //! |
| //! \file codechal_encode_hevc_g12.cpp |
| //! \brief HEVC dual-pipe encoder for GEN12. |
| //! |
| |
| #include "codechal_encode_hevc_g12.h" |
| #include "codechal_encode_csc_ds_g12.h" |
| #include "codechal_mmc_encode_hevc_g12.h" |
| #include "codechal_encode_wp_g12.h" |
| #include "codechal_kernel_header_g12.h" |
| #include "codechal_kernel_hme_g12.h" |
| #include "codechal_debug.h" |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| #include "igcodeckrn_g12.h" |
| #endif |
| #include "codeckrnheader.h" |
| #include "mhw_vdbox_hcp_g12_X.h" |
| #include "mhw_vdbox_g12_X.h" |
| #include "mhw_mi_g12_X.h" |
| #include "mhw_render_g12_X.h" |
| #include "media_user_settings_mgr_g12.h" |
| #include "cm_queue_rt.h" |
| #include "codechal_debug.h" |
| |
| //! \cond SKIP_DOXYGEN |
| #define CRECOST(lambda, mode, lcu, slice) (Map44LutValue((uint32_t)((lambda) * (m_modeBits[(lcu)][(mode)][(slice)]) * (m_modeBitsScale[(mode)][(slice)])), 0x8F)) |
| #define RDEBITS62(mode, lcu, slice) (GetU62ModeBits((float)((m_modeBits[(lcu)][(mode)][(slice)]) * (m_modeBitsScale[(mode)][(slice)])))) |
| //! \endcond |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetGpuCtxCreatOption() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)) |
| { |
| CodechalEncoderState::SetGpuCtxCreatOption(); |
| } |
| else |
| { |
| m_gpuCtxCreatOpt = MOS_New(MOS_GPUCTX_CREATOPTIONS_ENHANCED); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_gpuCtxCreatOpt); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AddHcpPipeModeSelectCmd(MOS_COMMAND_BUFFER *cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // call MI_VD_CONTROL_STATE before HCP_PIPE_SELECT to init the pipe. |
| { |
| MHW_MI_VD_CONTROL_STATE_PARAMS vdControlStateParams; |
| //set up VD_CONTROL_STATE command |
| { |
| MOS_ZeroMemory(&vdControlStateParams, sizeof(MHW_MI_VD_CONTROL_STATE_PARAMS)); |
| vdControlStateParams.initialization = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| static_cast<MhwMiInterfaceG12 *>(m_miInterface)->AddMiVdControlStateCmd(cmdBuffer, &vdControlStateParams)); |
| } |
| } |
| |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 pipeModeSelectParams; |
| SetHcpPipeModeSelectParams(pipeModeSelectParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams)); |
| |
| return eStatus; |
| } |
| |
| void CodechalEncHevcStateG12::SetHcpPipeModeSelectParams(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS &vdboxPipeModeSelectParams) |
| { |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 &pipeModeSelectParams = |
| static_cast<MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12 &>(vdboxPipeModeSelectParams); |
| pipeModeSelectParams = {}; |
| CodechalEncodeHevcBase::SetHcpPipeModeSelectParams(vdboxPipeModeSelectParams); |
| |
| pipeModeSelectParams.pakPiplnStrmoutEnabled = m_pakPiplStrmOutEnable; |
| pipeModeSelectParams.pakFrmLvlStrmoutEnable = (m_brcEnabled && m_numPipe > 1); |
| |
| if (m_numPipe > 1) |
| { |
| // Running in the multiple VDBOX mode |
| if (IsFirstPipe()) |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT; |
| } |
| else if (IsLastPipe()) |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT; |
| } |
| else |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE; |
| } |
| pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE; |
| } |
| else |
| { |
| pipeModeSelectParams.MultiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY; |
| pipeModeSelectParams.PipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY; |
| } |
| } |
| |
| void CodechalEncHevcStateG12::SetHcpPicStateParams(MHW_VDBOX_HEVC_PIC_STATE &picStateParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalEncodeHevcBase::SetHcpPicStateParams(picStateParams); |
| picStateParams.sseEnabledInVmeEncode = m_sseEnabled; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AddHcpSurfaceStateCmds(MOS_COMMAND_BUFFER *cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MHW_VDBOX_SURFACE_PARAMS srcSurfaceParams; |
| SetHcpSrcSurfaceParams(srcSurfaceParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &srcSurfaceParams)); |
| |
| MHW_VDBOX_SURFACE_PARAMS reconSurfaceParams; |
| SetHcpReconSurfaceParams(reconSurfaceParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &reconSurfaceParams)); |
| |
| // Add the surface state for reference picture, GEN12 HW change |
| reconSurfaceParams.ucSurfaceStateId = CODECHAL_HCP_REF_SURFACE_ID; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpSurfaceCmd(cmdBuffer, &reconSurfaceParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AddHcpPictureStateCmd(MOS_COMMAND_BUFFER *cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MHW_VDBOX_HEVC_PIC_STATE_G12 picStateParams; |
| |
| SetHcpPicStateParams(picStateParams); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPicStateCmd(cmdBuffer, &picStateParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::UpdateYUY2SurfaceInfo( |
| MOS_SURFACE &surface, |
| bool is10Bit) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (surface.Format == Format_YUY2V) |
| { |
| // surface has been updated |
| return eStatus; |
| } |
| |
| if (surface.Format != Format_YUY2 && |
| surface.Format != Format_Y210 && |
| surface.Format != Format_Y216) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (surface.dwWidth < m_oriFrameWidth / 2 || surface.dwHeight < m_oriFrameHeight * 2) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| surface.Format = is10Bit ? Format_Y216V : Format_YUY2V; |
| surface.dwWidth = m_oriFrameWidth; |
| surface.dwHeight = m_oriFrameHeight; |
| |
| surface.YPlaneOffset.iSurfaceOffset = 0; |
| surface.YPlaneOffset.iXOffset = 0; |
| surface.YPlaneOffset.iYOffset = 0; |
| |
| surface.UPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch; |
| surface.UPlaneOffset.iXOffset = 0; |
| surface.UPlaneOffset.iYOffset = surface.dwHeight; |
| |
| surface.VPlaneOffset.iSurfaceOffset = surface.dwHeight * surface.dwPitch; |
| surface.VPlaneOffset.iXOffset = 0; |
| surface.VPlaneOffset.iYOffset = surface.dwHeight; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::InitializePicture(const EncoderParams ¶ms) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::InitializePicture(params)); |
| |
| if (m_resolutionChanged) |
| { |
| ResizeBufferOffset(); |
| } |
| |
| m_sseEnabled = false; |
| // only 420 format support SSE output |
| // see TDR in scalability case, disable SSE for now before HW confirm the capability. |
| if (m_sseSupported && |
| m_hevcSeqParams->chroma_format_idc == HCP_CHROMA_FORMAT_YUV420 && |
| m_numPipe == 1) |
| { |
| m_sseEnabled = true; |
| } |
| |
| // for HEVC VME, HUC based WP is not supported. |
| m_hevcPicParams->bEnableGPUWeightedPrediction = false; |
| |
| m_pakPiplStrmOutEnable = m_sseEnabled || (m_brcEnabled && m_numPipe > 1); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetTileData(m_tileParams, params.dwBitstreamSize)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateTileStatistics()); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResourcesVariableSize()); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetPictureStructs() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetPictureStructs()); |
| |
| if (m_minMaxQpControlEnabled) |
| { |
| //if Min Max QP is on disable Frame Panic Mode |
| m_enableFramePanicMode = false; |
| } |
| |
| // This is an additional (the 5th) PAK pass for BRC panic mode. Enabled for the single pipe case only. |
| // Panic mode is not supported with Min/Max QP |
| if (m_brcEnabled && m_enableFramePanicMode && (false == m_hevcSeqParams->DisableHRDConformance) && |
| (I_TYPE != m_hevcPicParams->CodingType) && |
| (m_numPipe == 1)) |
| { |
| m_numPasses++; |
| } |
| |
| m_virtualEngineBbIndex = m_currOriginalPic.FrameIdx; |
| |
| if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat && |
| (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat) |
| { |
| uint8_t currRefIdx = m_hevcPicParams->CurrReconstructedPic.FrameIdx; |
| UpdateYUY2SurfaceInfo(m_refList[currRefIdx]->sRefBuffer, m_is10BitHevc); |
| |
| if (m_pictureCodingType != I_TYPE) |
| { |
| for (uint32_t i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| if (!m_picIdx[i].bValid || !m_currUsedRefPic[i]) |
| { |
| continue; |
| } |
| uint8_t picIdx = m_picIdx[i].ucPicIdx; |
| CODECHAL_ENCODE_ASSERT(picIdx < 127); |
| |
| UpdateYUY2SurfaceInfo((m_refList[picIdx]->sRefBuffer), m_is10BitHevc); |
| } |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ConvertY210ToY210V( |
| PMOS_SURFACE source, |
| PMOS_SURFACE target) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(source); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(target); |
| |
| if (m_oriFrameWidth > target->dwWidth || m_oriFrameHeight > target->dwHeight || |
| m_oriFrameWidth > source->dwWidth || m_oriFrameHeight > source->dwHeight) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| MOS_LOCK_PARAMS lockRead, lockWrite; |
| MOS_ZeroMemory(&lockRead, sizeof(lockRead)); |
| MOS_ZeroMemory(&lockWrite, sizeof(lockWrite)); |
| |
| lockRead.ReadOnly = 1; |
| lockWrite.WriteOnly = 1; |
| |
| uint16_t *srcData = (uint16_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &source->OsResource, |
| &lockRead); |
| |
| if (srcData == nullptr) |
| { |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| uint8_t *dstData = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &target->OsResource, |
| &lockWrite); |
| |
| if (dstData == nullptr) |
| { |
| // release the lock on srcData acquired above before returning here |
| m_osInterface->pfnUnlockResource(m_osInterface, &source->OsResource); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| uint32_t highBits = MOS_ALIGN_CEIL(target->dwWidth, 32); |
| uint32_t srcPitch = source->dwPitch / sizeof(srcData[0]); |
| uint32_t dstPitch = target->dwPitch / sizeof(dstData[0]); |
| |
| //Y |
| for (uint32_t h = 0; h < m_oriFrameHeight; h++) |
| { |
| for (uint32_t w = 0; w < m_oriFrameWidth; w++) |
| { |
| uint16_t d = srcData[w * 2 + h * srcPitch]; |
| |
| dstData[w + h * dstPitch + 0] = (uint8_t)(d >> 8); |
| dstData[w + h * dstPitch + highBits] = (uint8_t)(d >> 6) & 3; |
| } |
| } |
| |
| uint32_t uvOffset = target->dwPitch * target->dwHeight; |
| |
| //UV |
| for (uint32_t h = 0; h < m_oriFrameHeight; h++) |
| { |
| for (uint32_t w = 0; w < m_oriFrameWidth; w++) |
| { |
| uint16_t d = srcData[w * 2 + 1 + h * srcPitch]; |
| |
| dstData[uvOffset + w + h * dstPitch + 0] = (uint8_t)(d >> 8); |
| dstData[uvOffset + w + h * dstPitch + highBits] = (uint8_t)(d >> 6) & 3; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ConvertP010ToP010V( |
| PMOS_SURFACE source, |
| PMOS_SURFACE target) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(source); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(target); |
| |
| if (m_oriFrameWidth > target->dwWidth || m_oriFrameHeight > target->dwHeight || |
| m_oriFrameWidth > source->dwWidth || m_oriFrameHeight > source->dwHeight) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| MOS_LOCK_PARAMS lockRead, lockWrite; |
| MOS_ZeroMemory(&lockRead, sizeof(lockRead)); |
| MOS_ZeroMemory(&lockWrite, sizeof(lockWrite)); |
| lockRead.ReadOnly = 1; |
| lockWrite.WriteOnly = 1; |
| |
| uint16_t *srcData = (uint16_t *)m_osInterface->pfnLockResource(m_osInterface, |
| &source->OsResource, |
| &lockRead); |
| if (srcData == nullptr) |
| { |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| uint8_t *dstData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &target->OsResource, &lockWrite); |
| if (dstData == nullptr) |
| { |
| // release the lock on srcData acquired above before returning here |
| m_osInterface->pfnUnlockResource(m_osInterface, &source->OsResource); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| uint32_t highBits = MOS_ALIGN_CEIL(target->dwWidth, 32); |
| uint32_t srcPitch = source->dwPitch / sizeof(srcData[0]); |
| uint32_t dstPitch = target->dwPitch / sizeof(dstData[0]); |
| |
| //Y |
| for (uint32_t h = 0; h < m_oriFrameHeight; h++) |
| { |
| for (uint32_t w = 0; w < m_oriFrameWidth; w++) |
| { |
| uint16_t d = srcData[w + h * srcPitch]; |
| |
| dstData[w + h * dstPitch + 0] = (uint8_t)(d >> 8); |
| dstData[w + h * dstPitch + highBits] = (uint8_t)(d >> 6) & 3; |
| } |
| } |
| |
| uint32_t dstUvOffset = target->dwPitch * target->dwHeight; |
| uint32_t srcUvOffset = srcPitch * source->dwHeight; |
| |
| //UV |
| for (uint32_t h = 0; h < m_oriFrameHeight / 2; h++) |
| { |
| for (uint32_t w = 0; w < m_oriFrameWidth; w++) |
| { |
| uint16_t d = srcData[srcUvOffset + w + h * srcPitch]; |
| |
| dstData[dstUvOffset + w + h * dstPitch + 0] = (uint8_t)(d >> 8); |
| dstData[dstUvOffset + w + h * dstPitch + highBits] = (uint8_t)(d >> 6) & 3; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ConvertYUY2ToYUY2V( |
| PMOS_SURFACE source, |
| PMOS_SURFACE target) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(source); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(target); |
| |
| if (m_oriFrameWidth > target->dwWidth || m_oriFrameHeight > target->dwHeight || |
| m_oriFrameWidth > source->dwWidth || m_oriFrameHeight > source->dwHeight) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| MOS_LOCK_PARAMS lockRead, lockWrite; |
| MOS_ZeroMemory(&lockRead, sizeof(lockRead)); |
| MOS_ZeroMemory(&lockWrite, sizeof(lockWrite)); |
| |
| lockRead.ReadOnly = 1; |
| lockWrite.WriteOnly = 1; |
| |
| uint8_t *srcData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &source->OsResource, &lockRead); |
| if (srcData == nullptr) |
| { |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| uint8_t *dstData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &target->OsResource, &lockWrite); |
| if (dstData == nullptr) |
| { |
| // release the lock on srcData acquired above before returning here |
| m_osInterface->pfnUnlockResource(m_osInterface, &source->OsResource); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| //Y |
| for (uint32_t h = 0; h < m_oriFrameHeight; h++) |
| { |
| for (uint32_t w = 0; w < m_oriFrameWidth; w++) |
| { |
| uint8_t d = srcData[w * 2 + h * source->dwPitch]; |
| |
| dstData[w + h * target->dwPitch] = d; |
| } |
| } |
| |
| uint32_t uvOffset = target->dwPitch * target->dwHeight; |
| |
| //UV |
| for (uint32_t h = 0; h < m_oriFrameHeight; h++) |
| { |
| for (uint32_t w = 0; w < m_oriFrameWidth; w++) |
| { |
| uint8_t d = srcData[w * 2 + 1 + h * source->dwPitch]; |
| |
| dstData[uvOffset + w + h * target->dwPitch] = d; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::DownScaling2X( |
| PMOS_SURFACE source, |
| PMOS_SURFACE target) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(source); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(target); |
| |
| if ((source->Format != Format_NV12 && source->Format != Format_YUY2V && source->Format != Format_Y216V) || |
| (target->Format != Format_NV12 && target->Format != Format_YUY2V)) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| MOS_LOCK_PARAMS lockRead, lockWrite; |
| MOS_ZeroMemory(&lockRead, sizeof(lockRead)); |
| MOS_ZeroMemory(&lockWrite, sizeof(lockWrite)); |
| |
| lockRead.ReadOnly = 1; |
| lockWrite.WriteOnly = 1; |
| |
| uint8_t *srcData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &source->OsResource, &lockRead); |
| if (srcData == nullptr) |
| { |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| uint8_t *dstData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &target->OsResource, &lockWrite); |
| if (dstData == nullptr) |
| { |
| // release the lock on srcData acquired above before returning here |
| m_osInterface->pfnUnlockResource(m_osInterface, &source->OsResource); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| //Y |
| for (uint32_t h = 0, h2 = 0; h < m_oriFrameHeight; h += 2, h2++) |
| { |
| for (uint32_t w = 0, w2 = 0; w < m_oriFrameWidth; w += 2, w2++) |
| { |
| int16_t sum = |
| (int16_t)srcData[(h + 0) * source->dwPitch + w + 0] + |
| (int16_t)srcData[(h + 0) * source->dwPitch + w + 1] + |
| (int16_t)srcData[(h + 1) * source->dwPitch + w + 0] + |
| (int16_t)srcData[(h + 1) * source->dwPitch + w + 1]; |
| |
| sum = sum >> 2; |
| dstData[h2 * target->dwPitch + w2] = (uint8_t)sum; |
| } |
| } |
| |
| srcData = srcData + source->dwHeight * source->dwPitch; |
| dstData = dstData + target->dwHeight * target->dwPitch; |
| |
| uint32_t uvHeightRatio = (source->Format == Format_NV12) ? 2 : 1; |
| |
| //UV |
| for (uint32_t h = 0, h2 = 0; h < m_oriFrameHeight / uvHeightRatio; h += 2, h2++) |
| { |
| for (uint32_t w = 0, w2 = 0; w < m_oriFrameWidth; w += 4, w2 += 2) |
| { |
| // U |
| int16_t sum = |
| (int16_t)srcData[(h + 0) * source->dwPitch + w + 0] + |
| (int16_t)srcData[(h + 0) * source->dwPitch + w + 2] + |
| (int16_t)srcData[(h + 1) * source->dwPitch + w + 0] + |
| (int16_t)srcData[(h + 1) * source->dwPitch + w + 2]; |
| sum = sum >> 2; |
| dstData[h2 * target->dwPitch + w2 + 0] = (uint8_t)sum; |
| |
| // V |
| sum = |
| (int16_t)srcData[(h + 0) * source->dwPitch + w + 1] + |
| (int16_t)srcData[(h + 0) * source->dwPitch + w + 3] + |
| (int16_t)srcData[(h + 1) * source->dwPitch + w + 1] + |
| (int16_t)srcData[(h + 1) * source->dwPitch + w + 3]; |
| sum = sum >> 2; |
| dstData[h2 * target->dwPitch + w2 + 1] = (uint8_t)sum; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetKernelParams( |
| EncOperation encOperation, |
| MHW_KERNEL_PARAM *kernelParams, |
| uint32_t idx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| kernelParams->iThreadCount = m_hwInterface->GetRenderInterface()->GetHwCaps()->dwMaxThreads; |
| kernelParams->iIdCount = 1; |
| |
| uint32_t curbeAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment(); |
| switch (encOperation) |
| { |
| case ENC_MBENC: |
| { |
| switch (idx) |
| { |
| case MBENC_LCU32_KRNIDX: |
| kernelParams->iBTCount = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU32_BTI), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = CODECHAL_HEVC_MAX_LCU_SIZE_G9; |
| kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G9; |
| break; |
| |
| case MBENC_LCU64_KRNIDX: |
| kernelParams->iBTCount = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MBENC_LCU64_BTI), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = CODECHAL_HEVC_MAX_LCU_SIZE_G10; |
| kernelParams->iBlockHeight = CODECHAL_HEVC_MAX_LCU_SIZE_G10; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| break; |
| |
| case ENC_BRC: |
| { |
| switch (idx) |
| { |
| case CODECHAL_HEVC_BRC_INIT: |
| case CODECHAL_HEVC_BRC_RESET: |
| kernelParams->iBTCount = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRC_INITRESET_CURBE), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE; |
| kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE; |
| break; |
| |
| case CODECHAL_HEVC_BRC_FRAME_UPDATE: |
| kernelParams->iBTCount = BRC_UPDATE_END - BRC_UPDATE_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE; |
| kernelParams->iBlockHeight = CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE; |
| break; |
| |
| case CODECHAL_HEVC_BRC_LCU_UPDATE: |
| kernelParams->iBTCount = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(BRCUPDATE_CURBE), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE; |
| kernelParams->iBlockHeight = CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetBindingTable( |
| EncOperation encOperation, |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC hevcEncBindingTable, |
| uint32_t idx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hevcEncBindingTable); |
| |
| MOS_ZeroMemory(hevcEncBindingTable, sizeof(*hevcEncBindingTable)); |
| |
| switch (encOperation) |
| { |
| case ENC_MBENC: |
| { |
| switch (idx) |
| { |
| case MBENC_LCU32_KRNIDX: |
| case MBENC_LCU64_KRNIDX: |
| hevcEncBindingTable->dwNumBindingTableEntries = MBENC_B_FRAME_END - MBENC_B_FRAME_BEGIN; |
| hevcEncBindingTable->dwBindingTableStartOffset = MBENC_B_FRAME_BEGIN; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| break; |
| |
| case ENC_BRC: |
| { |
| switch (idx) |
| { |
| case CODECHAL_HEVC_BRC_INIT: |
| hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN; |
| hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_BRC_RESET: |
| hevcEncBindingTable->dwNumBindingTableEntries = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN; |
| hevcEncBindingTable->dwBindingTableStartOffset = BRC_INIT_RESET_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_BRC_FRAME_UPDATE: |
| hevcEncBindingTable->dwNumBindingTableEntries = BRC_UPDATE_END - BRC_UPDATE_BEGIN; |
| hevcEncBindingTable->dwBindingTableStartOffset = BRC_UPDATE_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_BRC_LCU_UPDATE: |
| hevcEncBindingTable->dwNumBindingTableEntries = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN; |
| hevcEncBindingTable->dwBindingTableStartOffset = BRC_LCU_UPDATE_BEGIN; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| for (uint32_t i = 0; i < hevcEncBindingTable->dwNumBindingTableEntries; i++) |
| { |
| hevcEncBindingTable->dwBindingTableEntries[i] = i; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AllocateEncResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // Surfaces used by I & B Kernels |
| uint32_t width = 0, height = 0; |
| uint32_t size = 0; |
| |
| if (!m_useMdf) |
| { |
| // Intermediate CU Record surface |
| if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu32.OsResource)) |
| { |
| width = m_widthAlignedLcu32; |
| height = m_heightAlignedLcu32 >> 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_intermediateCuRecordSurfaceLcu32, |
| width, |
| height, |
| "Intermediate CU record surface", |
| MOS_TILE_Y)); |
| } |
| |
| // Scratch Surface for I-kernel |
| if (Mos_ResourceIsNull(&m_scratchSurface.OsResource)) |
| { |
| width = m_widthAlignedLcu32 >> 3; |
| height = m_heightAlignedLcu32 >> 5; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_scratchSurface, |
| width, |
| height, |
| "Scratch surface for I and B Kernels")); |
| } |
| |
| // CU based QP surface |
| if (Mos_ResourceIsNull(&m_16x16QpInputData.OsResource)) |
| { |
| width = MOS_ALIGN_CEIL(m_picWidthInMb, 64); |
| height = MOS_ALIGN_CEIL(m_picHeightInMb, 64); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_16x16QpInputData, |
| width, |
| height, |
| "16x16 QP Data Input surface")); |
| } |
| |
| // Surfaces used by B Kernels |
| // Enc constant table for B LCU32 |
| if (Mos_ResourceIsNull(&m_encConstantTableForB.sResource)) |
| { |
| size = m_encConstantDataLutSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_encConstantTableForB, |
| size, |
| "Enc Constant Table surface For LCU32/LCU64")); |
| } |
| |
| //Debug surface |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++) |
| { |
| if (Mos_ResourceIsNull(&m_debugSurface[i].sResource)) |
| { |
| size = m_debugSurfaceSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_debugSurface[i], |
| size, |
| "Kernel debug surface")); |
| } |
| } |
| } |
| |
| // LCU Level Input Data |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++) |
| { |
| if (Mos_ResourceIsNull(&m_lcuLevelInputDataSurface[i].OsResource)) |
| { |
| width = 16 * ((m_widthAlignedMaxLcu >> 6) << 1); |
| height = ((m_heightAlignedMaxLcu >> 6) << 1); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_lcuLevelInputDataSurface[i], |
| width, |
| height, |
| "Lcu Level Data Input surface")); |
| } |
| } |
| |
| m_brcInputForEncKernelBuffer = nullptr; |
| |
| //Current Picture Y with Reconstructed boundary pixels |
| if (Mos_ResourceIsNull(&m_currPicWithReconBoundaryPix.OsResource)) |
| { |
| width = m_widthAlignedLcu32; |
| height = m_heightAlignedLcu32; |
| |
| if (m_isMaxLcu64) |
| { |
| width = m_widthAlignedMaxLcu; |
| height = m_heightAlignedMaxLcu; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface( |
| &m_currPicWithReconBoundaryPix, |
| width, |
| height * m_alignReconFactor, |
| "Current Picture Y with Reconstructed Boundary Pixels surface")); |
| } |
| |
| // Encoder History Input Surface |
| if (Mos_ResourceIsNull(&m_encoderHistoryInputBuffer.OsResource)) |
| { |
| width = 32 * ((m_widthAlignedMaxLcu >> 6) << 1); |
| height = ((m_heightAlignedMaxLcu >> 6) << 1); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_encoderHistoryInputBuffer, |
| width, |
| height, |
| "Encoder History Input surface")); |
| } |
| |
| // Encoder History Output Surface |
| if (Mos_ResourceIsNull(&m_encoderHistoryOutputBuffer.OsResource)) |
| { |
| width = 32 * ((m_widthAlignedMaxLcu >> 6) << 1); |
| height = ((m_heightAlignedMaxLcu >> 6) << 1); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_encoderHistoryOutputBuffer, |
| width, |
| height, |
| "Encoder History Output surface")); |
| } |
| |
| if (m_hmeSupported && !m_useMdf) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->AllocateResources()); |
| // BRC Distortion surface |
| if (Mos_ResourceIsNull(&m_brcBuffers.sMeBrcDistortionBuffer.OsResource)) |
| { |
| width = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x << 3), 64); |
| height = MOS_ALIGN_CEIL((m_downscaledHeightInMb4x << 2), 8) << 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_brcBuffers.sMeBrcDistortionBuffer, |
| width, |
| height, |
| "Brc Distortion surface Buffer")); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources()); |
| } |
| |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++) |
| { |
| if (Mos_ResourceIsNull(&m_encBCombinedBuffer1[i].sResource)) |
| { |
| size = sizeof(MBENC_COMBINED_BUFFER1); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_encBCombinedBuffer1[i], |
| size, |
| "Enc B combined buffer1")); |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer1[i].sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, size); |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer1[i].sResource); |
| } |
| } |
| |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++) |
| { |
| if (Mos_ResourceIsNull(&m_encBCombinedBuffer2[i].sResource)) |
| { |
| uint32_t numLcu64 = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 64 / 64; |
| MBENC_COMBINED_BUFFER2 fixedBuf; |
| |
| m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE); |
| m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE); |
| |
| size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize; |
| |
| m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE); |
| m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_encBCombinedBuffer2[i], |
| size, |
| "Enc B combined buffer2")); |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer2[i].sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, size); |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer2[i].sResource); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::FreeEncResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_DeleteArray(m_mbEncKernelStates); |
| m_mbEncKernelStates = nullptr; |
| MOS_FreeMemory(m_mbEncKernelBindingTable); |
| m_mbEncKernelBindingTable = nullptr; |
| |
| MOS_DeleteArray(m_brcKernelStates); |
| m_brcKernelStates = nullptr; |
| MOS_FreeMemory(m_brcKernelBindingTable); |
| m_brcKernelBindingTable = nullptr; |
| |
| HmeParams hmeParams; |
| MOS_ZeroMemory(&hmeParams, sizeof(hmeParams)); |
| hmeParams.presMvAndDistortionSumSurface = &m_mvAndDistortionSumSurface.sResource; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(DestroyMEResources(&hmeParams)); |
| |
| // Surfaces used by I kernel |
| // Release Intermediate CU Record Surface |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_intermediateCuRecordSurfaceLcu32.OsResource); |
| |
| // Release Scratch Surface for I-kernel |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_scratchSurface.OsResource); |
| |
| // Release CU based QP surface |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_16x16QpInputData.OsResource); |
| |
| // Release LCU Level Input Data |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_lcuLevelInputDataSurface); i++) |
| { |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_lcuLevelInputDataSurface[i].OsResource); |
| } |
| |
| // Release Current Picture Y with Reconstructed boundary pixels surface |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_currPicWithReconBoundaryPix.OsResource); |
| |
| // Release Encoder History Input Data |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_encoderHistoryInputBuffer.OsResource); |
| |
| // Release Encoder History Output Data |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_encoderHistoryOutputBuffer.OsResource); |
| |
| // Release Debug surface |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++) |
| { |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_debugSurface[i].sResource); |
| } |
| |
| // Surfaces used by B Kernels |
| // Enc constant table for B LCU32 |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_encConstantTableForB.sResource); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(FreeMeResources()); |
| |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer1); i++) |
| { |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_encBCombinedBuffer1[i].sResource); |
| } |
| |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_encBCombinedBuffer2); i++) |
| { |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_encBCombinedBuffer2[i].sResource); |
| } |
| |
| if (m_swScoreboard) |
| { |
| MOS_FreeMemory(m_swScoreboard); |
| m_swScoreboard = nullptr; |
| } |
| |
| if (m_numDelay) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resDelayMinus); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AllocateMeResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // Mv and Distortion Summation Surface |
| if (Mos_ResourceIsNull(&m_mvAndDistortionSumSurface.sResource)) |
| { |
| uint32_t size = m_mvdistSummationSurfSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_mvAndDistortionSumSurface, |
| size, |
| "Mv and Distortion Summation surface")); |
| |
| // Initialize the surface to zero for now till HME is updated to output the data into this surface |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_mvAndDistortionSumSurface.sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, size); |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_mvAndDistortionSumSurface.sResource); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::FreeMeResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_brcBuffers.sMeBrcDistortionBuffer.OsResource); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AllocatePakResources() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| uint32_t mvt_size = MOS_ALIGN_CEIL(((m_frameWidth + 63) >> 6) * ((m_frameHeight + 15) >> 4), 2) * CODECHAL_CACHELINE_SIZE; |
| uint32_t mvtb_size = MOS_ALIGN_CEIL(((m_frameWidth + 31) >> 5) * ((m_frameHeight + 31) >> 5), 2) * CODECHAL_CACHELINE_SIZE; |
| m_sizeOfMvTemporalBuffer = MOS_MAX(mvt_size, mvtb_size); |
| |
| const uint32_t minLcuSize = 16; |
| const uint32_t picWidthInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameWidth, minLcuSize); //assume smallest LCU to get max width |
| const uint32_t picHeightInMinLCU = MOS_ROUNDUP_DIVIDE(m_frameHeight, minLcuSize); //assume smallest LCU to get max height |
| |
| MHW_VDBOX_HCP_BUFFER_SIZE_PARAMS hcpBufSizeParam; |
| MOS_ZeroMemory(&hcpBufSizeParam, sizeof(hcpBufSizeParam)); |
| hcpBufSizeParam.ucMaxBitDepth = m_bitDepth; |
| hcpBufSizeParam.ucChromaFormat = m_chromaFormat; |
| // We should move the buffer allocation to picture level if the size is dependent on LCU size |
| hcpBufSizeParam.dwCtbLog2SizeY = 6; //assume Max LCU size |
| hcpBufSizeParam.dwPicWidth = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE); |
| hcpBufSizeParam.dwPicHeight = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE); |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| |
| // Deblocking Filter Row Store Scratch data surface |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "DeblockingScratchBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDeblockingFilterRowStoreScratchBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| // Deblocking Filter Tile Row Store Scratch data surface |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "DeblockingTileRowScratchBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDeblockingFilterTileRowStoreScratchBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| // Deblocking Filter Column Row Store Scratch data surface |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_DBLK_TILE_COL, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Deblocking Filter Tile Column Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "DeblockingColumnScratchBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDeblockingFilterColumnRowStoreScratchBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Deblocking Filter Tile Column Row Store Scratch Buffer."); |
| return eStatus; |
| } |
| |
| // Metadata Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_META_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "MetadataLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resMetadataLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Line Buffer."); |
| return eStatus; |
| } |
| |
| // Metadata Tile Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "MetadataTileLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resMetadataTileLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| // Metadata Tile Column buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_META_TILE_COL, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for Metadata Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "MetadataTileColumnBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resMetadataTileColumnBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate Metadata Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "SaoLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Line Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Tile Line buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_LINE, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "SaoTileLineBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoTileLineBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Line Buffer."); |
| return eStatus; |
| } |
| |
| // SAO Tile Column buffer |
| eStatus = (MOS_STATUS)m_hcpInterface->GetHevcBufferSize( |
| MHW_VDBOX_HCP_INTERNAL_BUFFER_SAO_TILE_COL, |
| &hcpBufSizeParam); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to get the size for SAO Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| allocParamsForBufferLinear.dwBytes = hcpBufSizeParam.dwBufferSize; |
| allocParamsForBufferLinear.pBufName = "SaoTileColumnBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoTileColumnBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO Tile Column Buffer."); |
| return eStatus; |
| } |
| |
| // Lcu ILDB StreamOut buffer |
| // Allocate the buffer size |
| // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here |
| allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE; |
| allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resLcuIldbStreamOutBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU ILDB StreamOut Buffer."); |
| return eStatus; |
| } |
| |
| // Lcu Base Address buffer |
| // HEVC Encoder Mode: Slice size is written to this buffer when slice size conformance is enabled. |
| // 1 CL (= 16 DWs = 64 bytes) per slice * Maximum number of slices in a frame. |
| // Align to page for HUC requirement |
| uint32_t maxLcu = picWidthInMinLCU * picHeightInMinLCU; |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(maxLcu * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| allocParamsForBufferLinear.pBufName = "LcuBaseAddressBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resLcuBaseAddressBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate LCU Base Address Buffer."); |
| return eStatus; |
| } |
| |
| // SAO StreamOut buffer |
| // size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * 16 |
| uint32_t size = MOS_ALIGN_CEIL(picWidthInMinLCU, 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU; |
| //extra added size to cover tile enabled case, per tile width aligned to 4. 20: max tile column No. |
| size += 3 * 20 * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU; |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "SaoStreamOutBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSaoStreamOutBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO StreamOut Buffer."); |
| return eStatus; |
| } |
| |
| uint32_t maxTileNumber = (MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE) * |
| (MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_HEVC_MIN_TILE_SIZE) / CODECHAL_HEVC_MIN_TILE_SIZE); |
| |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| |
| // Allocate Frame Statistics Streamout Data Destination Buffer. DW98-100 in HCP pipe buffer address command |
| allocParamsForBufferLinear.dwBytes = m_sizeOfHcpPakFrameStats * maxTileNumber; //Each tile has 8 cache size bytes of data |
| allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resFrameStatStreamOutBuffer)); |
| |
| // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command |
| // One CU has 16-byte. But, each tile needs to be aliged to the cache line |
| uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE); |
| uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE); |
| size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE); |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resPakcuLevelStreamoutData.sResource)); |
| m_resPakcuLevelStreamoutData.dwSize = size; |
| CODECHAL_ENCODE_VERBOSEMESSAGE("first allocate cu steam out buffer, size=0x%x.\n", size); |
| |
| // Allocate SSE Source Pixel Row Store Buffer. Implementation for each tile column is shown as below: |
| // tileWidthInLCU = ((tileWidthInLCU+3) * BYTES_PER_CACHE_LINE)*(4+4) ; tileWidthInLCU <<= 1; // double the size as RTL treats it as 10 bit data |
| // Here, we consider each LCU column is one tile column. |
| |
| m_sizeOfSseSrcPixelRowStoreBufferPerLcu = (CODECHAL_CACHELINE_SIZE * (4 + 4)) << 1; //size per LCU plus 10-bit |
| size = m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (picWidthInMinLCU + 3); // already aligned to cacheline size |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resSseSrcPixelRowStoreBuffer)); |
| |
| // SAO Row Store buffer, HSAO |
| // Aligned to 4 for each tile column |
| uint32_t maxTileColumn = MOS_ROUNDUP_DIVIDE(m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE); |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(picWidthInMinLCU + 3 * maxTileColumn, 4) * 16; |
| allocParamsForBufferLinear.pBufName = "SaoRowStoreBuffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_SAORowStoreBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SAO row store Buffer."); |
| return eStatus; |
| } |
| |
| //HCP scalability Sync buffer |
| size = CODECHAL_HEVC_MAX_NUM_HCP_PIPE * CODECHAL_CACHELINE_SIZE; |
| allocParamsForBufferLinear.dwBytes = size; |
| allocParamsForBufferLinear.pBufName = "GEN12 Hcp scalability Sync buffer "; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHcpScalabilitySyncBuffer.sResource)); |
| m_resHcpScalabilitySyncBuffer.dwSize = size; |
| |
| // create the tile coding state parameters |
| m_tileParams = (PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12)MOS_AllocAndZeroMemory(sizeof(MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12) * maxTileNumber); |
| |
| if (m_enableHWSemaphore) |
| { |
| // Create the HW sync objects which will be used by each reference frame and BRC in GEN12 |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "SemaphoreMemory"; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++) |
| { |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resBrcSemaphoreMem[i].sResource); |
| m_resBrcSemaphoreMem[i].dwSize = allocParamsForBufferLinear.dwBytes; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create BRC HW Semaphore Memory."); |
| |
| uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resBrcSemaphoreMem[i].sResource, |
| &lockFlagsWriteOnly); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| *data = 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resBrcSemaphoreMem[i].sResource)); |
| } |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resPipeStartSemaMem); |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe start sync HW semaphore."); |
| |
| uint32_t *data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resPipeStartSemaMem, |
| &lockFlagsWriteOnly); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| *data = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resPipeStartSemaMem)); |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resPipeCompleteSemaMem); |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Cannot create Scalability pipe completion sync HW semaphore."); |
| |
| data = (uint32_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resPipeCompleteSemaMem, |
| &lockFlagsWriteOnly); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| *data = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_resPipeCompleteSemaMem)); |
| } |
| |
| if (m_hucPakStitchEnabled) |
| { |
| if (Mos_ResourceIsNull(&m_resHucStatus2Buffer)) |
| { |
| // HUC STATUS 2 Buffer for HuC status check in COND_BB_END |
| allocParamsForBufferLinear.dwBytes = sizeof(uint64_t); |
| allocParamsForBufferLinear.pBufName = "HUC STATUS 2 Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN( |
| m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHucStatus2Buffer), |
| "%s: Failed to allocate HUC STATUS 2 Buffer\n", |
| __FUNCTION__); |
| } |
| |
| uint8_t *data; |
| |
| // Pak stitch DMEM |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE); |
| allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer"; |
| auto numOfPasses = CODECHAL_DP_MAX_NUM_BRC_PASSES; |
| for (auto j = 0; j < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; j++) |
| { |
| for (auto i = 0; i < numOfPasses; i++) |
| { |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHucPakStitchDmemBuffer[j][i]); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK Stitch Dmem Buffer."); |
| return eStatus; |
| } |
| } |
| } |
| // BRC Data Buffer |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); |
| allocParamsForBufferLinear.pBufName = "BRC Data Buffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resBrcDataBuffer); |
| |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate BRC Data Buffer Buffer."); |
| return eStatus; |
| } |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| |
| data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resBrcDataBuffer, |
| &lockFlags); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory( |
| data, |
| allocParamsForBufferLinear.dwBytes); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resBrcDataBuffer); |
| for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) |
| { |
| for (auto j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++) |
| { |
| // HuC stitching Data buffer |
| allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE); |
| allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer"; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHucStitchDataBuffer[i][j])); |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resHucStitchDataBuffer[i][j], |
| &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(pData); |
| MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]); |
| } |
| } |
| |
| //Second level BB for huc stitching cmd |
| MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer)); |
| m_HucStitchCmdBatchBuffer.bSecondLevel = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( |
| m_osInterface, |
| &m_HucStitchCmdBatchBuffer, |
| nullptr, |
| m_hwInterface->m_HucStitchCmdBatchBufferSize)); |
| } |
| |
| // Pak obj and CU records for skip frame |
| uint32_t mbCodeSize = m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE; // Must reserve at least 8 cachelines after MI_BATCH_BUFFER_END_CMD since HW prefetch max 8 cachelines from BB everytime |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParams; |
| MOS_ZeroMemory(&allocParams, sizeof(allocParams)); |
| allocParams.Type = MOS_GFXRES_BUFFER; |
| allocParams.Format = Format_Buffer; |
| allocParams.TileType = MOS_TILE_LINEAR; |
| allocParams.dwBytes = mbCodeSize; |
| allocParams.pBufName = "skipFrameMbCodeSurface"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParams, |
| &m_skipFrameInfo.m_resMbCodeSkipFrameSurface); |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK object buffer for skip frame"); |
| return eStatus; |
| } |
| |
| if (m_numDelay) |
| { |
| allocParamsForBufferLinear.dwBytes = sizeof(uint32_t); |
| allocParamsForBufferLinear.pBufName = "DelayMinusMemory"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resDelayMinus), |
| "Failed to allocate delay minus memory."); |
| |
| uint8_t * data; |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resDelayMinus, |
| &lockFlags); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, sizeof(uint32_t)); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resDelayMinus); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::FreePakResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // Release Frame Statistics Streamout Data Destination Buffer |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resFrameStatStreamOutBuffer); |
| |
| // PAK CU Level Stream out buffer |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resPakcuLevelStreamoutData.sResource); |
| |
| // Release SSE Source Pixel Row Store Buffer |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resSseSrcPixelRowStoreBuffer); |
| |
| // Release Hcp scalability Sync buffer |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resHcpScalabilitySyncBuffer.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resPakcuLevelStreamoutData.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resPakSliceLevelStreamoutData.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_SAORowStoreBuffer); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_skipFrameInfo.m_resMbCodeSkipFrameSurface); |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resTileBasedStatisticsBuffer); i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[i].sResource); |
| } |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_tileRecordBuffer); i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[i].sResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource); |
| |
| MOS_FreeMemory(m_tileParams); |
| |
| if (m_useVirtualEngine) |
| { |
| for (uint32_t i = 0; i < CODECHAL_NUM_UNCOMPRESSED_SURFACE_HEVC; i++) |
| { |
| for (uint32_t j = 0; j < CODECHAL_HEVC_MAX_NUM_HCP_PIPE; j++) |
| { |
| for (auto k = 0; k < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; k++) |
| { |
| PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[i][j][k]; |
| if (cmdBuffer->pCmdBase) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &cmdBuffer->OsResource); |
| } |
| } |
| } |
| } |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_refSync); i++) |
| { |
| auto sync = &m_refSync[i]; |
| |
| if (!Mos_ResourceIsNull(&sync->resSyncObject)) |
| { |
| // if this object has been signaled before, we need to wait to ensure singal-wait is in pair. |
| if (sync->uiSemaphoreObjCount || sync->bInUsed) |
| { |
| MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_renderContext; |
| syncParams.presSyncResource = &sync->resSyncObject; |
| syncParams.uiSemaphoreCount = sync->uiSemaphoreObjCount; |
| m_osInterface->pfnEngineWait(m_osInterface, &syncParams); |
| } |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &sync->resSemaphoreMem.sResource); |
| } |
| |
| for (auto i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_resBrcSemaphoreMem); i++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcSemaphoreMem[i].sResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeStartSemaMem); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resPipeCompleteSemaMem); |
| |
| if (m_hucPakStitchEnabled) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStatus2Buffer); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resBrcDataBuffer); |
| |
| for (int i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) |
| { |
| for (int j = 0; j < CODECHAL_HEVC_MAX_NUM_BRC_PASSES; j++) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHucPakStitchDmemBuffer[i][j]); |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resHucStitchDataBuffer[i][j]); |
| } |
| } |
| Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr); |
| } |
| return CodechalEncHevcState::FreePakResources(); |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GetKernelHeaderAndSize( |
| void * binary, |
| EncOperation operation, |
| uint32_t krnStateIdx, |
| void * krnHeader, |
| uint32_t * krnSize) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(binary); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize); |
| |
| PCODECHAL_HEVC_KERNEL_HEADER kernelHeaderTable = (PCODECHAL_HEVC_KERNEL_HEADER)binary; |
| |
| PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr; |
| switch (operation) |
| { |
| case ENC_MBENC: |
| { |
| switch (krnStateIdx) |
| { |
| case MBENC_LCU32_KRNIDX: |
| currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU32; |
| break; |
| |
| case MBENC_LCU64_KRNIDX: |
| currKrnHeader = &kernelHeaderTable->HEVC_Enc_LCU64; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| break; |
| |
| case ENC_BRC: |
| { |
| switch (krnStateIdx) |
| { |
| case CODECHAL_HEVC_BRC_INIT: |
| currKrnHeader = &kernelHeaderTable->HEVC_brc_init; |
| break; |
| |
| case CODECHAL_HEVC_BRC_RESET: |
| currKrnHeader = &kernelHeaderTable->HEVC_brc_reset; |
| break; |
| |
| case CODECHAL_HEVC_BRC_FRAME_UPDATE: |
| currKrnHeader = &kernelHeaderTable->HEVC_brc_update; |
| break; |
| |
| case CODECHAL_HEVC_BRC_LCU_UPDATE: |
| currKrnHeader = &kernelHeaderTable->HEVC_brc_lcuqp; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested, krnStateIdx=%d", krnStateIdx); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| break; |
| } |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader; |
| |
| PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1); |
| PCODECHAL_KERNEL_HEADER invalidEntry = &(kernelHeaderTable->HEVC_brc_lcuqp) + 1; |
| uint32_t nextKrnOffset = *krnSize; |
| if (nextKrnHeader < invalidEntry) |
| { |
| nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT; |
| } |
| *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::InitKernelStateMbEnc() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface; |
| m_numMbEncEncKrnStates = MBENC_NUM_KRN; |
| |
| m_mbEncKernelStates = |
| MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates); |
| |
| m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory( |
| sizeof(GenericBindingTable) * m_numMbEncEncKrnStates); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable); |
| |
| PMHW_KERNEL_STATE kernelStatePtr = m_mbEncKernelStates; |
| |
| for (uint32_t krnStateIdx = 0; krnStateIdx < m_numMbEncEncKrnStates; krnStateIdx++) |
| { |
| auto kernelSize = m_combinedKernelSize; |
| CODECHAL_KERNEL_HEADER currKrnHeader; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize( |
| m_kernelBinary, |
| ENC_MBENC, |
| krnStateIdx, |
| &currKrnHeader, |
| &kernelSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams( |
| ENC_MBENC, |
| &kernelStatePtr->KernelParams, |
| krnStateIdx)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable( |
| ENC_MBENC, |
| &m_mbEncKernelBindingTable[krnStateIdx], |
| krnStateIdx)); |
| |
| kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = |
| m_kernelBinary + |
| (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| stateHeapInterface, |
| kernelStatePtr->KernelParams.iBTCount, |
| &kernelStatePtr->dwSshSize, |
| &kernelStatePtr->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr)); |
| |
| kernelStatePtr++; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::InitKernelStateBrc() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PMHW_STATE_HEAP_INTERFACE stateHeapInterface = m_hwInterface->GetRenderInterface()->m_stateHeapInterface; |
| m_numBrcKrnStates = CODECHAL_HEVC_BRC_NUM; |
| |
| m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates); |
| |
| m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory( |
| sizeof(GenericBindingTable) * m_numBrcKrnStates); |
| |
| PMHW_KERNEL_STATE kernelStatePtr = m_brcKernelStates; |
| |
| kernelStatePtr++; // Skipping BRC_COARSE_INTRA as it not in Gen11 |
| |
| // KrnStateIdx initialization starts at 1 as Gen11 does not support BRC_COARSE_INTRA kernel in BRC. It is part of the Combined Common Kernel |
| for (uint32_t krnStateIdx = 1; krnStateIdx < m_numBrcKrnStates; krnStateIdx++) |
| { |
| auto kernelSize = m_combinedKernelSize; |
| CODECHAL_KERNEL_HEADER currKrnHeader; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize( |
| m_kernelBinary, |
| ENC_BRC, |
| krnStateIdx, |
| &currKrnHeader, |
| (uint32_t *)&kernelSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams( |
| ENC_BRC, |
| &kernelStatePtr->KernelParams, |
| krnStateIdx)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable( |
| ENC_BRC, |
| &m_brcKernelBindingTable[krnStateIdx], |
| krnStateIdx)); |
| |
| kernelStatePtr->dwCurbeOffset = stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| kernelStatePtr->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| stateHeapInterface, |
| kernelStatePtr->KernelParams.iBTCount, |
| &kernelStatePtr->dwSshSize, |
| &kernelStatePtr->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(stateHeapInterface, kernelStatePtr)); |
| |
| kernelStatePtr++; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GetFrameBrcLevel() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| //if L0/L1 both points to previous frame, then its LBD otherwise its is level 1 RA B. |
| auto B_or_LDB_brclevel = m_lowDelay ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B; |
| std::map<int, HEVC_BRC_FRAME_TYPE> codingtype_to_brclevel{ |
| {I_TYPE, HEVC_BRC_FRAME_TYPE_I}, |
| {P_TYPE, HEVC_BRC_FRAME_TYPE_P_OR_LB}, |
| {B_TYPE, B_or_LDB_brclevel}, |
| {B1_TYPE, HEVC_BRC_FRAME_TYPE_B1}, |
| {B2_TYPE, HEVC_BRC_FRAME_TYPE_B2}}; |
| |
| //Both I or P/LDB type at same HierarchLevelPlus1 |
| auto intra_LDBFrame_to_Brclevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : HEVC_BRC_FRAME_TYPE_P_OR_LB; |
| std::map<int, HEVC_BRC_FRAME_TYPE> hierchLevelPlus1_to_brclevel{ |
| {1, intra_LDBFrame_to_Brclevel}, |
| {2, HEVC_BRC_FRAME_TYPE_B}, |
| {3, HEVC_BRC_FRAME_TYPE_B1}, |
| {4, HEVC_BRC_FRAME_TYPE_B2}}; |
| |
| if (m_hevcSeqParams->HierarchicalFlag && m_hevcSeqParams->GopRefDist > 1 && m_hevcSeqParams->GopRefDist <= 8) |
| { |
| if (m_hevcPicParams->HierarchLevelPlus1 > 0) // LDB or RAB |
| { |
| m_currFrameBrcLevel = hierchLevelPlus1_to_brclevel.count(m_hevcPicParams->HierarchLevelPlus1) ? hierchLevelPlus1_to_brclevel[m_hevcPicParams->HierarchLevelPlus1] : HEVC_BRC_FRAME_TYPE_INVALID; |
| //Invalid HierarchLevelPlus1 or LBD frames at level 3 eror check. |
| if ((m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_INVALID) || |
| (m_hevcSeqParams->LowDelayMode && m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_B2)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("HEVC_BRC_FRAME_TYPE_INVALID or LBD picture doesn't support Level 4\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| else |
| { |
| if (!m_hevcSeqParams->LowDelayMode) // RA B |
| { |
| m_currFrameBrcLevel = codingtype_to_brclevel.count(m_pictureCodingType) ? codingtype_to_brclevel[m_pictureCodingType] : HEVC_BRC_FRAME_TYPE_INVALID; |
| //Invalid CodingType. |
| if (m_currFrameBrcLevel == HEVC_BRC_FRAME_TYPE_INVALID) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid CodingType\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| else // Low Delay mode: Flat case |
| { |
| m_currFrameBrcLevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : HEVC_BRC_FRAME_TYPE_P_OR_LB; |
| } |
| } |
| } |
| else // Flat B |
| { |
| m_currFrameBrcLevel = (m_pictureCodingType == I_TYPE) ? HEVC_BRC_FRAME_TYPE_I : B_or_LDB_brclevel; |
| } |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| uint32_t CodechalEncHevcStateG12::GetMaxBtCount() |
| { |
| uint16_t btIdxAlignment = m_hwInterface->GetRenderInterface()->m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment(); |
| |
| // BRC Init kernel |
| uint32_t btCountPhase1 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount, btIdxAlignment); |
| |
| // SwScoreboard kernel |
| uint32_t btCountPhase2 = MOS_ALIGN_CEIL(m_swScoreboardState->GetBTCount(), btIdxAlignment); |
| |
| // Csc+Ds+Conversion kernel |
| btCountPhase2 += MOS_ALIGN_CEIL(m_cscDsState->GetBTCount(), btIdxAlignment); |
| |
| // Intra Distortion kernel |
| if (m_intraDistKernel) |
| { |
| btCountPhase2 += MOS_ALIGN_CEIL(m_intraDistKernel->GetBTCount(), btIdxAlignment); |
| } |
| |
| // HME 4x, 16x, 32x kernel |
| if (m_hmeKernel) |
| { |
| btCountPhase2 += (MOS_ALIGN_CEIL(m_hmeKernel->GetBTCount(), btIdxAlignment) * 3); |
| } |
| |
| // Weighted prediction kernel |
| btCountPhase2 += MOS_ALIGN_CEIL(m_wpState->GetBTCount(), btIdxAlignment); |
| uint32_t btCountPhase3 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) + |
| MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU32_KRNIDX].KernelParams.iBTCount, btIdxAlignment); |
| |
| uint32_t btCountPhase4 = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment) + |
| MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_LCU64_KRNIDX].KernelParams.iBTCount, btIdxAlignment); |
| |
| uint32_t maxBtCount = MOS_MAX(btCountPhase1, btCountPhase2); |
| maxBtCount = MOS_MAX(maxBtCount, btCountPhase3); |
| maxBtCount = MOS_MAX(maxBtCount, btCountPhase4); |
| |
| return maxBtCount; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::CalcScaledDimensions() |
| { |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| void CodechalEncHevcStateG12::GetMaxRefFrames(uint8_t &maxNumRef0, uint8_t &maxNumRef1) |
| { |
| maxNumRef0 = m_maxNumVmeL0Ref; |
| maxNumRef1 = m_maxNumVmeL1Ref; |
| |
| return; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GetStatusReport( |
| EncodeStatus * encodeStatus, |
| EncodeStatusReport *encodeStatusReport) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatus); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(encodeStatusReport); |
| |
| if (encodeStatusReport->UsedVdBoxNumber <= 1) |
| { |
| return CodechalEncodeHevcBase::GetStatusReport(encodeStatus, encodeStatusReport); |
| } |
| |
| PCODECHAL_ENCODE_BUFFER tileSizeStatusReport = &m_tileRecordBuffer[encodeStatusReport->CurrOriginalPic.FrameIdx]; |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| HCPPakHWTileSizeRecord_G12 *tileStatusReport = (HCPPakHWTileSizeRecord_G12 *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &tileSizeStatusReport->sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tileStatusReport); |
| |
| encodeStatusReport->CodecStatus = CODECHAL_STATUS_SUCCESSFUL; |
| encodeStatusReport->PanicMode = false; |
| encodeStatusReport->AverageQp = 0; |
| encodeStatusReport->QpY = 0; |
| encodeStatusReport->SuggestedQpYDelta = 0; |
| encodeStatusReport->NumberPasses = 1; |
| encodeStatusReport->bitstreamSize = 0; |
| encodeStatus->ImageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQp = 0; |
| |
| uint32_t totalCU = 0; |
| double sumQp = 0.0; |
| for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++) |
| { |
| if (tileStatusReport[i].Length == 0) |
| { |
| encodeStatusReport->CodecStatus = CODECHAL_STATUS_INCOMPLETE; |
| return eStatus; |
| } |
| |
| encodeStatusReport->bitstreamSize += tileStatusReport[i].Length; |
| totalCU += (m_tileParams[i].TileHeightInMinCbMinus1 + 1) * (m_tileParams[i].TileWidthInMinCbMinus1 + 1); |
| sumQp += tileStatusReport[i].Hcp_Qp_Status_Count; |
| } |
| |
| encodeStatusReport->NumberPasses = (uint8_t)encodeStatus->dwNumberPasses + 1; |
| CODECHAL_ENCODE_VERBOSEMESSAGE("BRC Scalability Mode Exectued PAK Pass number: %d.\n", encodeStatusReport->NumberPasses); |
| |
| if (encodeStatusReport->bitstreamSize == 0 || |
| encodeStatusReport->bitstreamSize > m_bitstreamUpperBound) |
| { |
| encodeStatusReport->CodecStatus = CODECHAL_STATUS_ERROR; |
| encodeStatusReport->bitstreamSize = 0; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Bit-stream size exceeds upper bound!"); |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| |
| if (m_sseEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CalculatePSNR(encodeStatus, encodeStatusReport)); |
| } |
| |
| encodeStatusReport->QpY = encodeStatusReport->AverageQp = |
| (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU |
| |
| if (m_enableTileStitchByHW) |
| { |
| return eStatus; |
| } |
| |
| uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr; |
| tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(encodeStatusReport->bitstreamSize); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(tempBsBuffer); |
| |
| CODEC_REF_LIST currRefList = *(encodeStatus->encodeStatusReport.pCurrRefList); |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.ReadOnly = 1; |
| uint8_t *bitstream = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &currRefList.resBitstreamBuffer, |
| &lockFlags); |
| if (bitstream == nullptr) |
| { |
| MOS_SafeFreeMemory(tempBsBuffer); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr); |
| } |
| |
| for (uint32_t i = 0; i < encodeStatusReport->NumberTilesInFrame; i++) |
| { |
| uint32_t offset = m_tileParams[i].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE; |
| uint32_t len = tileStatusReport[i].Length; |
| |
| MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len); |
| bufPtr += len; |
| } |
| |
| MOS_SecureMemcpy(bitstream, encodeStatusReport->bitstreamSize, tempBsBuffer, encodeStatusReport->bitstreamSize); |
| MOS_ZeroMemory(&bitstream[encodeStatusReport->bitstreamSize], |
| m_bitstreamUpperBound - encodeStatusReport->bitstreamSize); |
| |
| if (tempBsBuffer) |
| { |
| MOS_FreeMemory(tempBsBuffer); |
| } |
| |
| if (m_osInterface && bitstream) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &currRefList.resBitstreamBuffer); |
| } |
| |
| if (m_osInterface && tileStatusReport) |
| { |
| // clean-up the tile status report buffer |
| MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * encodeStatusReport->NumberTilesInFrame); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &tileSizeStatusReport->sResource); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AllocateResourcesVariableSize() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| return eStatus; |
| } |
| |
| uint32_t bufSize = 0; |
| if (m_pakPiplStrmOutEnable) |
| { |
| // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command |
| // One CU has 16-byte. But, each tile needs to be aliged to the cache line |
| uint32_t tileWidthInCus = 0; |
| uint32_t tileHeightInCus = 0; |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| uint32_t idx = tileRow * numTileColumns + tileCol; |
| |
| tileHeightInCus = m_tileParams[idx].TileHeightInMinCbMinus1 + 1; |
| tileWidthInCus = m_tileParams[idx].TileWidthInMinCbMinus1 + 1; |
| bufSize += (tileWidthInCus * tileHeightInCus * 16); |
| bufSize = MOS_ALIGN_CEIL(bufSize, CODECHAL_CACHELINE_SIZE); |
| } |
| } |
| if (Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) || |
| (bufSize > m_resPakcuLevelStreamoutData.dwSize)) |
| { |
| if (!Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource)) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resPakcuLevelStreamoutData.sResource); |
| } |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = bufSize; |
| allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN((MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resPakcuLevelStreamoutData.sResource)); |
| m_resPakcuLevelStreamoutData.dwSize = bufSize; |
| CODECHAL_ENCODE_VERBOSEMESSAGE("reallocate cu steam out buffer, size=0x%x.\n", bufSize); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ExecutePictureLevel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| m_firstTaskInPhase = m_singleTaskPhaseSupported ? IsFirstPass() : true; |
| m_lastTaskInPhase = m_singleTaskPhaseSupported ? IsLastPass() : true; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifyCommandBufferSize()); |
| |
| if (!m_singleTaskPhaseSupportedInPak) |
| { |
| // Command buffer or patch list size are too small and so we cannot submit multiple pass of PAKs together |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = true; |
| } |
| |
| if (m_vdboxIndex > m_mfxInterface->GetMaxVdboxIndex()) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| if ((!m_singleTaskPhaseSupported) || m_firstTaskInPhase) |
| { |
| // Send command buffer header at the beginning (OS dependent) |
| // frame tracking tag is only added in the last command buffer header |
| bool bRequestFrameTracking = m_singleTaskPhaseSupported ? m_firstTaskInPhase : m_lastTaskInPhase; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendPrologWithFrameTracking(&cmdBuffer, bRequestFrameTracking)); |
| } |
| |
| // clean-up per VDBOX semaphore memory |
| int32_t currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (m_numPipe >= 2 && |
| ((m_singleTaskPhaseSupported && IsFirstPass()) || |
| !m_singleTaskPhaseSupported)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStopCmd(&cmdBuffer)); |
| //HW Semaphore cmd to make sure all pipes start encode at the same time |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand( |
| &m_resPipeStartSemaMem, |
| &cmdBuffer, |
| m_numPipe)); |
| |
| // Program some placeholder cmds to resolve the hazard between BEs sync |
| MHW_MI_STORE_DATA_PARAMS dataParams; |
| dataParams.pOsResource = &m_resDelayMinus; |
| dataParams.dwResourceOffset = 0; |
| dataParams.dwValue = 0xDE1A; |
| for (uint32_t i = 0; i < m_numDelay; i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &dataParams)); |
| } |
| |
| //clean HW semaphore memory |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeStartSemaMem, 1, MHW_MI_ATOMIC_DEC, &cmdBuffer)); |
| |
| //Start Watchdog Timer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddWatchdogTimerStartCmd(&cmdBuffer)); |
| |
| //To help test media reset, this hw semaphore wait will never be reached. |
| if (m_enableTestMediaReset) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand( |
| &m_resPipeStartSemaMem, |
| &cmdBuffer, |
| m_numPipe + 2)); |
| } |
| } |
| |
| if (m_brcEnabled && !IsFirstPass()) // Only the regular BRC passes have the conditional batch buffer end |
| { |
| // Ensure the previous PAK BRC pass is done, mainly for pipes other than pipe0. |
| if (m_singleTaskPhaseSupported && m_numPipe >= 2 && |
| !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| SendHWWaitCommand( |
| &m_resBrcSemaphoreMem[currentPipe].sResource, |
| &cmdBuffer, |
| 1)); |
| } |
| |
| // Insert conditional batch buffer end |
| MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS miConditionalBatchBufferEndParams; |
| MOS_ZeroMemory( |
| &miConditionalBatchBufferEndParams, |
| sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS)); |
| uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + |
| sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource ; |
| |
| if (m_hucPakStitchEnabled && m_numPipe >= 2) //BRC scalability |
| { |
| CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned |
| CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwHuCStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwHuCStatusRegOffset); |
| |
| miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miConditionalBatchBufferEndParams.dwOffset = baseOffset + m_encodeStatusBuf.dwHuCStatusMaskOffset; |
| } |
| else |
| { |
| CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset & 7) == 0); // Make sure uint64_t aligned |
| CODECHAL_ENCODE_ASSERT((m_encodeStatusBuf.dwImageStatusMaskOffset + sizeof(uint32_t)) == m_encodeStatusBuf.dwImageStatusCtrlOffset); |
| |
| miConditionalBatchBufferEndParams.presSemaphoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miConditionalBatchBufferEndParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusMaskOffset; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiConditionalBatchBufferEndCmd( |
| &cmdBuffer, |
| &miConditionalBatchBufferEndParams)); |
| |
| auto mmioRegisters = m_hcpInterface->GetMmioRegisters(m_vdboxIndex); |
| MHW_MI_STORE_REGISTER_MEM_PARAMS miStoreRegMemParams; |
| MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams; |
| if (m_hucPakStitchEnabled && m_numPipe >= 2) |
| { |
| // Write back the HCP image control register with HUC PAK Int Kernel output |
| MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams; |
| MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams)); |
| miLoadRegMemParams.presStoreBuffer = &m_resBrcDataBuffer; |
| miLoadRegMemParams.dwOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl); |
| miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams)); |
| |
| if (IsFirstPipe()) |
| { |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams)); |
| miCpyMemMemParams.presSrc = &m_resBrcDataBuffer; |
| miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl); |
| miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(&cmdBuffer, &miCpyMemMemParams)); |
| |
| MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); |
| miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset; |
| miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams)); |
| } |
| } |
| else |
| { |
| // Write back the HCP image control register for RC6 may clean it out |
| MHW_MI_LOAD_REGISTER_MEM_PARAMS miLoadRegMemParams; |
| MOS_ZeroMemory(&miLoadRegMemParams, sizeof(miLoadRegMemParams)); |
| miLoadRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miLoadRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOffset; |
| miLoadRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiLoadRegisterMemCmd(&cmdBuffer, &miLoadRegMemParams)); |
| |
| MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); |
| miStoreRegMemParams.presStoreBuffer = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS); |
| miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams)); |
| |
| MOS_ZeroMemory(&miStoreRegMemParams, sizeof(miStoreRegMemParams)); |
| miStoreRegMemParams.presStoreBuffer = &m_encodeStatusBuf.resStatusBuffer; |
| miStoreRegMemParams.dwOffset = baseOffset + m_encodeStatusBuf.dwImageStatusCtrlOfLastBRCPassOffset; |
| miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(&cmdBuffer, &miStoreRegMemParams)); |
| } |
| } |
| |
| if (IsFirstPipe() && IsFirstPass() && m_osInterface->bTagResourceSync) |
| { |
| // This is a short term solution to solve the sync tag issue: the sync tag write for PAK is inserted at the end of 2nd pass PAK BB |
| // which may be skipped in multi-pass PAK enabled case. The idea here is to insert the previous frame's tag at the beginning |
| // of the BB and keep the current frame's tag at the end of the BB. There will be a delay for tag update but it should be fine |
| // as long as Dec/VP/Enc won't depend on this PAK so soon. |
| |
| MOS_RESOURCE globalGpuContextSyncTagBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource( |
| m_osInterface, |
| &globalGpuContextSyncTagBuffer)); |
| |
| MHW_MI_STORE_DATA_PARAMS params; |
| params.pOsResource = &globalGpuContextSyncTagBuffer; |
| params.dwResourceOffset = m_osInterface->pfnGetGpuStatusTagOffset(m_osInterface, m_osInterface->CurrentGpuContextOrdinal); |
| uint32_t value = m_osInterface->pfnGetGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal); |
| params.dwValue = (value > 0) ? (value - 1) : 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(&cmdBuffer, ¶ms)); |
| } |
| |
| if (IsFirstPipe()) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(StartStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| } |
| |
| if (m_numPipe >= 2) |
| { |
| // clean up hw semaphore for BRC PAK pass sync, used only in single task phase. |
| if (m_singleTaskPhaseSupported && |
| m_brcEnabled && |
| !Mos_ResourceIsNull(&m_resBrcSemaphoreMem[currentPipe].sResource)) |
| { |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resBrcSemaphoreMem[currentPipe].sResource; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = 0; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &storeDataParams)); |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelectCmd(&cmdBuffer)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpSurfaceStateCmds(&cmdBuffer)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPipeBufAddrCmd(&cmdBuffer)); |
| |
| MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS indObjBaseAddrParams; |
| SetHcpIndObjBaseAddrParams(indObjBaseAddrParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpIndObjBaseAddrCmd(&cmdBuffer, &indObjBaseAddrParams)); |
| |
| MHW_VDBOX_QM_PARAMS fqmParams, qmParams; |
| SetHcpQmStateParams(fqmParams, qmParams); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpFqmStateCmd(&cmdBuffer, &fqmParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpQmStateCmd(&cmdBuffer, &qmParams)); |
| |
| if (m_brcEnabled) |
| { |
| uint32_t picStateCmdOffset; |
| if (m_hucPakStitchEnabled && m_numPipe >= 2) |
| { |
| //for non fist PAK pass, always use the 2nd HCP PIC STATE cmd buffer |
| picStateCmdOffset = IsFirstPass() ? 0 : 1; |
| } |
| else |
| { |
| picStateCmdOffset = GetCurrentPass(); |
| } |
| |
| MOS_RESOURCE &brcHcpStateWriteBuffer = m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; |
| if (IsPanicModePass()) |
| { |
| // BRC kernel supports only 4 BrcImageStates read/write buffers. |
| // So for panic PAK pass use HCP_PIC_STATE command from previous PAK pass. |
| picStateCmdOffset -= 1; |
| } |
| |
| MHW_BATCH_BUFFER batchBuffer; |
| MOS_ZeroMemory(&batchBuffer, sizeof(batchBuffer)); |
| batchBuffer.OsResource = brcHcpStateWriteBuffer; |
| batchBuffer.dwOffset = picStateCmdOffset * BRC_IMG_STATE_SIZE_PER_PASS_G12; |
| batchBuffer.bSecondLevel = true; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd( |
| &cmdBuffer, |
| &batchBuffer)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AddHcpPictureStateCmd(&cmdBuffer)); |
| } |
| |
| // Send HEVC_VP9_RDOQ_STATE command |
| if (m_hevcRdoqEnabled) |
| { |
| MHW_VDBOX_HEVC_PIC_STATE picStateParams; |
| SetHcpPicStateParams(picStateParams); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcVp9RdoqStateCmd(&cmdBuffer, &picStateParams)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| return eStatus; |
| } |
| |
| void CodechalEncHevcStateG12::SetHcpSliceStateCommonParams( |
| MHW_VDBOX_HEVC_SLICE_STATE &sliceState) |
| { |
| CodechalEncHevcState::SetHcpSliceStateCommonParams(sliceState); |
| |
| sliceState.RoundingIntra = m_roundingIntraInUse; |
| sliceState.RoundingInter = m_roundingInterInUse; |
| |
| if ((m_hevcSliceParams->slice_type == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) || |
| (m_hevcSliceParams->slice_type == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag)) |
| { |
| sliceState.bWeightedPredInUse = true; |
| } |
| else |
| { |
| sliceState.bWeightedPredInUse = false; |
| } |
| |
| static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).dwNumPipe = m_numPipe; |
| |
| sliceState.presDataBuffer = IsPanicModePass() ? &m_skipFrameInfo.m_resMbCodeSkipFrameSurface : &m_resMbCodeSurface; |
| } |
| |
| void CodechalEncHevcStateG12::SetHcpSliceStateParams( |
| MHW_VDBOX_HEVC_SLICE_STATE & sliceState, |
| PCODEC_ENCODER_SLCDATA slcData, |
| uint16_t slcCount, |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 tileCodingParams, |
| bool lastSliceInTile, |
| uint32_t idx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| sliceState.pEncodeHevcSliceParams = &m_hevcSliceParams[slcCount]; |
| sliceState.dwDataBufferOffset = slcData[slcCount].CmdOffset; |
| sliceState.dwOffset = slcData[slcCount].SliceOffset; |
| sliceState.dwLength = slcData[slcCount].BitSize; |
| sliceState.uiSkipEmulationCheckCount = slcData[slcCount].SkipEmulationByteCount; |
| sliceState.dwSliceIndex = (uint32_t)slcCount; |
| sliceState.bLastSlice = (slcCount == m_numSlices - 1); |
| sliceState.bLastSliceInTile = lastSliceInTile; |
| sliceState.bLastSliceInTileColumn = (bool)lastSliceInTile & tileCodingParams[idx].IsLastTileofColumn; |
| sliceState.bFirstPass = IsFirstPass(); |
| sliceState.bLastPass = IsLastPass(); |
| sliceState.bInsertBeforeSliceHeaders = (slcCount == 0); |
| sliceState.bSaoLumaFlag = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_luma_flag : 0; |
| sliceState.bSaoChromaFlag = (m_hevcSeqParams->SAO_enabled_flag) ? m_hevcSliceParams[slcCount].slice_sao_chroma_flag : 0; |
| static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).pTileCodingParams = tileCodingParams + idx; |
| static_cast<MHW_VDBOX_HEVC_SLICE_STATE_G12 &>(sliceState).dwTileID = idx; |
| |
| sliceState.DeblockingFilterDisable = m_hevcSliceParams[slcCount].slice_deblocking_filter_disable_flag; |
| sliceState.TcOffsetDiv2 = m_hevcSliceParams[slcCount].tc_offset_div2; |
| sliceState.BetaOffsetDiv2 = m_hevcSliceParams[slcCount].beta_offset_div2; |
| |
| CalcTransformSkipParameters(sliceState.EncodeHevcTransformSkipParams); |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetMfxVideoCopyCmdParams( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); |
| MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface(); |
| |
| uint32_t index = m_virtualEngineBbIndex; |
| |
| MHW_CP_COPY_PARAMS cpCopyParams; |
| MOS_ZeroMemory(&cpCopyParams, sizeof(cpCopyParams)); |
| |
| cpCopyParams.size = m_hwInterface->m_tileRecordSize; |
| cpCopyParams.presSrc = &m_tileRecordBuffer[index].sResource; |
| cpCopyParams.presDst = &m_resBitstreamBuffer; |
| cpCopyParams.lengthOfTable = (uint8_t)(m_numTiles); |
| cpCopyParams.isEncodeInUse = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(cpInterface->SetCpCopy(m_osInterface, cmdBuffer, &cpCopyParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ExecuteSliceLevel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_slcData); |
| |
| if (m_pakOnlyTest) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(LoadPakCommandAndCuRecordFromFile()); |
| } |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::ExecuteSliceLevel()); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncTileLevel()); |
| } |
| |
| /* |
| if ((m_useMdf) && (m_rawSurfaceToEnc)) |
| { |
| m_osInterface->pfnWaitOnResource(m_osInterface, |
| &m_rawSurfaceToEnc->OsResource); |
| } |
| */ |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncTileLevel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| int32_t currentPipe = GetCurrentPipe(); |
| int32_t currentPass = GetCurrentPass(); |
| |
| if (currentPipe < 0 || currentPass < 0) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid pipe number or pass number"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| MHW_VDBOX_HEVC_SLICE_STATE_G12 sliceState; |
| SetHcpSliceStateCommonParams(sliceState); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| |
| for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| PCODEC_ENCODER_SLCDATA slcData = m_slcData; |
| uint32_t slcCount, idx, sliceNumInTile = 0; |
| |
| idx = tileRow * numTileColumns + tileCol; |
| |
| if ((m_numPipe > 1) && (tileCol != currentPipe)) |
| { |
| continue; |
| } |
| |
| // HCP_TILE_CODING commmand |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| static_cast<MhwVdboxHcpInterfaceG12 *>(m_hcpInterface)->AddHcpTileCodingCmd(&cmdBuffer, &m_tileParams[idx])); |
| |
| for (slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &m_tileParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| |
| if (!sliceInTile) |
| { |
| continue; |
| } |
| |
| if (IsFirstPass()) |
| { |
| uint32_t startLcu = 0; |
| for (uint32_t ii = 0; ii < slcCount; ii++) |
| { |
| startLcu += m_hevcSliceParams[ii].NumLCUsInSlice; |
| } |
| slcData[slcCount].CmdOffset = startLcu * (m_hwInterface->GetHcpInterface()->GetHcpPakObjSize()) * sizeof(uint32_t); |
| } |
| |
| SetHcpSliceStateParams(sliceState, slcData, (uint16_t)slcCount, m_tileParams, lastSliceInTile, idx); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(&cmdBuffer, &sliceState)); |
| |
| sliceNumInTile++; |
| } // end of slice |
| |
| if (0 == sliceNumInTile) |
| { |
| // One tile must have at least one slice |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| } // end of row tile |
| } // end of column tile |
| |
| // Insert end of sequence/stream if set |
| if ((m_lastPicInStream || m_lastPicInSeq) && IsLastPipe()) |
| { |
| MHW_VDBOX_PAK_INSERT_PARAMS pakInsertObjectParams; |
| MOS_ZeroMemory(&pakInsertObjectParams, sizeof(pakInsertObjectParams)); |
| pakInsertObjectParams.bLastPicInSeq = m_lastPicInSeq; |
| pakInsertObjectParams.bLastPicInStream = m_lastPicInStream; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPakInsertObject(&cmdBuffer, &pakInsertObjectParams)); |
| } |
| |
| // Send VD_PIPELINE_FLUSH command |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipelineFlushParams; |
| MOS_ZeroMemory(&vdPipelineFlushParams, sizeof(vdPipelineFlushParams)); |
| vdPipelineFlushParams.Flags.bWaitDoneHEVC = 1; |
| vdPipelineFlushParams.Flags.bFlushHEVC = 1; |
| vdPipelineFlushParams.Flags.bWaitDoneVDCmdMsgParser = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_vdencInterface->AddVdPipelineFlushCmd(&cmdBuffer, &vdPipelineFlushParams)); |
| |
| // Send MI_FLUSH command |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| //HW Semaphore cmd to make sure all pipes completion encode |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMIAtomicCmd(&m_resPipeCompleteSemaMem, 1, MHW_MI_ATOMIC_INC, &cmdBuffer)); |
| |
| if (IsFirstPipe()) |
| { |
| // first pipe needs to ensure all other pipes are ready |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendHWWaitCommand( |
| &m_resPipeCompleteSemaMem, |
| &cmdBuffer, |
| m_numPipe)); |
| |
| //clean HW semaphore memory |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resPipeCompleteSemaMem; |
| storeDataParams.dwValue = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &storeDataParams)); |
| |
| // Use HW stitch commands only in the scalable mode |
| if (m_numPipe > 1 && m_enableTileStitchByHW) |
| { |
| //call PAK Int Kernel in scalability case |
| if (m_hucPakStitchEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(HucPakIntegrate(&cmdBuffer)); |
| #if 0 // Need to enable this code once Gen12 becomes open source \ |
| // 2nd level BB buffer for stitching cmd \ |
| // current location to add cmds in 2nd level batch buffer |
| m_HucStitchCmdBatchBuffer.iCurrent = 0; |
| // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass |
| m_HucStitchCmdBatchBuffer.dwOffset = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferStartCmd(&cmdBuffer, &m_HucStitchCmdBatchBuffer)); |
| // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMfxWaitCmd(&cmdBuffer, nullptr, m_osInterface->osCpInterface->IsCpEnabled() ? true : false)); |
| #endif |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMfxVideoCopyCmdParams(&cmdBuffer)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(&cmdBuffer)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, CODECHAL_NUM_MEDIA_STATES)); |
| |
| if (m_numPipe <= 1) // single pipe mode can read the info from MMIO register. Otherwise, we have to use the tile size statistic buffer |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer)); |
| |
| // BRC PAK statistics different for each pass |
| if (m_brcEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStats(&cmdBuffer)); |
| } |
| } |
| else |
| { //scalability mode |
| if (m_brcEnabled) |
| { |
| //MMIO register is not used in scalability BRC case. all information is in TileSizeRecord stream out buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadBrcPakStatisticsForScalability(&cmdBuffer)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(&cmdBuffer)); |
| } |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| //this is to support BRC scalbility test to match with single pipe. Will be removed later after enhanced BRC Scalability is enabled. |
| if (m_brcEnabled && m_forceSinglePakPass) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ResetImgCtrlRegInPAKStatisticsBuffer(&cmdBuffer)); |
| } |
| #endif |
| |
| if (m_singleTaskPhaseSupported && |
| m_brcEnabled && m_numPipe >= 2 && !IsLastPass()) |
| { |
| // Signal HW semaphore for the BRC dependency (i.e., next BRC pass waits for the current BRC pass) |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| if (!Mos_ResourceIsNull(&m_resBrcSemaphoreMem[i].sResource)) |
| { |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resBrcSemaphoreMem[i].sResource; |
| storeDataParams.dwValue = 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| &cmdBuffer, |
| &storeDataParams)); |
| } |
| } |
| } |
| } |
| |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(&cmdBuffer, &flushDwParams)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr)); |
| } |
| |
| std::string pakPassName = "PAK_PASS" + std::to_string(static_cast<uint32_t>(m_currPass)); |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| CODECHAL_NUM_MEDIA_STATES, |
| pakPassName.data()));) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ReturnCommandBuffer(&cmdBuffer)); |
| |
| if (IsFirstPipe() && |
| (m_pakOnlyTest == 0) && // In the PAK only test, no need to wait for ENC's completion |
| IsFirstPass() && |
| !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse)) |
| { |
| MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_videoContext; |
| syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams)); |
| } |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| bool nullRendering = m_videoContextUsesNullHw; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SubmitCommandBuffer(&cmdBuffer, nullRendering)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpHucDebugOutputBuffers()); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(DumpPakOutput()); |
| if (m_mmcState) { |
| m_mmcState->UpdateUserFeatureKey(&m_reconSurface); |
| }) |
| |
| if ((IsLastPipe()) && |
| (IsLastPass()) && |
| m_signalEnc && |
| m_currRefSync && |
| !Mos_ResourceIsNull(&m_currRefSync->resSyncObject)) |
| { |
| // signal semaphore |
| MOS_SYNC_PARAMS syncParams; |
| syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_videoContext; |
| syncParams.presSyncResource = &m_currRefSync->resSyncObject; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams)); |
| m_currRefSync->uiSemaphoreObjCount++; |
| m_currRefSync->bInUsed = true; |
| } |
| } |
| |
| // Reset parameters for next PAK execution |
| if (IsLastPipe() && IsLastPass()) |
| { |
| if (!m_singleTaskPhaseSupported) |
| { |
| m_osInterface->pfnResetPerfBufferID(m_osInterface); |
| } |
| |
| m_currPakSliceIdx = (m_currPakSliceIdx + 1) % CODECHAL_HEVC_NUM_PAK_SLICE_BATCH_BUFFERS; |
| |
| if (m_hevcSeqParams->ParallelBRC) |
| { |
| m_brcBuffers.uiCurrBrcPakStasIdxForWrite = |
| (m_brcBuffers.uiCurrBrcPakStasIdxForWrite + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; |
| } |
| |
| m_newPpsHeader = 0; |
| m_newSeqHeader = 0; |
| m_frameNum++; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::DecideEncodingPipeNumber() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_numPipe = m_numVdbox; |
| |
| uint8_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| |
| if (numTileColumns > m_numPipe) |
| { |
| m_numPipe = 1; |
| } |
| |
| if (numTileColumns < m_numPipe) |
| { |
| if (numTileColumns >= 1 && numTileColumns <= 4) |
| { |
| m_numPipe = numTileColumns; |
| } |
| else |
| { |
| m_numPipe = 1; // invalid tile column test cases and switch back to the single VDBOX mode |
| } |
| } |
| |
| m_useVirtualEngine = true; //always use virtual engine interface for single pipe and scalability mode |
| |
| if (!m_forceScalability) |
| { |
| //resolution < 4K, always go with single pipe |
| if (m_frameWidth * m_frameHeight < ENCODE_HEVC_4K_PIC_WIDTH * ENCODE_HEVC_4K_PIC_HEIGHT) |
| { |
| m_numPipe = 1; |
| } |
| } |
| |
| m_numUsedVdbox = m_numPipe; |
| m_numberTilesInFrame = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1); |
| |
| if (m_scalabilityState) |
| { |
| // Create/ re-use a GPU context with 2 pipes |
| m_scalabilityState->ucScalablePipeNum = m_numPipe; |
| } |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::PlatformCapabilityCheck() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(DecideEncodingPipeNumber()); |
| |
| if (MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncodeScalability_ChkGpuCtxReCreation(this, m_scalabilityState, (PMOS_GPUCTX_CREATOPTIONS_ENHANCED)m_gpuCtxCreatOpt)); |
| } |
| |
| if (m_frameWidth * m_frameHeight > ENCODE_HEVC_MAX_16K_PIC_WIDTH * ENCODE_HEVC_MAX_16K_PIC_HEIGHT) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(eStatus, "Frame resolution greater than 16k not supported"); |
| } |
| |
| if (m_vdencEnabled && m_chromaFormat == HCP_CHROMA_FORMAT_YUV444 && m_hevcSeqParams->TargetUsage == 7) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Speed mode is not supported in VDENC 444, resetting TargetUsage to Normal mode\n"); |
| m_hevcSeqParams->TargetUsage = 4; |
| } |
| |
| if ((uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_chromaFormat && |
| (uint8_t)HCP_CHROMA_FORMAT_YUV422 == m_outputChromaFormat && |
| Format_YUY2 == m_reconSurface.Format) |
| { |
| if (m_reconSurface.dwHeight < m_oriFrameHeight * 2 || |
| m_reconSurface.dwWidth < m_oriFrameWidth / 2) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| |
| // set RDOQ Intra blocks Threshold for Gen11+ |
| m_rdoqIntraTuThreshold = 0; |
| if (m_hevcRdoqEnabled) |
| { |
| if (1 == m_hevcSeqParams->TargetUsage) |
| { |
| m_rdoqIntraTuThreshold = 0xffff; |
| } |
| else if (4 == m_hevcSeqParams->TargetUsage) |
| { |
| m_rdoqIntraTuThreshold = m_picWidthInMb * m_picHeightInMb; |
| m_rdoqIntraTuThreshold = MOS_MIN(m_rdoqIntraTuThreshold / 10, 0xffff); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| bool CodechalEncHevcStateG12::CheckSupportedFormat(PMOS_SURFACE surface) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| bool isColorFormatSupported = false; |
| |
| if (nullptr == surface) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer."); |
| return isColorFormatSupported; |
| } |
| |
| switch (surface->Format) |
| { |
| case Format_NV12: |
| isColorFormatSupported = IS_Y_MAJOR_TILE_FORMAT(surface->TileType); |
| break; |
| case Format_YUY2: |
| case Format_YUYV: |
| case Format_A8R8G8B8: |
| case Format_P010: |
| case Format_P016: |
| case Format_Y210: |
| case Format_Y216: |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format); |
| break; |
| } |
| |
| return isColorFormatSupported; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GetSystemPipeNumberCommon() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_USER_FEATURE_VALUE_DATA userFeatureData; |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| |
| MOS_STATUS statusKey = MOS_STATUS_SUCCESS; |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_ENCODE_DISABLE_SCALABILITY, |
| &userFeatureData); |
| |
| bool disableScalability = true; |
| if (statusKey == MOS_STATUS_SUCCESS) |
| { |
| disableScalability = userFeatureData.i32Data ? true : false; |
| } |
| |
| MEDIA_SYSTEM_INFO *gtSystemInfo = m_osInterface->pfnGetGtSystemInfo(m_osInterface); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(gtSystemInfo); |
| |
| if (gtSystemInfo && disableScalability == false) |
| { |
| // Both VE mode and media solo mode should be able to get the VDBOX number via the same interface |
| m_numVdbox = (uint8_t)(gtSystemInfo->VDBoxInfo.NumberOfVDBoxEnabled); |
| } |
| else |
| { |
| m_numVdbox = 1; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::HucPakIntegrate( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| CODECHAL_ENCODE_CHK_COND_RETURN( |
| (m_vdboxIndex > m_hwInterface->GetMfxInterface()->GetMaxVdboxIndex()), |
| "ERROR - vdbox index exceed the maximum"); |
| |
| auto mmioRegisters = m_hwInterface->GetHucInterface()->GetMmioRegisters(m_vdboxIndex); |
| |
| // load kernel from WOPCM into L2 storage RAM |
| MHW_VDBOX_HUC_IMEM_STATE_PARAMS imemParams; |
| MOS_ZeroMemory(&imemParams, sizeof(imemParams)); |
| imemParams.dwKernelDescriptor = VDBOX_HUC_PAK_INTEGRATION_KERNEL_DESCRIPTOR; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucImemStateCmd(cmdBuffer, &imemParams)); |
| |
| // pipe mode select |
| MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; |
| pipeModeSelectParams.Mode = m_mode; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucPipeModeSelectCmd(cmdBuffer, &pipeModeSelectParams)); |
| |
| // DMEM set |
| MHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams; |
| if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrate(&dmemParams)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetDmemHuCPakIntegrateCqp(&dmemParams)); |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucDmemStateCmd(cmdBuffer, &dmemParams)); |
| |
| MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams; |
| if (m_brcEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrate(&virtualAddrParams)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetRegionsHuCPakIntegrateCqp(&virtualAddrParams)); |
| } |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucVirtualAddrStateCmd(cmdBuffer, &virtualAddrParams)); |
| |
| // Write HUC_STATUS2 mask - bit 6 - valid IMEM loaded |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_resHucStatus2Buffer; |
| storeDataParams.dwResourceOffset = 0; |
| storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatus2ImemLoadedMask(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams)); |
| |
| // Store HUC_STATUS2 register |
| MHW_MI_STORE_REGISTER_MEM_PARAMS storeRegParams; |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &m_resHucStatus2Buffer; |
| storeRegParams.dwOffset = sizeof(uint32_t); |
| storeRegParams.dwRegister = mmioRegisters->hucStatus2RegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd(cmdBuffer, &storeRegParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucInterface()->AddHucStartCmd(cmdBuffer, true)); |
| |
| // wait Huc completion (use HEVC bit for now) |
| MHW_VDBOX_VD_PIPE_FLUSH_PARAMS vdPipeFlushParams; |
| MOS_ZeroMemory(&vdPipeFlushParams, sizeof(vdPipeFlushParams)); |
| vdPipeFlushParams.Flags.bFlushHEVC = 1; |
| vdPipeFlushParams.Flags.bWaitDoneHEVC = 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetVdencInterface()->AddVdPipelineFlushCmd(cmdBuffer, &vdPipeFlushParams)); |
| |
| // Flush the engine to ensure memory written out |
| MHW_MI_FLUSH_DW_PARAMS flushDwParams; |
| MOS_ZeroMemory(&flushDwParams, sizeof(flushDwParams)); |
| flushDwParams.bVideoPipelineCacheInvalidate = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiFlushDwCmd(cmdBuffer, &flushDwParams)); |
| |
| EncodeStatusBuffer encodeStatusBuf = m_encodeStatusBuf; |
| |
| uint32_t baseOffset = |
| (encodeStatusBuf.wCurrIndex * encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // pEncodeStatus is offset by 2 DWs in the resource |
| |
| // Write HUC_STATUS mask |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &encodeStatusBuf.resStatusBuffer; |
| storeDataParams.dwResourceOffset = baseOffset + encodeStatusBuf.dwHuCStatusMaskOffset; |
| storeDataParams.dwValue = m_hwInterface->GetHucInterface()->GetHucStatusReEncodeMask(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| cmdBuffer, |
| &storeDataParams)); |
| |
| // store HUC_STATUS register |
| MOS_ZeroMemory(&storeRegParams, sizeof(storeRegParams)); |
| storeRegParams.presStoreBuffer = &encodeStatusBuf.resStatusBuffer; |
| storeRegParams.dwOffset = baseOffset + encodeStatusBuf.dwHuCStatusRegOffset; |
| storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreRegisterMemCmd( |
| cmdBuffer, |
| &storeRegParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::Initialize(CodechalSetting *settings) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_USER_FEATURE_VALUE_DATA userFeatureData; |
| MOS_STATUS statusKey = MOS_STATUS_SUCCESS; |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| char stringData[MOS_USER_CONTROL_MAX_DATA_SIZE]; |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| userFeatureData.StringData.pStringData = stringData; |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID, |
| &userFeatureData); |
| |
| if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0) |
| { |
| MOS_SecureStrcpy(m_pakOnlyDataFolder, |
| sizeof(m_pakOnlyDataFolder) / sizeof(m_pakOnlyDataFolder[0]), |
| stringData); |
| |
| uint32_t len = strlen(m_pakOnlyDataFolder); |
| if (m_pakOnlyDataFolder[len - 1] == '\\') |
| { |
| m_pakOnlyDataFolder[len - 1] = 0; |
| } |
| |
| m_pakOnlyTest = true; |
| // PAK only mode does not need to init any kernel |
| } |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| userFeatureData.StringData.pStringData = stringData; |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_LOAD_KERNEL_INPUT_ID_G12, |
| &userFeatureData); |
| |
| if (statusKey == MOS_STATUS_SUCCESS && userFeatureData.StringData.uSize > 0) |
| { |
| MOS_SecureStrcpy(m_loadKernelInputDataFolder, |
| sizeof(m_loadKernelInputDataFolder) / sizeof(m_loadKernelInputDataFolder[0]), |
| stringData); |
| |
| uint32_t len = strlen(m_loadKernelInputDataFolder); |
| if (m_loadKernelInputDataFolder[len - 1] == '\\') |
| { |
| m_loadKernelInputDataFolder[len - 1] = 0; |
| } |
| m_loadKernelInput = true; |
| } |
| #endif |
| |
| // Common initialization |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings)); |
| |
| m_numDelay = 15; //Value suggested by HW team. |
| m_bmeMethodTable = (uint8_t *)m_meMethod; |
| m_b4XMeDistortionBufferSupported = true; |
| m_brcBuffers.dwBrcConstantSurfaceWidth = HEVC_BRC_CONSTANT_SURFACE_WIDTH_G9; |
| m_brcBuffers.dwBrcConstantSurfaceHeight = HEVC_BRC_CONSTANT_SURFACE_HEIGHT_G10; |
| m_brcHistoryBufferSize = HEVC_BRC_HISTORY_BUFFER_SIZE_G12; |
| m_maxNumSlicesSupported = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6; |
| m_brcBuffers.dwBrcHcpPicStateSize = BRC_IMG_STATE_SIZE_PER_PASS_G12 * CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID, |
| &userFeatureData); |
| m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, |
| &userFeatureData); |
| // Region number must be greater than 1 |
| m_numberConcurrentGroup = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data; |
| |
| if (m_numberConcurrentGroup > 16) |
| { |
| // Region number cannot be larger than 16 |
| m_numberConcurrentGroup = 16; |
| } |
| |
| m_sizeOfHcpPakFrameStats = 9 * CODECHAL_CACHELINE_SIZE; //Frame statistics occupying 9 caceline on gen12 |
| |
| // Subthread number used in the ENC kernel |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID_G12, |
| &userFeatureData); |
| m_numberEncKernelSubThread = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data; |
| |
| if (m_numberEncKernelSubThread > m_hevcThreadTaskDataNum) |
| { |
| m_numberEncKernelSubThread = m_hevcThreadTaskDataNum; // support up to 2 sub-threads in one LCU64x64 |
| } |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID, |
| &userFeatureData); |
| m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID, |
| &userFeatureData); |
| m_hevcRdoqEnabled = userFeatureData.i32Data ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VME_ENCODE_SSE_ENABLE_ID, |
| &userFeatureData); |
| m_sseSupported = userFeatureData.i32Data ? true : false; |
| |
| // Overriding the defaults here with 32 aligned dimensions |
| // 2x Scaling WxH |
| m_downscaledWidth2x = |
| CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth); |
| m_downscaledHeight2x = |
| CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight); |
| |
| // HME Scaling WxH |
| m_downscaledWidth4x = |
| CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth); |
| m_downscaledHeight4x = |
| CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight); |
| m_downscaledWidthInMb4x = |
| CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x); |
| m_downscaledHeightInMb4x = |
| CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x); |
| |
| // SuperHME Scaling WxH |
| m_downscaledWidth16x = |
| CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x); |
| m_downscaledHeight16x = |
| CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x); |
| m_downscaledWidthInMb16x = |
| CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x); |
| m_downscaledHeightInMb16x = |
| CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x); |
| |
| // UltraHME Scaling WxH |
| m_downscaledWidth32x = |
| CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x); |
| m_downscaledHeight32x = |
| CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x); |
| m_downscaledWidthInMb32x = |
| CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x); |
| m_downscaledHeightInMb32x = |
| CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x); |
| |
| // disable MMCD if we enable Codechal dump. Because dump code changes the surface state from compressed to uncompressed, |
| // this causes mis-match issue between dump is enabled or disabled. |
| CODECHAL_DEBUG_TOOL( |
| if (m_mmcState && m_debugInterface && m_debugInterface->m_dbgCfgHead){ |
| //m_mmcState->SetMmcDisabled(); |
| }) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSystemPipeNumberCommon()); |
| |
| if (MOS_VE_SUPPORTED(m_osInterface)) |
| { |
| m_scalabilityState = (PCODECHAL_ENCODE_SCALABILITY_STATE)MOS_AllocAndZeroMemory(sizeof(CODECHAL_ENCODE_SCALABILITY_STATE)); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalabilityState); |
| //scalability initialize |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_InitializeState(m_scalabilityState, m_hwInterface)); |
| } |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_STITCH, |
| &userFeatureData); |
| m_enableTileStitchByHW = userFeatureData.i32Data ? true : false; |
| |
| statusKey = MOS_STATUS_SUCCESS; |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_HW_SEMAPHORE, |
| &userFeatureData); |
| m_enableHWSemaphore = userFeatureData.i32Data ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_WP_SUPPORT_ID, |
| &userFeatureData); |
| m_weightedPredictionSupported = userFeatureData.i32Data ? true : false; |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, |
| &userFeatureData); |
| m_kmdVeOveride.Value = (uint64_t)userFeatureData.i64Data; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VME_FORCE_SCALABILITY_ID, |
| &userFeatureData); |
| m_forceScalability = userFeatureData.i32Data ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| statusKey = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VME_DISABLE_PANIC_MODE_ID_G12, |
| &userFeatureData); |
| if (statusKey == MOS_STATUS_SUCCESS) |
| { |
| m_enableFramePanicMode = userFeatureData.i32Data ? false : true; |
| } |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_INTERVAL_ID, |
| &userFeatureData); |
| m_ltrInterval = (uint32_t)(userFeatureData.i32Data); |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_VME_BRC_LTR_DISABLE_ID, |
| &userFeatureData); |
| m_enableBrcLTR = (userFeatureData.i32Data) ? false : true; |
| #endif |
| |
| if (m_codecFunction != CODECHAL_FUNCTION_PAK) |
| { |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID, |
| &userFeatureData); |
| m_hmeSupported = (userFeatureData.i32Data) ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID, |
| &userFeatureData); |
| m_16xMeSupported = (userFeatureData.i32Data) ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_32xME_ENABLE_ID, |
| &userFeatureData); |
| // Keeping UHME by Default ON for Gen12 |
| m_32xMeSupported = (userFeatureData.i32Data) ? false : true; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_NUM_THREADS_PER_LCU_ID, |
| &userFeatureData); |
| m_totalNumThreadsPerLcu = (uint16_t)userFeatureData.i32Data; |
| |
| if (m_totalNumThreadsPerLcu < m_minThreadsPerLcuB || m_totalNumThreadsPerLcu > m_maxThreadsPerLcuB) |
| { |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| |
| if (m_frameWidth < 128 || m_frameHeight < 128) |
| { |
| m_16xMeSupported = false; |
| m_32xMeSupported = false; |
| } |
| else if (m_frameWidth < 512 || m_frameHeight < 512) |
| { |
| m_32xMeSupported = false; |
| } |
| |
| return eStatus; |
| } |
| |
| void CodechalEncHevcStateG12::LoadCosts(uint8_t sliceType, uint8_t qp) |
| { |
| if (sliceType >= CODECHAL_HEVC_NUM_SLICE_TYPES) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid slice type"); |
| sliceType = CODECHAL_HEVC_I_SLICE; |
| } |
| |
| double qpScale = 0.60; |
| int32_t qpMinus12 = qp - 12; |
| double lambda = sqrt(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0)); |
| uint8_t lcuIdx = ((m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3) == 6) ? 1 : 0; |
| m_lambdaRD = (uint16_t)(qpScale * pow(2.0, MOS_MAX(0, qpMinus12) / 3.0) * 4 + 0.5); |
| |
| m_modeCostCre[LUTCREMODE_INTRA_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_CHROMA] = CRECOST(lambda, LUTMODEBITS_INTRA_CHROMA, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_32X32] = CRECOST(lambda, LUTMODEBITS_INTER_32X32, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_32X16] = CRECOST(lambda, LUTMODEBITS_INTER_32X16, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_16X16] = CRECOST(lambda, LUTMODEBITS_INTER_16X16, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_16X8] = CRECOST(lambda, LUTMODEBITS_INTER_16X8, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_8X8] = CRECOST(lambda, LUTMODEBITS_INTER_8X8, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_BIDIR] = CRECOST(lambda, LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTER_SKIP] = CRECOST(lambda, LUTMODEBITS_INTER_SKIP, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_16X16, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8] = CRECOST(lambda, LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType); |
| m_modeCostCre[LUTCREMODE_INTRA_NONPRED] = CRECOST(lambda, LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType); |
| |
| m_modeCostRde[LUTRDEMODE_INTRA_64X64] = RDEBITS62(LUTMODEBITS_INTRA_64X64, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_32X32] = RDEBITS62(LUTMODEBITS_INTRA_32X32, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_16X16] = RDEBITS62(LUTMODEBITS_INTRA_16X16, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_8X8] = RDEBITS62(LUTMODEBITS_INTRA_8X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_NXN] = RDEBITS62(LUTMODEBITS_INTRA_NXN, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_MPM] = RDEBITS62(LUTMODEBITS_INTRA_MPM, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_DC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_DC_32X32, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_DC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_DC_8X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_32X32, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8] = RDEBITS62(LUTMODEBITS_INTRA_NONDC_8X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_BIDIR] = RDEBITS62(LUTMODEBITS_INTER_BIDIR, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_REFID] = RDEBITS62(LUTMODEBITS_INTER_REFID, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_SKIP_64X64] = RDEBITS62(LUTMODEBITS_SKIP_64X64, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_SKIP_32X32] = RDEBITS62(LUTMODEBITS_SKIP_32X32, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_SKIP_16X16] = RDEBITS62(LUTMODEBITS_SKIP_16X16, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_SKIP_8X8] = RDEBITS62(LUTMODEBITS_SKIP_8X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_MERGE_64X64] = RDEBITS62(LUTMODEBITS_MERGE_64X64, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_MERGE_32X32] = RDEBITS62(LUTMODEBITS_MERGE_32X32, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_MERGE_16X16] = RDEBITS62(LUTMODEBITS_MERGE_16X16, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_MERGE_8X8] = RDEBITS62(LUTMODEBITS_MERGE_8X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_32X32] = RDEBITS62(LUTMODEBITS_INTER_32X32, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_32X16] = RDEBITS62(LUTMODEBITS_INTER_32X16, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_16X16] = RDEBITS62(LUTMODEBITS_INTER_16X16, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_16X8] = RDEBITS62(LUTMODEBITS_INTER_16X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_INTER_8X8] = RDEBITS62(LUTMODEBITS_INTER_8X8, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_TU_DEPTH_0] = RDEBITS62(LUTMODEBITS_TU_DEPTH_0, lcuIdx, sliceType); |
| m_modeCostRde[LUTRDEMODE_TU_DEPTH_1] = RDEBITS62(LUTMODEBITS_TU_DEPTH_1, lcuIdx, sliceType); |
| |
| for (uint8_t i = 0; i < 8; i++) |
| { |
| m_modeCostRde[LUTRDEMODE_CBF + i] = RDEBITS62(LUTMODEBITS_CBF + i, lcuIdx, sliceType); |
| } |
| } |
| |
| // ------------------------------------------------------------------------------ |
| //| Purpose: Setup curbe for HEVC MbEnc B Kernels |
| //| Return: N/A |
| //------------------------------------------------------------------------------ |
| MOS_STATUS CodechalEncHevcStateG12::SetCurbeMbEncBKernel() |
| { |
| uint32_t curIdx = m_currRecycledBufIdx; |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage) / 3) % 3; // Map TU 1,4,6 to 0,1,2 |
| |
| // Initialize the CURBE data |
| MBENC_CURBE curbe; |
| |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP) |
| { |
| curbe.QPType = QP_TYPE_CONSTANT; |
| curbe.ROIEnable = m_hevcPicParams->NumROI ? true : false; |
| } |
| else |
| { |
| curbe.QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME; |
| } |
| |
| // TU based settings |
| curbe.EnableCu64Check = m_tuSettings[EnableCu64CheckTuParam][tuMapping]; |
| curbe.MaxNumIMESearchCenter = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping]; |
| curbe.MaxTransformDepthInter = m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping]; |
| curbe.MaxTransformDepthIntra = m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping]; |
| curbe.Dynamic64Order = m_tuSettings[Dynamic64OrderTuParam][tuMapping]; |
| curbe.DynamicOrderTh = m_tuSettings[DynamicOrderThTuParam][tuMapping]; |
| curbe.Dynamic64Enable = m_tuSettings[Dynamic64EnableTuParam][tuMapping]; |
| curbe.Dynamic64Th = m_tuSettings[Dynamic64ThTuParam][tuMapping]; |
| curbe.IncreaseExitThresh = m_tuSettings[IncreaseExitThreshTuParam][tuMapping]; |
| curbe.IntraSpotCheck = m_tuSettings[IntraSpotCheckFlagTuParam][tuMapping]; |
| curbe.Fake32Enable = m_tuSettings[Fake32EnableTuParam][tuMapping]; |
| |
| curbe.FrameWidthInSamples = m_frameWidth; |
| curbe.FrameHeightInSamples = m_frameHeight; |
| |
| curbe.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| curbe.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2; |
| curbe.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| |
| curbe.ChromaFormatType = m_hevcSeqParams->chroma_format_idc; |
| |
| curbe.TUDepthControl = curbe.MaxTransformDepthInter; |
| |
| int32_t sliceQp = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY; |
| curbe.FrameQP = abs(sliceQp); |
| curbe.FrameQPSign = (sliceQp > 0) ? 0 : 1; |
| |
| #if 0 // no need in the optimized kernel because kernel does the table look-up |
| LoadCosts(CODECHAL_HEVC_B_SLICE, (uint8_t)sliceQp); |
| curbe.DW4_ModeIntra32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_32X32]; |
| curbe.DW4_ModeIntraNonDC32x32Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_32X32]; |
| |
| curbe.DW5_ModeIntra16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_16X16]; |
| curbe.DW5_ModeIntraNonDC16x16Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_16X16]; |
| curbe.DW5_ModeIntra8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_8X8]; |
| curbe.DW5_ModeIntraNonDC8x8Cost = m_modeCostCre[LUTCREMODE_INTRA_NONDC_8X8]; |
| |
| curbe.DW6_ModeIntraNonPred = m_modeCostCre[LUTCREMODE_INTRA_NONPRED]; |
| |
| curbe.DW7_ChromaIntraModeCost = m_modeCostCre[LUTCREMODE_INTRA_CHROMA]; |
| |
| curbe.DW12_IntraModeCostMPM = m_modeCostRde[LUTRDEMODE_INTRA_MPM]; |
| |
| curbe.DW13_IntraTUDept0Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_0]; |
| curbe.DW13_IntraTUDept1Cost = m_modeCostRde[LUTRDEMODE_TU_DEPTH_1]; |
| |
| curbe.DW14_IntraTU4x4CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_4X4]; |
| curbe.DW14_IntraTU8x8CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_8X8]; |
| curbe.DW14_IntraTU16x16CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_16X16]; |
| curbe.DW14_IntraTU32x32CBFCost = m_modeCostRde[LUTRDEMODE_INTRA_CBF_32X32]; |
| curbe.DW15_LambdaRD = (uint16_t)m_lambdaRD; |
| curbe.DW17_IntraNonDC8x8Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_8X8]; |
| curbe.DW17_IntraNonDC32x32Penalty = m_modeCostRde[LUTRDEMODE_INTRA_NONDC_32X32]; |
| #endif |
| |
| curbe.NumofColumnTile = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| curbe.NumofRowTile = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| curbe.HMEFlag = m_hmeSupported ? 3 : 0; |
| |
| curbe.MaxRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1; |
| curbe.MaxRefIdxL1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10 - 1; |
| curbe.MaxBRefIdxL0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10 - 1; |
| |
| // Check whether Last Frame is I frame or not |
| if (m_frameNum == 0 || m_picHeightInMb == I_TYPE || (m_frameNum && m_lastPictureCodingType == I_TYPE)) |
| { |
| // This is the flag to notify kernel not to use the history buffer |
| curbe.LastFrameIsIntra = true; |
| } |
| else |
| { |
| curbe.LastFrameIsIntra = false; |
| } |
| |
| curbe.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag; |
| curbe.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag; |
| curbe.theSameRefList = m_sameRefList; |
| curbe.IsLowDelay = m_lowDelay; |
| curbe.MaxNumMergeCand = m_hevcSliceParams->MaxNumMergeCand; |
| curbe.NumRefIdxL0 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1; |
| curbe.NumRefIdxL1 = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1; |
| |
| if (m_hevcSeqParams->TargetUsage == 1) |
| { |
| // MaxNumMergeCand C Model uses 4 for TU1, |
| // for quality consideration, make sure not larger than the value from App as it will be used in PAK |
| curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 4); |
| } |
| else |
| { |
| // MaxNumMergeCand C Model uses 2 for TU4 and TU7, |
| // for quality consideration, make sure not larger than the value from App as it will be used in PAK |
| curbe.MaxNumMergeCand = MOS_MIN(m_hevcSliceParams->MaxNumMergeCand, 2); |
| } |
| |
| int32_t tbRefListL0[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10] = {0}, tbRefListL1[CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10] = {0}; |
| curbe.FwdPocNumber_L0_mTb_0 = tbRefListL0[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]); |
| curbe.BwdPocNumber_L1_mTb_0 = tbRefListL1[0] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]); |
| curbe.FwdPocNumber_L0_mTb_1 = tbRefListL0[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]); |
| curbe.BwdPocNumber_L1_mTb_1 = tbRefListL1[1] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]); |
| |
| curbe.FwdPocNumber_L0_mTb_2 = tbRefListL0[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]); |
| curbe.BwdPocNumber_L1_mTb_2 = tbRefListL1[2] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]); |
| curbe.FwdPocNumber_L0_mTb_3 = tbRefListL0[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]); |
| curbe.BwdPocNumber_L1_mTb_3 = tbRefListL1[3] = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]); |
| |
| curbe.RefFrameWinHeight = m_frameHeight; |
| curbe.RefFrameWinWidth = m_frameWidth; |
| |
| // Hard coding for now from Gen10HEVC_TU4_default.par |
| curbe.RoundingInter = (m_roundingInter + 1) << 4; // Should be an input from par(slice state) |
| curbe.RoundingIntra = (m_roundingIntra + 1) << 4; // Should be an input from par(slice state) |
| curbe.RDEQuantRoundValue = (m_roundingInter + 1) << 4; |
| |
| uint32_t gopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0; |
| uint32_t gopB = m_hevcSeqParams->GopPicSize - 1 - gopP; |
| |
| curbe.CostScalingForRA = 1; // default setting |
| |
| // get the min distance between current pic and ref pics |
| uint32_t minPocDist = 255; |
| uint32_t costTableIndex = 0; |
| if (curbe.CostScalingForRA == 1) |
| { |
| for (uint8_t ref = 0; ref < curbe.NumRefIdxL0; ref++) |
| { |
| if ((uint32_t)abs(tbRefListL0[ref]) < minPocDist) |
| minPocDist = abs(tbRefListL0[ref]); |
| } |
| for (uint8_t ref = 0; ref < curbe.NumRefIdxL1; ref++) |
| { |
| if ((uint32_t)abs(tbRefListL1[ref]) < minPocDist) |
| minPocDist = abs(tbRefListL1[ref]); |
| } |
| |
| if (gopB == 4) |
| { |
| if (minPocDist == 1 || minPocDist == 2 || minPocDist == 4) |
| costTableIndex = minPocDist; |
| } |
| if (gopB == 8) |
| { |
| if (minPocDist == 1 || minPocDist == 2 || minPocDist == 4 || minPocDist == 8) |
| costTableIndex = minPocDist + 3; |
| } |
| } |
| |
| curbe.CostTableIndex = costTableIndex; |
| |
| // the following fields are needed by the new optimized kernel in v052417 |
| curbe.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2; |
| curbe.MaxIntraRdeIter = 1; |
| curbe.CornerNeighborPixel = 0; |
| curbe.IntraNeighborAvailFlags = 0; |
| curbe.SubPelMode = 3; // qual-pel search |
| curbe.InterSADMeasure = 2; // Haar transform |
| curbe.IntraSADMeasure = 2; // Haar transform |
| curbe.IntraPrediction = 0; // enable 32x32, 16x16, and 8x8 luma intra prediction |
| curbe.RefIDCostMode = 1; // 0: AVC and 1: linear method |
| curbe.TUBasedCostSetting = 0; |
| curbe.ConcurrentGroupNum = m_numberConcurrentGroup; |
| curbe.NumofUnitInWaveFront = m_numWavefrontInOneRegion; |
| curbe.LoadBalenceEnable = 0; // when this flag is false, kernel does not use LoadBalance (or MBENC_B_FRAME_CONCURRENT_TG_DATA) buffe |
| curbe.ThreadNumber = MOS_MIN(2, m_numberEncKernelSubThread); |
| curbe.Pic_init_qp_B = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY; |
| curbe.Pic_init_qp_P = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY; |
| curbe.Pic_init_qp_I = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY; |
| curbe.WaveFrontSplitsEnable = (m_numberConcurrentGroup == 1) ? false : true; |
| curbe.SuperHME = m_16xMeSupported; |
| curbe.UltraHME = m_32xMeSupported; |
| curbe.PerBFrameQPOffset = 0; |
| |
| switch (m_hevcSeqParams->TargetUsage) |
| { |
| case 1: |
| curbe.Degree45 = 0; |
| curbe.Break12Dependency = 0; |
| break; |
| case 4: |
| default: |
| curbe.Degree45 = 1; |
| curbe.Break12Dependency = 1; |
| break; |
| } |
| |
| curbe.LongTermReferenceFlags_L0 = 0; |
| for (uint32_t i = 0; i < curbe.NumRefIdxL0; i++) |
| { |
| curbe.LongTermReferenceFlags_L0 |= (m_hevcSliceParams->RefPicList[0][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i; |
| } |
| curbe.LongTermReferenceFlags_L1 = 0; |
| for (uint32_t i = 0; i < curbe.NumRefIdxL1; i++) |
| { |
| curbe.LongTermReferenceFlags_L1 |= (m_hevcSliceParams->RefPicList[1][i].PicFlags & PICTURE_LONG_TERM_REFERENCE) << i; |
| } |
| |
| curbe.Stepping = 0; |
| curbe.Cu64SkipCheckOnly = 0; |
| curbe.Cu642Nx2NCheckOnly = 0; |
| curbe.EnableCu64AmpCheck = 1; |
| curbe.IntraSpeedMode = 0; // 35 mode |
| curbe.DisableIntraNxN = 0; |
| |
| if (m_hwInterface->GetPlatform().usRevId == 0) |
| { |
| curbe.Stepping = 1; |
| curbe.TUDepthControl = 1; |
| curbe.MaxTransformDepthInter = 1; |
| curbe.MaxTransformDepthIntra = 0; |
| //buf->curbe.EnableCu64Check = 1; |
| curbe.Cu64SkipCheckOnly = 0; |
| curbe.Cu642Nx2NCheckOnly = 1; |
| curbe.EnableCu64AmpCheck = 0; |
| curbe.IntraSpeedMode = 0; // 35 mode |
| curbe.DisableIntraNxN = 1; |
| curbe.MaxNumMergeCand = 1; |
| } |
| |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| auto buf = (PMBENC_COMBINED_BUFFER1)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer1[curIdx].sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(buf); |
| |
| if (curbe.Degree45) |
| { |
| MOS_ZeroMemory(&buf->concurrent, sizeof(buf->concurrent)); |
| } |
| buf->Curbe = curbe; |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer1[curIdx].sResource); |
| |
| // clean-up the thread dependency buffer in the second combined buffer |
| if (m_numberEncKernelSubThread > 1) |
| { |
| MOS_LOCK_PARAMS lockFlags; |
| |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| auto data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer2[curIdx].sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(&data[m_threadTaskBufferOffset], m_threadTaskBufferSize); |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_encBCombinedBuffer2[curIdx].sResource); |
| } |
| |
| if (m_initEncConstTable) |
| { |
| // Initialize the Enc Constant Table surface |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| |
| auto data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_encConstantTableForB.sResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| if (m_isMaxLcu64) |
| { |
| MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize, (const void *)m_encLcu64ConstantDataLut, sizeof(m_encLcu64ConstantDataLut)); |
| } |
| else |
| { |
| MOS_SecureMemcpy(data, m_encConstantTableForB.dwSize, (const void *)m_encLcu32ConstantDataLut, sizeof(m_encLcu32ConstantDataLut)); |
| } |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_encConstantTableForB.sResource); |
| m_initEncConstTable = false; |
| } |
| |
| // binding table index |
| MBENC_COMBINED_BTI params; |
| if (m_isMaxLcu64) |
| { |
| for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++) |
| { |
| params.BTI_LCU64.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1; |
| params.BTI_LCU64.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2; |
| params.BTI_LCU64.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0; |
| params.BTI_LCU64.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y; |
| params.BTI_LCU64.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX; |
| params.BTI_LCU64.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD; |
| params.BTI_LCU64.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ; |
| params.BTI_LCU64.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD; |
| params.BTI_LCU64.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD; |
| params.BTI_LCU64.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA; |
| params.BTI_LCU64.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT; |
| params.BTI_LCU64.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA; |
| params.BTI_LCU64.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA; |
| params.BTI_LCU64.VME2XInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR; |
| } |
| params.BTI_LCU64.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE; |
| params.BTI_LCU64.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1; |
| params.BTI_LCU64.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2; |
| params.BTI_LCU64.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3; |
| params.BTI_LCU64.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE; |
| params.BTI_LCU64.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA; |
| } |
| else |
| { |
| for (uint32_t i = 0; i < MAX_MULTI_FRAME_NUMBER; i++) |
| { |
| params.BTI_LCU32.Combined1DSurIndexMF1[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1; |
| params.BTI_LCU32.Combined1DSurIndexMF2[i] = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2; |
| params.BTI_LCU32.VMEInterPredictionSurfIndexMF[i] = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0; |
| params.BTI_LCU32.SrcSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y; |
| params.BTI_LCU32.SrcReconSurfIndexMF[i] = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX; |
| params.BTI_LCU32.CURecordSurfIndexMF[i] = MBENC_B_FRAME_ENC_CU_RECORD; |
| params.BTI_LCU32.PAKObjectSurfIndexMF[i] = MBENC_B_FRAME_PAK_OBJ; |
| params.BTI_LCU32.CUPacketSurfIndexMF[i] = MBENC_B_FRAME_PAK_CU_RECORD; |
| params.BTI_LCU32.SWScoreBoardSurfIndexMF[i] = MBENC_B_FRAME_SW_SCOREBOARD; |
| params.BTI_LCU32.QPCU16SurfIndexMF[i] = MBENC_B_FRAME_CU_QP_DATA; |
| params.BTI_LCU32.LCULevelDataSurfIndexMF[i] = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT; |
| params.BTI_LCU32.TemporalMVSurfIndexMF[i] = MBENC_B_FRAME_COLOCATED_CU_MV_DATA; |
| params.BTI_LCU32.HmeDataSurfIndexMF[i] = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA; |
| } |
| params.BTI_LCU32.DebugSurfIndexMF[0] = MBENC_B_FRAME_DEBUG_SURFACE; |
| params.BTI_LCU32.DebugSurfIndexMF[1] = MBENC_B_FRAME_DEBUG_SURFACE1; |
| params.BTI_LCU32.DebugSurfIndexMF[2] = MBENC_B_FRAME_DEBUG_SURFACE2; |
| params.BTI_LCU32.DebugSurfIndexMF[3] = MBENC_B_FRAME_DEBUG_SURFACE3; |
| params.BTI_LCU32.HEVCCnstLutSurfIndex = MBENC_B_FRAME_ENC_CONST_TABLE; |
| params.BTI_LCU32.LoadBalenceSurfIndex = MBENC_B_FRAME_CONCURRENT_TG_DATA; |
| } |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates); |
| PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData( |
| ¶ms, |
| kernelState->dwCurbeOffset, |
| sizeof(params))); |
| |
| return eStatus; |
| } |
| |
| // ------------------------------------------------------------------------------ |
| //| Purpose: Setup curbe for HEVC BrcInitReset Kernel |
| //| Return: N/A |
| //------------------------------------------------------------------------------ |
| MOS_STATUS CodechalEncHevcStateG12::SetCurbeBrcInitReset( |
| CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates); |
| |
| if (brcKrnIdx != CODECHAL_HEVC_BRC_INIT && brcKrnIdx != CODECHAL_HEVC_BRC_RESET) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| // Initialize the CURBE data |
| BRC_INITRESET_CURBE curbe = m_brcInitResetCurbeInit; |
| |
| uint32_t profileLevelMaxFrame = GetProfileLevelMaxFrameSize(); |
| |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR || |
| m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR || |
| m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR) |
| { |
| if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Initial VBV Buffer Fullness is zero\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| if (m_hevcSeqParams->VBVBufferSizeInBit == 0) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("VBV buffer size in bits is zero\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| } |
| |
| curbe.DW0_ProfileLevelMaxFrame = profileLevelMaxFrame; |
| curbe.DW1_InitBufFull = m_hevcSeqParams->InitVBVBufferFullnessInBit; |
| curbe.DW2_BufSize = m_hevcSeqParams->VBVBufferSizeInBit; |
| curbe.DW3_TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; //DDI in Kbits |
| curbe.DW4_MaximumBitRate = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS; |
| curbe.DW5_MinimumBitRate = 0; |
| curbe.DW6_FrameRateM = m_hevcSeqParams->FrameRate.Numerator; |
| curbe.DW7_FrameRateD = m_hevcSeqParams->FrameRate.Denominator; |
| curbe.DW8_BRCFlag = BRCINIT_IGNORE_PICTURE_HEADER_SIZE; // always ignore the picture header size set in BRC Update curbe; |
| |
| if (m_hevcPicParams->NumROI) |
| { |
| curbe.DW8_BRCFlag |= BRCINIT_DISABLE_MBBRC; // BRC ROI need disable MBBRC logic in LcuBrc Kernel |
| } |
| else |
| { |
| curbe.DW8_BRCFlag |= (m_lcuBrcEnabled) ? 0 : BRCINIT_DISABLE_MBBRC; |
| } |
| |
| curbe.DW8_BRCFlag |= (m_brcEnabled && m_numPipe > 1) ? BRCINIT_USEHUCBRC : 0; |
| // For non-ICQ, ACQP Buffer always set to 1 |
| curbe.DW25_ACQPBuffer = 1; |
| curbe.DW25_SlidingWindowSize = m_slidingWindowSize; |
| |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR) |
| { |
| curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate; |
| curbe.DW8_BRCFlag |= BRCINIT_ISCBR; |
| } |
| else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR) |
| { |
| if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate) |
| { |
| curbe.DW4_MaximumBitRate = 2 * curbe.DW3_TargetBitRate; |
| } |
| curbe.DW8_BRCFlag |= BRCINIT_ISVBR; |
| } |
| else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR) |
| { |
| curbe.DW8_BRCFlag |= BRCINIT_ISAVBR; |
| // For AVBR, max bitrate = target bitrate, |
| curbe.DW3_TargetBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; //DDI in Kbits |
| curbe.DW4_MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; |
| } |
| else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ) |
| { |
| curbe.DW8_BRCFlag |= BRCINIT_ISICQ; |
| curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor; |
| } |
| else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM) |
| { |
| curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate; |
| curbe.DW8_BRCFlag |= BRCINIT_ISVCM; |
| } |
| else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP) |
| { |
| curbe.DW8_BRCFlag = BRCINIT_ISCQP; |
| } |
| else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_QVBR) |
| { |
| if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate) |
| { |
| curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate; // Use max bit rate for HRD compliance |
| } |
| curbe.DW8_BRCFlag = curbe.DW8_BRCFlag | BRCINIT_ISQVBR | BRCINIT_ISVBR; // We need to make sure that VBR is used for QP determination. |
| // use ICQQualityFactor to determine the larger Qp for each MB |
| curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor; |
| } |
| curbe.DW9_FrameWidth = m_oriFrameWidth; |
| curbe.DW10_FrameHeight = m_oriFrameHeight; |
| curbe.DW10_AVBRAccuracy = m_usAvbrAccuracy; |
| curbe.DW11_AVBRConvergence = m_usAvbrConvergence; |
| curbe.DW12_NumberSlice = m_numSlices; |
| |
| /********************************************************************** |
| In case of non-HB/BPyramid Structure |
| BRC_Param_A = GopP |
| BRC_Param_B = GopB |
| In case of HB/BPyramid GOP Structure |
| BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are |
| BRC Parameters set as follows as per CModel equation |
| ***********************************************************************/ |
| // BPyramid GOP |
| if (m_HierchGopBRCEnabled) |
| { |
| curbe.DW8_BRCGopP = ((m_hevcSeqParams->GopPicSize + m_hevcSeqParams->GopRefDist - 1) / m_hevcSeqParams->GopRefDist); |
| curbe.DW9_BRCGopB = curbe.DW8_BRCGopP; |
| curbe.DW13_BRCGopB1 = curbe.DW8_BRCGopP * 2; |
| curbe.DW14_BRCGopB2 = ((m_hevcSeqParams->GopPicSize) - (curbe.DW8_BRCGopP) - (curbe.DW13_BRCGopB1) - (curbe.DW9_BRCGopB)); |
| // B1 Level GOP |
| if (m_hevcSeqParams->GopRefDist <= 4 || curbe.DW14_BRCGopB2 == 0) |
| { |
| curbe.DW14_MaxBRCLevel = 3; |
| } |
| // B2 Level GOP |
| else |
| { |
| curbe.DW14_MaxBRCLevel = 4; |
| } |
| } |
| // For Regular GOP - No BPyramid |
| else |
| { |
| curbe.DW14_MaxBRCLevel = 1; |
| curbe.DW8_BRCGopP = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0; |
| curbe.DW9_BRCGopB = m_hevcSeqParams->GopPicSize - 1 - curbe.DW8_BRCGopP; |
| } |
| |
| // Set dynamic thresholds |
| double inputBitsPerFrame = (double)((double)curbe.DW4_MaximumBitRate * (double)curbe.DW7_FrameRateD); |
| inputBitsPerFrame = (double)(inputBitsPerFrame / curbe.DW6_FrameRateM); |
| |
| if (curbe.DW2_BufSize < (uint32_t)inputBitsPerFrame * 4) |
| { |
| curbe.DW2_BufSize = (uint32_t)inputBitsPerFrame * 4; |
| } |
| |
| if (curbe.DW1_InitBufFull == 0) |
| { |
| curbe.DW1_InitBufFull = 7 * curbe.DW2_BufSize / 8; |
| } |
| if (curbe.DW1_InitBufFull < (uint32_t)(inputBitsPerFrame * 2)) |
| { |
| curbe.DW1_InitBufFull = (uint32_t)(inputBitsPerFrame * 2); |
| } |
| if (curbe.DW1_InitBufFull > curbe.DW2_BufSize) |
| { |
| curbe.DW1_InitBufFull = curbe.DW2_BufSize; |
| } |
| |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR) |
| { |
| // For AVBR, Buffer size = 2*Bitrate, InitVBV = 0.75 * BufferSize |
| curbe.DW2_BufSize = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS; |
| curbe.DW1_InitBufFull = (uint32_t)(0.75 * curbe.DW2_BufSize); |
| } |
| |
| if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) |
| { |
| curbe.DW15_LongTermInterval = 0; // no LTR for low delay brc |
| } |
| else |
| { |
| curbe.DW15_LongTermInterval = (m_enableBrcLTR && m_ltrInterval) ? m_ltrInterval : m_enableBrcLTR ? HEVC_BRC_LONG_TERM_REFRENCE_FLAG : 0; |
| } |
| |
| double bpsRatio = ((double)inputBitsPerFrame / ((double)(curbe.DW2_BufSize) / 30)); |
| bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio; |
| |
| curbe.DW19_DeviationThreshold0_PBframe = (uint32_t)(-50 * pow(0.90, bpsRatio)); |
| curbe.DW19_DeviationThreshold1_PBframe = (uint32_t)(-50 * pow(0.66, bpsRatio)); |
| curbe.DW19_DeviationThreshold2_PBframe = (uint32_t)(-50 * pow(0.46, bpsRatio)); |
| curbe.DW19_DeviationThreshold3_PBframe = (uint32_t)(-50 * pow(0.3, bpsRatio)); |
| |
| curbe.DW20_DeviationThreshold4_PBframe = (uint32_t)(50 * pow(0.3, bpsRatio)); |
| curbe.DW20_DeviationThreshold5_PBframe = (uint32_t)(50 * pow(0.46, bpsRatio)); |
| curbe.DW20_DeviationThreshold6_PBframe = (uint32_t)(50 * pow(0.7, bpsRatio)); |
| curbe.DW20_DeviationThreshold7_PBframe = (uint32_t)(50 * pow(0.9, bpsRatio)); |
| |
| curbe.DW21_DeviationThreshold0_VBRcontrol = (uint32_t)(-50 * pow(0.9, bpsRatio)); |
| curbe.DW21_DeviationThreshold1_VBRcontrol = (uint32_t)(-50 * pow(0.7, bpsRatio)); |
| curbe.DW21_DeviationThreshold2_VBRcontrol = (uint32_t)(-50 * pow(0.5, bpsRatio)); |
| curbe.DW21_DeviationThreshold3_VBRcontrol = (uint32_t)(-50 * pow(0.3, bpsRatio)); |
| |
| curbe.DW22_DeviationThreshold4_VBRcontrol = (uint32_t)(100 * pow(0.4, bpsRatio)); |
| curbe.DW22_DeviationThreshold5_VBRcontrol = (uint32_t)(100 * pow(0.5, bpsRatio)); |
| curbe.DW22_DeviationThreshold6_VBRcontrol = (uint32_t)(100 * pow(0.75, bpsRatio)); |
| curbe.DW22_DeviationThreshold7_VBRcontrol = (uint32_t)(100 * pow(0.9, bpsRatio)); |
| |
| curbe.DW23_DeviationThreshold0_Iframe = (uint32_t)(-50 * pow(0.8, bpsRatio)); |
| curbe.DW23_DeviationThreshold1_Iframe = (uint32_t)(-50 * pow(0.6, bpsRatio)); |
| curbe.DW23_DeviationThreshold2_Iframe = (uint32_t)(-50 * pow(0.34, bpsRatio)); |
| curbe.DW23_DeviationThreshold3_Iframe = (uint32_t)(-50 * pow(0.2, bpsRatio)); |
| |
| curbe.DW24_DeviationThreshold4_Iframe = (uint32_t)(50 * pow(0.2, bpsRatio)); |
| curbe.DW24_DeviationThreshold5_Iframe = (uint32_t)(50 * pow(0.4, bpsRatio)); |
| curbe.DW24_DeviationThreshold6_Iframe = (uint32_t)(50 * pow(0.66, bpsRatio)); |
| curbe.DW24_DeviationThreshold7_Iframe = (uint32_t)(50 * pow(0.9, bpsRatio)); |
| |
| if (m_hevcSeqParams->HierarchicalFlag && !m_hevcSeqParams->LowDelayMode && |
| (m_hevcSeqParams->GopRefDist == 4 || m_hevcSeqParams->GopRefDist == 8)) |
| { |
| curbe.DW26_RandomAccess = true; |
| } |
| else |
| { |
| curbe.DW26_RandomAccess = false; |
| } |
| |
| if (m_brcInit) |
| { |
| m_dBrcInitCurrentTargetBufFullInBits = curbe.DW1_InitBufFull; |
| } |
| |
| m_brcInitResetBufSizeInBits = curbe.DW2_BufSize; |
| m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame; |
| |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData( |
| &curbe, |
| kernelState->dwCurbeOffset, |
| sizeof(curbe))); |
| |
| return eStatus; |
| } |
| |
| // ------------------------------------------------------------------------------ |
| //| Purpose: Setup curbe for HEVC BrcUpdate Kernel |
| //| Return: N/A |
| //------------------------------------------------------------------------------ |
| MOS_STATUS CodechalEncHevcStateG12::SetCurbeBrcUpdate( |
| CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (brcKrnIdx != CODECHAL_HEVC_BRC_FRAME_UPDATE && brcKrnIdx != CODECHAL_HEVC_BRC_LCU_UPDATE) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not frame update or LCU update\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates); |
| |
| // Initialize the CURBE data |
| BRCUPDATE_CURBE curbe = m_brcUpdateCurbeInit; |
| |
| curbe.DW5_TargetSize_Flag = 0; |
| |
| if (m_dBrcInitCurrentTargetBufFullInBits > (double)m_brcInitResetBufSizeInBits) |
| { |
| m_dBrcInitCurrentTargetBufFullInBits -= (double)m_brcInitResetBufSizeInBits; |
| curbe.DW5_TargetSize_Flag = 1; |
| } |
| |
| if (m_numSkipFrames) |
| { |
| // pass num/size of skipped frames to update BRC |
| curbe.DW6_NumSkippedFrames = m_numSkipFrames; |
| curbe.DW15_SizeOfSkippedFrames = m_sizeSkipFrames; |
| |
| // account for skipped frame in calculating CurrentTargetBufFullInBits |
| m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames; |
| } |
| |
| curbe.DW0_TargetSize = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits); |
| curbe.DW1_FrameNumber = m_storeData - 1; // Check if we can remove this (set to 0) |
| |
| // BRC PAK statistic buffer from last frame, the encoded size includes header already. |
| // in BRC Initreset kernel, curbe DW8_BRCFlag will always ignore picture header size, so no need to set picture header size here. |
| curbe.DW2_PictureHeaderSize = 0; |
| curbe.DW5_CurrFrameBrcLevel = m_currFrameBrcLevel; |
| curbe.DW5_MaxNumPAKs = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(); |
| |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP) |
| { |
| curbe.DW6_CqpValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta; |
| } |
| if (m_hevcPicParams->NumROI) |
| { |
| curbe.DW6_ROIEnable = m_brcEnabled ? false : true; |
| curbe.DW6_BRCROIEnable = m_brcEnabled ? true : false; |
| curbe.DW6_RoiRatio = CalculateROIRatio(); |
| } |
| curbe.DW6_SlidingWindowEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW); |
| |
| //for low delay brc |
| curbe.DW6_LowDelayEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW); |
| curbe.DW16_UserMaxFrameSize = GetProfileLevelMaxFrameSize(); |
| curbe.DW14_ParallelMode = m_hevcSeqParams->ParallelBRC; |
| |
| if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR) |
| { |
| curbe.DW3_StartGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150); |
| curbe.DW3_StartGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150); |
| curbe.DW4_StartGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150); |
| curbe.DW4_StartGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150); |
| |
| curbe.DW11_gRateRatioThreshold0 = |
| (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 40))); |
| curbe.DW11_gRateRatioThreshold1 = |
| (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 75))); |
| curbe.DW12_gRateRatioThreshold2 = (uint32_t)((100 - (m_usAvbrAccuracy / (double)30) * (100 - 97))); |
| curbe.DW12_gRateRatioThreshold3 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (103 - 100))); |
| curbe.DW12_gRateRatioThreshold4 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (125 - 100))); |
| curbe.DW12_gRateRatioThreshold5 = (uint32_t)((100 + (m_usAvbrAccuracy / (double)30) * (160 - 100))); |
| } |
| |
| if (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_EXTREMELY_LOW) |
| { |
| curbe.DW17_LongTerm_Current = 0; // no LTR for low delay brc |
| } |
| else |
| { |
| m_isFrameLTR = (CodecHal_PictureIsLongTermRef(m_currReconstructedPic)); |
| curbe.DW17_LongTerm_Current = (m_enableBrcLTR && m_isFrameLTR) ? 1 : 0; |
| } |
| |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData( |
| &curbe, |
| kernelState->dwCurbeOffset, |
| sizeof(curbe))); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SendMbEncSurfacesIKernel( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| uint32_t startBTI = 0, mbenc_I_KRNIDX = MBENC_LCU32_KRNIDX; |
| CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; |
| PMOS_SURFACE inputSurface = m_rawSurfaceToEnc; |
| PMHW_KERNEL_STATE kernelState = &m_mbEncKernelStates[mbenc_I_KRNIDX]; |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_mbEncKernelBindingTable[mbenc_I_KRNIDX]; |
| |
| // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map |
| startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource, |
| m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource, |
| CodechalDbgAttr::attrOutput, |
| "Hevc_CombinedBuffer1", |
| m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_I_MBENC));); |
| |
| // VME surfaces |
| startBTI = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Programming dummy surfaces even if not used (VME requirement), currently setting to input surface |
| for (int32_t surface_idx = 0; surface_idx < 8; surface_idx++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| //Source Y and UV |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| false)); |
| |
| surfaceCodecParams.bUseUVPlane = true; |
| |
| surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI++]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| inputSurface, |
| CodechalDbgAttr::attrEncodeRawInputSurface, |
| "MbEnc_Input_SrcSurf"))); |
| // Current Y with reconstructed boundary pixels |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_currPicWithReconBoundaryPix, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Enc CU Record |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_intermediateCuRecordSurfaceLcu32, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // PAK object command surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_resMbCodeSurface, |
| m_mvOffset, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // CU packet for PAK surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_resMbCodeSurface, |
| m_mbCodeSize - m_mvOffset, |
| m_mvOffset, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| //Software scoreboard surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| m_swScoreboardState->GetCurSwScoreboardSurface(), |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| true)); |
| |
| surfaceCodecParams.bUse32UINTSurfaceFormat = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Scratch surface for Internal Use Only |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_scratchSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // CU 16x16 QP data input surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_brcBuffers.sBrcMbQpBuffer, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Lcu level data input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_lcuLevelInputDataSurface[m_currRecycledBufIdx], |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Enc I Constant Table surface // CostLUT Buf |
| startBTI = MBENC_I_FRAME_ENC_CONST_TABLE; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_encConstantTableForB.sResource, |
| m_encConstantTableForB.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| #if 0 |
| // Concurrent Thread Group Data surface |
| startBTI = MBENC_I_FRAME_CONCURRENT_TG_DATA; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &resConcurrentThreadGroupData.sResource, |
| resConcurrentThreadGroupData.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| #endif |
| |
| // Brc Combined Enc parameter surface |
| startBTI = MBENC_I_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcInputForEncKernelBuffer->sResource, |
| HEVC_FRAMEBRC_BUF_CONST_SIZE, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Kernel debug surface |
| startBTI = MBENC_I_FRAME_DEBUG_DUMP; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_debugSurface[0].sResource, |
| m_debugSurface[0].dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SendMbEncSurfacesBKernel( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates); |
| PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_LCU32_KRNIDX]; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable); |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = m_isMaxLcu64 ? &m_mbEncKernelBindingTable[MBENC_LCU64_KRNIDX] : &m_mbEncKernelBindingTable[MBENC_LCU32_KRNIDX]; |
| |
| PMOS_SURFACE inputSurface = m_rawSurfaceToEnc; |
| uint32_t startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0; |
| CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; |
| |
| // Combined 1D buffer 1, which contains regular kernel curbe and concurrent map |
| startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource, |
| m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource, |
| CodechalDbgAttr::attrOutput, |
| "Hevc_CombinedBuffer1", |
| m_encBCombinedBuffer1[m_currRecycledBufIdx].dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC));); |
| // Combined 1D buffer 2, which contains non fixed sizes of buffers |
| startBTI = MBENC_B_FRAME_ENCODER_COMBINED_BUFFER2; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource, |
| m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| surfaceCodecParams.bRawSurface = true; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource, |
| CodechalDbgAttr::attrOutput, |
| "Hevc_CombinedBuffer2", |
| m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC));); |
| // VME surfaces |
| startBTI = MBENC_B_FRAME_VME_PRED_CURR_PIC_IDX0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++) |
| { |
| int32_t ll = 0; |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx]; |
| if (!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| PMOS_SURFACE refSurfacePtr; |
| if (surface_idx == 0 && m_useWeightedSurfaceForL0) |
| { |
| refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + surface_idx); |
| } |
| else |
| { |
| refSurfacePtr = &m_refList[idx]->sRefBuffer; |
| } |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| refSurfacePtr, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx; |
| std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_refList[idx]->sRefBuffer, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| refSurfName.data()))); |
| } |
| else |
| { |
| // Providing Dummy surface as per VME requirement. |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| ll = 1; |
| refPic = m_hevcSliceParams->RefPicList[ll][surface_idx]; |
| if (!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| PMOS_SURFACE refSurfacePtr; |
| if (surface_idx == 0 && m_useWeightedSurfaceForL1) |
| { |
| refSurfacePtr = m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + surface_idx); |
| } |
| else |
| { |
| refSurfacePtr = &m_refList[idx]->sRefBuffer; |
| } |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| refSurfacePtr, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx; |
| std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_refList[idx]->sRefBuffer, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| refSurfName.data()))); |
| } |
| else |
| { |
| // Providing Dummy surface as per VME requirement. |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| } |
| |
| //Source Y and UV |
| startBTI = MBENC_B_FRAME_CURR_Y; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| inputSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| false)); |
| |
| surfaceCodecParams.bUseUVPlane = true; |
| |
| surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBTI]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| inputSurface, |
| CodechalDbgAttr::attrEncodeRawInputSurface, |
| "MbEnc_Input_SrcSurf"))); |
| |
| // Current Y with reconstructed boundary pixels |
| startBTI = MBENC_B_FRAME_CURR_Y_WITH_RECON_BOUNDARY_PIX; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_currPicWithReconBoundaryPix, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Enc CU Record |
| startBTI = MBENC_B_FRAME_ENC_CU_RECORD; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_intermediateCuRecordSurfaceLcu32, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| 0, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // PAK object command surface |
| startBTI = MBENC_B_FRAME_PAK_OBJ; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_resMbCodeSurface, |
| m_mvOffset, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // CU packet for PAK surface |
| startBTI = MBENC_B_FRAME_PAK_CU_RECORD; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_resMbCodeSurface, |
| m_mbCodeSize - m_mvOffset, |
| m_mvOffset, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| //Software scoreboard surface |
| startBTI = MBENC_B_FRAME_SW_SCOREBOARD; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| m_swScoreboardState->GetCurSwScoreboardSurface(), |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| m_verticalLineStride, |
| true)); |
| |
| surfaceCodecParams.bUse32UINTSurfaceFormat = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Scratch surface for Internal Use Only |
| startBTI = MBENC_B_FRAME_SCRATCH_SURFACE; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_scratchSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // CU 16x16 QP data input surface |
| startBTI = MBENC_B_FRAME_CU_QP_DATA; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_brcBuffers.sBrcMbQpBuffer, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| m_verticalLineStride, |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Lcu level data input |
| startBTI = MBENC_B_FRAME_LCU_LEVEL_DATA_INPUT; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_lcuLevelInputDataSurface[m_currRecycledBufIdx], |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| m_verticalLineStride, |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Enc B 32x32 Constant Table surface |
| startBTI = MBENC_B_FRAME_ENC_CONST_TABLE; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_encConstantTableForB.sResource, |
| m_encConstantTableForB.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Colocated CU Motion Vector Data surface |
| startBTI = MBENC_B_FRAME_COLOCATED_CU_MV_DATA; |
| uint8_t mbCodeIdxForTempMVP = 0xFF; |
| if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC) |
| { |
| uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx; |
| |
| mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx; |
| } |
| |
| if (m_pictureCodingType == I_TYPE) |
| { |
| // No temoporal MVP in the I frame |
| m_hevcSliceParams->slice_temporal_mvp_enable_flag = false; |
| } |
| else |
| { |
| if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag) |
| { |
| // Temporal reference MV index is invalid and so disable the temporal MVP |
| CODECHAL_ENCODE_ASSERT(false); |
| m_hevcSliceParams->slice_temporal_mvp_enable_flag = false; |
| } |
| } |
| |
| if (mbCodeIdxForTempMVP == 0xFF) |
| { |
| startBTI++; |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP), |
| m_sizeOfMvTemporalBuffer, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| startBTI = MBENC_B_FRAME_HME_MOTION_PREDICTOR_DATA; |
| |
| // HME motion predictor data |
| if (m_hmeEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| m_hmeKernel->GetSurface(CodechalKernelHme::SurfaceId::me4xMvDataBuffer), |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| else |
| { |
| startBTI++; |
| } |
| |
| // Brc Combined Enc parameter surface |
| startBTI = MBENC_B_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcInputForEncKernelBuffer->sResource, |
| HEVC_FRAMEBRC_BUF_CONST_SIZE, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| startBTI = MBENC_B_FRAME_VME_PRED_FOR_2X_DS_CURR; |
| if (m_isMaxLcu64) |
| { |
| PMOS_SURFACE currScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER); |
| |
| //VME 2X Inter prediction surface for current frame |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| currScaledSurface2x, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| currScaledSurface2x, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| "2xScaledSurf"))); |
| |
| // RefFrame's 2x DS surface |
| for (int32_t surface_idx = 0; surface_idx < 4; surface_idx++) |
| { |
| int32_t ll = 0; |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[ll][surface_idx]; |
| if (!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx), |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx; |
| std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx), |
| CodechalDbgAttr::attrReferenceSurfaces, |
| refSurfName.data()))); |
| } |
| else |
| { |
| // Providing Dummy surface as per VME requirement. |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| currScaledSurface2x, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| ll = 1; |
| refPic = m_hevcSliceParams->RefPicList[ll][surface_idx]; |
| if (!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| int32_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx), |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx; |
| std::string refSurfName = "Ref2xScaledSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| m_trackedBuf->Get2xDsSurface(m_refList[idx]->ucScalingIdx), |
| CodechalDbgAttr::attrReferenceSurfaces, |
| refSurfName.data()))); |
| } |
| else |
| { |
| // Providing Dummy surface as per VME requirement. |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| &surfaceCodecParams, |
| currScaledSurface2x, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| } |
| } |
| |
| // Encoder History Input Buffer |
| startBTI = MBENC_B_FRAME_ENCODER_HISTORY_INPUT_BUFFER; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_encoderHistoryInputBuffer, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Encoder History Output Buffer |
| startBTI = MBENC_B_FRAME_ENCODER_HISTORY_OUTPUT_BUFFER; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_encoderHistoryOutputBuffer, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| m_verticalLineStride, |
| true)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Kernel debug surface |
| startBTI = MBENC_B_FRAME_DEBUG_SURFACE; |
| for (uint32_t i = 0; i < CODECHAL_GET_ARRAY_LENGTH(m_debugSurface); i++, startBTI++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_debugSurface[i].sResource, |
| m_debugSurface[i].dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI], |
| false)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SendBrcInitResetSurfaces( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| CODECHAL_HEVC_BRC_KRNIDX krnIdx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (krnIdx != CODECHAL_HEVC_BRC_INIT && krnIdx != CODECHAL_HEVC_BRC_RESET) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Brc kernel requested is not init or reset\n"); |
| return MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[krnIdx]; |
| uint32_t startBti = 0; |
| CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; |
| // BRC History Buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcBuffers.resBrcHistoryBuffer, |
| m_brcHistoryBufferSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| true)); |
| |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[krnIdx]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // BRC Distortion Surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| m_brcDistortion, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| 0, |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetupBrcConstantTable( |
| PMOS_SURFACE brcConstantData) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *outputData = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(outputData); |
| uint8_t *inputData = (uint8_t *)g_cInit_HEVC_BRC_QP_ADJUST; |
| uint32_t inputSize = sizeof(g_cInit_HEVC_BRC_QP_ADJUST); |
| uint32_t outputSize = brcConstantData->dwHeight * brcConstantData->dwPitch; |
| |
| // 576-byte of Qp adjust table |
| while ((inputSize >= brcConstantData->dwWidth) && (outputSize >= brcConstantData->dwWidth)) |
| { |
| MOS_SecureMemcpy(outputData, outputSize, inputData, brcConstantData->dwWidth); |
| outputData += brcConstantData->dwPitch; |
| outputSize -= brcConstantData->dwPitch; |
| inputData += brcConstantData->dwWidth; |
| inputSize -= brcConstantData->dwWidth; |
| } |
| //lambda and mode cost |
| if (m_isMaxLcu64) |
| { |
| inputData = (uint8_t *)m_brcLcu64x64LambdaModeCostInit; |
| inputSize = sizeof(m_brcLcu64x64LambdaModeCostInit); |
| } |
| else |
| { |
| inputData = (uint8_t *)m_brcLcu32x32LambdaModeCostInit; |
| inputSize = sizeof(m_brcLcu32x32LambdaModeCostInit); |
| } |
| |
| while ((inputSize >= brcConstantData->dwWidth) && (outputSize >= brcConstantData->dwWidth)) |
| { |
| MOS_SecureMemcpy(outputData, outputSize, inputData, brcConstantData->dwWidth); |
| outputData += brcConstantData->dwPitch; |
| outputSize -= brcConstantData->dwPitch; |
| inputData += brcConstantData->dwWidth; |
| inputSize -= brcConstantData->dwWidth; |
| } |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SendBrcFrameUpdateSurfaces( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK |
| PMOS_RESOURCE brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx]; |
| MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState; |
| mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams; |
| mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams; |
| mhwHevcPicState.bUseVDEnc = m_vdencEnabled ? 1 : 0; |
| mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses(); |
| mhwHevcPicState.sseEnabledInVmeEncode = m_sseEnabled; |
| mhwHevcPicState.rhodomainRCEnable = m_brcEnabled && (m_numPipe > 1); |
| mhwHevcPicState.bSAOEnable = m_hevcSeqParams->SAO_enabled_flag ? (m_hevcSliceParams->slice_sao_luma_flag || m_hevcSliceParams->slice_sao_chroma_flag) : 0; |
| mhwHevcPicState.bTransformSkipEnable = m_hevcPicParams->transform_skip_enabled_flag; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState)); |
| |
| PMOS_SURFACE brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData)); |
| |
| uint32_t startBti = 0; |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE]; |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_FRAME_UPDATE]; |
| CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; |
| |
| // BRC History Buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcBuffers.resBrcHistoryBuffer, |
| m_brcHistoryBufferSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // BRC Prev PAK statistics output buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead], |
| m_hevcBrcPakStatisticsSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // BRC HCP_PIC_STATE read |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| brcHcpStateReadBuffer, |
| m_brcBuffers.dwBrcHcpPicStateSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // BRC HCP_PIC_STATE write |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], |
| m_brcBuffers.dwBrcHcpPicStateSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Combined ENC-parameter buffer |
| startBti++; |
| |
| // BRC Distortion Surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| m_brcDistortion, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| 0, |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // BRC Data Surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| brcConstantData, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| 0, |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Pixel MB Statistics surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_resMbStatsBuffer, |
| m_hwInterface->m_avcMbStatBufferSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Mv and Distortion summation surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_mvAndDistortionSumSurface.sResource, |
| m_mvAndDistortionSumSurface.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBti++], |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_mvAndDistortionSumSurface.sResource, |
| CodechalDbgAttr::attrInput, |
| "MvDistSum", |
| m_mvAndDistortionSumSurface.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrInput, |
| "ImgStateRead", |
| BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(), |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrInput, |
| "ConstData", |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| |
| // PAK statistics buffer is only dumped for BrcUpdate kernel input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead], |
| CodechalDbgAttr::attrInput, |
| "PakStats", |
| HEVC_BRC_PAK_STATISTCS_SIZE, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| |
| // HEVC maintains a ptr to its own distortion surface, as it may be a couple different surfaces |
| if (m_brcDistortion) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_debugInterface->DumpBuffer( |
| &m_brcDistortion->OsResource, |
| CodechalDbgAttr::attrInput, |
| "BrcDist_BeforeFrameBRC", |
| m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight, |
| m_brcBuffers.dwMeBrcDistortionBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcBuffers.resBrcHistoryBuffer, |
| CodechalDbgAttr::attrInput, |
| "HistoryRead_beforeFramBRC", |
| m_brcHistoryBufferSize, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| |
| if (m_brcBuffers.pMbEncKernelStateInUse) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( |
| CODECHAL_MEDIA_STATE_BRC_UPDATE, |
| m_brcBuffers.pMbEncKernelStateInUse)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_resMbStatsBuffer, |
| CodechalDbgAttr::attrInput, |
| "MBStatsSurf", |
| m_hwInterface->m_avcMbStatBufferSize, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE));) |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SendBrcLcuUpdateSurfaces( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE]; |
| PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_LCU_UPDATE]; |
| uint32_t startBTI = 0; |
| CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams; |
| |
| if (m_brcEnabled) |
| { |
| // BRC History Buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_brcBuffers.resBrcHistoryBuffer, |
| m_brcHistoryBufferSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // BRC Distortion Surface - Intra or Inter |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| m_brcDistortion, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| 0, |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // Pixel MB Statistics surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &surfaceCodecParams, |
| &m_resMbStatsBuffer, |
| m_hwInterface->m_avcMbStatBufferSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| } |
| else |
| { |
| // CQP ROI |
| startBTI += 3; |
| } |
| // MB QP surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_brcBuffers.sBrcMbQpBuffer, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| 0, |
| true)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| // ROI surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| &surfaceCodecParams, |
| &m_brcBuffers.sBrcRoiSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ROI_ENCODE].Value, |
| bindingTable->dwBindingTableEntries[startBTI++], |
| 0, |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState( |
| m_hwInterface, |
| cmdBuffer, |
| &surfaceCodecParams, |
| kernelState)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GetCustomDispatchPattern( |
| PMHW_WALKER_PARAMS walkerParams, |
| PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(walkerParams); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(walkerCodecParams); |
| |
| MOS_ZeroMemory(walkerParams, sizeof(*walkerParams)); |
| |
| walkerParams->WalkerMode = (MHW_WALKER_MODE)walkerCodecParams->WalkerMode; |
| |
| walkerParams->dwLocalLoopExecCount = 0xFFFF; //MAX VALUE |
| walkerParams->dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE |
| |
| // the following code is copied from the kernel ULT |
| uint32_t maxThreadWidth, maxThreadHeight; |
| uint32_t threadSpaceWidth, threadSpaceHeight, concurGroupNum, threadScaleV; |
| |
| threadSpaceWidth = walkerCodecParams->dwResolutionX; |
| threadSpaceHeight = walkerCodecParams->dwResolutionY; |
| maxThreadWidth = threadSpaceWidth; |
| maxThreadHeight = threadSpaceHeight; |
| concurGroupNum = m_numberConcurrentGroup; |
| threadScaleV = m_numberEncKernelSubThread; |
| |
| if (concurGroupNum > 1) |
| { |
| maxThreadWidth = threadSpaceWidth; |
| maxThreadHeight = threadSpaceWidth + (threadSpaceWidth + threadSpaceHeight + concurGroupNum - 2) / concurGroupNum; |
| maxThreadHeight *= threadScaleV; |
| maxThreadHeight += 1; |
| } |
| else |
| { |
| threadSpaceHeight *= threadScaleV; |
| maxThreadHeight *= threadScaleV; |
| } |
| |
| uint32_t localLoopExecCount = m_degree45Needed ? (2 * m_numWavefrontInOneRegion + 1) : m_numWavefrontInOneRegion; |
| |
| eStatus = InitMediaObjectWalker(maxThreadWidth, |
| maxThreadHeight, |
| concurGroupNum - 1, |
| m_swScoreboardState->GetDependencyPattern(), |
| m_numberEncKernelSubThread - 1, |
| localLoopExecCount, |
| *walkerParams); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GenerateLcuLevelData(MOS_SURFACE &lcuLevelInputDataSurfaceParam) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_tileParams); |
| |
| uint32_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| |
| uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t residual = (1 << shift) - 1; |
| |
| uint32_t frameWidthInLcu = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t frameHeightInLcu = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift; |
| |
| PLCU_LEVEL_DATA *lcuInfo = (PLCU_LEVEL_DATA *)MOS_AllocMemory(sizeof(PLCU_LEVEL_DATA) * frameWidthInLcu); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(lcuInfo); |
| for (uint32_t i = 0; i < frameWidthInLcu; i++) |
| { |
| lcuInfo[i] = (PLCU_LEVEL_DATA)MOS_AllocMemory(sizeof(LCU_LEVEL_DATA) * frameHeightInLcu); |
| if (lcuInfo[i] == nullptr) |
| { |
| for (uint32_t j = 0; j < i; j++) |
| { |
| MOS_FreeMemory(lcuInfo[j]); |
| } |
| MOS_FreeMemory(lcuInfo); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr); |
| } |
| MOS_ZeroMemory(lcuInfo[i], (sizeof(LCU_LEVEL_DATA) * frameHeightInLcu)); |
| } |
| |
| // Tiling case |
| if (numTileColumns > 1 || numTileRows > 1) |
| { |
| // This assumes that the entire Slice is contained within a Tile |
| for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++) |
| { |
| for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++) |
| { |
| uint32_t tileId = tileRow * numTileColumns + tileCol; |
| MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile = m_tileParams[tileId]; |
| |
| uint32_t tileColumnWidth = (currentTile.TileWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t tileRowHeight = (currentTile.TileHeightInMinCbMinus1 + 1 + residual) >> shift; |
| |
| for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| |
| eStatus = (MOS_STATUS)IsSliceInTile(slcCount, |
| ¤tTile, |
| &sliceInTile, |
| &lastSliceInTile); |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| for (uint32_t i = 0; i < frameWidthInLcu; i++) |
| { |
| MOS_FreeMemory(lcuInfo[i]); |
| } |
| MOS_FreeMemory(lcuInfo); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(eStatus); |
| } |
| |
| if (!sliceInTile) |
| { |
| startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice; |
| continue; |
| } |
| |
| sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address; |
| uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu; |
| uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu; |
| |
| for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++) |
| { |
| lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU; |
| lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index |
| lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount; |
| lcuInfo[sliceLcuX][sliceLcuY].TileId = (uint16_t)tileId; |
| lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = (uint16_t)currentTile.TileStartLCUX; |
| lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = (uint16_t)currentTile.TileStartLCUY; |
| lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)(currentTile.TileStartLCUX + tileColumnWidth); |
| lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)(currentTile.TileStartLCUY + tileRowHeight); |
| |
| sliceLcuX++; |
| |
| if (sliceLcuX >= currentTile.TileStartLCUX + tileColumnWidth) |
| { |
| sliceLcuX = currentTile.TileStartLCUX; |
| sliceLcuY++; |
| } |
| } |
| startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice; |
| } |
| } |
| } |
| } |
| else // non-tiling case |
| { |
| for (uint32_t startLCU = 0, sliceStartLcu = 0, slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address; |
| uint32_t sliceLcuX = sliceStartLcu % frameWidthInLcu; |
| uint32_t sliceLcuY = sliceStartLcu / frameWidthInLcu; |
| |
| for (uint32_t i = 0; i < m_hevcSliceParams[slcCount].NumLCUsInSlice; i++) |
| { |
| lcuInfo[sliceLcuX][sliceLcuY].SliceStartLcuIndex = (uint16_t)startLCU; |
| lcuInfo[sliceLcuX][sliceLcuY].SliceEndLcuIndex = (uint16_t)(startLCU + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index |
| lcuInfo[sliceLcuX][sliceLcuY].SliceId = (uint16_t)slcCount; |
| lcuInfo[sliceLcuX][sliceLcuY].TileId = 0; |
| lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateX = 0; |
| lcuInfo[sliceLcuX][sliceLcuY].TileStartCoordinateY = 0; |
| lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateX = (uint16_t)frameWidthInLcu; |
| lcuInfo[sliceLcuX][sliceLcuY].TileEndCoordinateY = (uint16_t)frameHeightInLcu; |
| |
| sliceLcuX++; |
| |
| if (sliceLcuX >= frameWidthInLcu) |
| { |
| sliceLcuX = 0; |
| sliceLcuY++; |
| } |
| } |
| startLCU += m_hevcSliceParams[slcCount].NumLCUsInSlice; |
| } |
| } |
| |
| // Write LCU Info to the surface |
| if (!Mos_ResourceIsNull(&lcuLevelInputDataSurfaceParam.OsResource)) |
| { |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| PLCU_LEVEL_DATA lcuLevelData = (PLCU_LEVEL_DATA)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &lcuLevelInputDataSurfaceParam.OsResource, |
| &lockFlags); |
| if (lcuLevelData == nullptr) |
| { |
| for (uint32_t i = 0; i < frameWidthInLcu; i++) |
| { |
| MOS_FreeMemory(lcuInfo[i]); |
| } |
| MOS_FreeMemory(lcuInfo); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(nullptr); |
| } |
| |
| uint8_t *dataRowStart = (uint8_t *)lcuLevelData; |
| |
| for (uint32_t sliceLcuY = 0; sliceLcuY < frameHeightInLcu; sliceLcuY++) |
| { |
| for (uint32_t sliceLcuX = 0; sliceLcuX < frameWidthInLcu; sliceLcuX++) |
| { |
| *(lcuLevelData) = lcuInfo[sliceLcuX][sliceLcuY]; |
| |
| if ((sliceLcuX + 1) == frameWidthInLcu) |
| { |
| dataRowStart += lcuLevelInputDataSurfaceParam.dwPitch; |
| lcuLevelData = (PLCU_LEVEL_DATA)dataRowStart; |
| } |
| else |
| { |
| lcuLevelData++; |
| } |
| } |
| } |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &lcuLevelInputDataSurfaceParam.OsResource); |
| } |
| else |
| { |
| eStatus = MOS_STATUS_NULL_POINTER; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n"); |
| } |
| |
| // Freeing the temporarily allocated memory |
| if (lcuInfo) |
| { |
| for (uint32_t i = 0; i < frameWidthInLcu; i++) |
| { |
| MOS_FreeMemory(lcuInfo[i]); |
| } |
| MOS_FreeMemory(lcuInfo); |
| } |
| return eStatus; |
| } |
| |
| // Helper class to describe quadtree node |
| class QuadTreeNode |
| { |
| friend class QuadTree; |
| |
| public: |
| QuadTreeNode(const QuadTreeNode *ctb, uint32_t x, uint32_t y, uint32_t level, uint32_t ctbLog2Size) : m_ctb(ctb), m_x(x), m_y(y), m_level(level), m_size((1 << ctbLog2Size) >> level), m_ctbLog2Size(ctbLog2Size) |
| { |
| } |
| |
| protected: |
| void Build(uint32_t picWidth, uint32_t picHeight) |
| { |
| if (DoesBlockCrossCodedPicture(picWidth, picHeight)) |
| { |
| CreateCUs(); |
| for_each(m_childBlocks.begin(), m_childBlocks.end(), [&](QuadTreeNode &blk) { blk.Build(picWidth, picHeight); }); |
| } |
| } |
| |
| void CreateCUs() |
| { |
| uint32_t size = m_size / 2; |
| uint32_t level = m_level + 1; |
| |
| m_childBlocks.emplace_back(m_ctb, m_x, m_y, level, m_ctbLog2Size); |
| m_childBlocks.emplace_back(m_ctb, m_x + size, m_y, level, m_ctbLog2Size); |
| m_childBlocks.emplace_back(m_ctb, m_x, m_y + size, level, m_ctbLog2Size); |
| m_childBlocks.emplace_back(m_ctb, m_x + size, m_y + size, level, m_ctbLog2Size); |
| } |
| |
| bool DoesBlockCrossCodedPicture(uint32_t w, uint32_t h) const |
| { |
| return (m_x < w && ((m_x + m_size) > w)) || (m_y < h && ((m_y + m_size) > h)); |
| } |
| |
| public: |
| const QuadTreeNode * m_ctb = nullptr; // the root of CTB |
| const uint32_t m_x = 0; |
| const uint32_t m_y = 0; |
| const uint32_t m_level = 0; |
| const uint32_t m_size = 0; |
| const uint32_t m_ctbLog2Size = 0; |
| std::vector<QuadTreeNode> m_childBlocks = {}; |
| }; |
| |
| class QuadTree : public QuadTreeNode |
| { |
| public: |
| QuadTree(uint32_t x, uint32_t y, uint32_t ctbLog2Size) |
| : QuadTreeNode(this, x, y, 0, ctbLog2Size) |
| { |
| } |
| |
| // Build quadtree in the way none of the blocks crosses picture boundary |
| void BuildQuadTree(uint32_t width, uint32_t height) |
| { |
| m_picWidth = width; |
| m_picHeight = height; |
| |
| Build(width, height); |
| |
| CUs.reserve(64); |
| FillCuList(*this, CUs); |
| } |
| |
| static void GetSplitFlags(const QuadTreeNode &blk, HcpPakObjectG12 &pakObj) |
| { |
| auto idx = [](uint32_t x0, uint32_t y0, uint32_t x, uint32_t y, uint32_t log2CbSize) { |
| auto const nCbS = (1 << log2CbSize); |
| return (x - x0) / nCbS + (y - y0) / nCbS * 2; |
| }; |
| |
| if (blk.m_childBlocks.empty()) // Block doesn't have splits |
| return; |
| |
| switch (blk.m_level) |
| { |
| case 0: |
| pakObj.DW1.Split_flag_level0 = 1; |
| break; |
| |
| case 1: |
| { |
| auto const blkIdx = idx(blk.m_ctb->m_x, blk.m_ctb->m_y, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 1); |
| pakObj.DW1.Split_flag_level1 |= 1 << blkIdx; |
| } |
| break; |
| |
| case 2: |
| { |
| auto const blkIdx1 = idx(blk.m_ctb->m_x, blk.m_ctb->m_y, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 1); |
| auto const nCbS1 = (1 << (blk.m_ctbLog2Size - 1)); |
| auto const x1 = blk.m_ctb->m_x + nCbS1 * (blkIdx1 % 2); |
| auto const y1 = blk.m_ctb->m_y + nCbS1 * (blkIdx1 / 2); |
| auto const blkIdx2 = idx(x1, y1, blk.m_x, blk.m_y, blk.m_ctbLog2Size - 2); |
| switch (blkIdx1) |
| { |
| case 0: |
| pakObj.DW1.Split_flag_level2_level1part0 |= 1 << blkIdx2; |
| break; |
| case 1: |
| pakObj.DW1.Split_flag_level2_level1part1 |= 1 << blkIdx2; |
| break; |
| case 2: |
| pakObj.DW1.Split_flag_level2_level1part2 |= 1 << blkIdx2; |
| break; |
| case 3: |
| pakObj.DW1.Split_flag_level2_level1part3 |= 1 << blkIdx2; |
| break; |
| }; |
| } |
| break; |
| } |
| |
| for_each(blk.m_childBlocks.begin(), blk.m_childBlocks.end(), [&](const QuadTreeNode &blk) { GetSplitFlags(blk, pakObj); }); |
| } |
| |
| protected: |
| // Prepare a list of CU inside a coded picure boundary |
| void FillCuList(const QuadTreeNode &cu, std::vector<const QuadTreeNode *> &list) |
| { |
| if (cu.m_childBlocks.empty() && ((cu.m_x + cu.m_size) <= m_picWidth) && ((cu.m_y + cu.m_size) <= m_picHeight)) |
| list.push_back(&cu); |
| else |
| for_each(cu.m_childBlocks.begin(), cu.m_childBlocks.end(), [&](const QuadTreeNode &blk) { FillCuList(blk, list); }); |
| } |
| |
| uint32_t m_picWidth = 0; |
| uint32_t m_picHeight = 0; |
| |
| public: |
| std::vector<const QuadTreeNode *> CUs = {}; |
| }; |
| |
| MOS_STATUS CodechalEncHevcStateG12::GenerateSkipFrameMbCodeSurface(SkipFrameInfo &skipframeInfo) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_LOCK_PARAMS lockFlags = {}; |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &skipframeInfo.m_resMbCodeSkipFrameSurface, &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| MOS_ZeroMemory(data, m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE); |
| |
| auto pakObjData = (HcpPakObjectG12 *)data; |
| auto cuData = (EncodeHevcCuDataG12 *)(data + m_mvOffset); |
| |
| auto const ctbSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| auto const maxNumCuInCtb = (ctbSize / CODECHAL_HEVC_MIN_CU_SIZE) * (ctbSize / CODECHAL_HEVC_MIN_CU_SIZE); |
| auto const picWidthInCtb = MOS_ROUNDUP_DIVIDE(m_frameWidth, ctbSize); |
| auto const picHeightInCtb = MOS_ROUNDUP_DIVIDE(m_frameHeight, ctbSize); |
| uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| uint32_t * tileColumnsStartPosition{new uint32_t[num_tile_columns]{}}; |
| |
| for (uint32_t i = 0; i < (num_tile_columns); i++) |
| { |
| if (m_hevcPicParams->tile_column_width[i] == 0) |
| { |
| tileColumnsStartPosition[i] = picWidthInCtb; |
| break; |
| } |
| |
| if (i == 0) |
| { |
| tileColumnsStartPosition[i] = m_hevcPicParams->tile_column_width[i]; |
| continue; |
| } |
| |
| tileColumnsStartPosition[i] = tileColumnsStartPosition[i - 1] + m_hevcPicParams->tile_column_width[i]; |
| } |
| |
| // Prepare CTB splits for corner cases: |
| // Last column |
| QuadTree lastColumnCtb((picWidthInCtb - 1) * ctbSize, 0, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| lastColumnCtb.BuildQuadTree(m_frameWidth, m_frameHeight); |
| |
| // Last row |
| QuadTree lastRowCtb(0, (picHeightInCtb - 1) * ctbSize, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| lastRowCtb.BuildQuadTree(m_frameWidth, m_frameHeight); |
| |
| // Right bottom CTB |
| QuadTree lastColRowCtb((picWidthInCtb - 1) * ctbSize, (picHeightInCtb - 1) * ctbSize, m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3); |
| lastColRowCtb.BuildQuadTree(m_frameWidth, m_frameHeight); |
| |
| uint32_t sliceFirstCtbIdx; |
| uint32_t ctbXAddr; |
| uint32_t ctbYAddr; |
| uint32_t nCUs; |
| uint32_t tileEnd; |
| uint32_t tileStart; |
| for (uint32_t slcIdx = 0; slcIdx < m_numSlices; ++slcIdx) |
| { |
| sliceFirstCtbIdx = m_hevcSliceParams[slcIdx].slice_segment_address; |
| tileEnd = 0; |
| tileStart = 0; |
| ctbXAddr = sliceFirstCtbIdx % picWidthInCtb; |
| ctbYAddr = sliceFirstCtbIdx / picWidthInCtb; |
| for (uint32_t i = 0; i < num_tile_columns; i++) |
| { |
| //Determine what tile slice belongs to |
| if (ctbXAddr < tileColumnsStartPosition[i]) |
| { |
| tileEnd = tileColumnsStartPosition[i]; |
| tileStart = (i == 0) ? 0 : tileColumnsStartPosition[i - 1]; |
| break; |
| } |
| } |
| |
| for (uint32_t ctbIdxInSlice = 0; ctbIdxInSlice < m_hevcSliceParams[slcIdx].NumLCUsInSlice; ++ctbIdxInSlice, ++pakObjData, ++ctbXAddr) |
| { |
| if (ctbXAddr >= tileEnd) |
| { |
| ctbYAddr++; |
| ctbXAddr = tileStart; |
| } |
| pakObjData->DW0.Type = 0x03; |
| pakObjData->DW0.Opcode = 0x27; |
| pakObjData->DW0.SubOp = 0x21; |
| pakObjData->DW0.DwordLength = 0x3; |
| pakObjData->DW2.Current_LCU_X_Addr = ctbXAddr; |
| pakObjData->DW2.Current_LCU_Y_Addr = ctbYAddr; |
| pakObjData->DW4.LCUForceZeroCoeff = 1; // Force skip CUs |
| pakObjData->DW4.Disable_SAO_On_LCU_Flag = 1; |
| |
| const bool bCtbCrossRightPicBoundary = (ctbXAddr + 1) * ctbSize > m_frameWidth; |
| const bool bCtbCrossBottomPicBoundary = (ctbYAddr + 1) * ctbSize > m_frameHeight; |
| const bool bCtbCrossRightBottomPicBoundary = bCtbCrossRightPicBoundary && bCtbCrossBottomPicBoundary; |
| if (bCtbCrossRightBottomPicBoundary) |
| { |
| QuadTree::GetSplitFlags(lastColRowCtb, *pakObjData); |
| nCUs = lastColRowCtb.CUs.size(); |
| } |
| else if (bCtbCrossRightPicBoundary) |
| { |
| QuadTree::GetSplitFlags(lastColumnCtb, *pakObjData); |
| nCUs = lastColumnCtb.CUs.size(); |
| } |
| else if (bCtbCrossBottomPicBoundary) |
| { |
| QuadTree::GetSplitFlags(lastRowCtb, *pakObjData); |
| nCUs = lastRowCtb.CUs.size(); |
| } |
| else // default case |
| { |
| nCUs = 1; |
| // For regular CTB, CU splits are not needed. All level values are zero |
| } |
| pakObjData->DW1.CU_count_minus1 = nCUs - 1; |
| |
| if (ctbIdxInSlice == (m_hevcSliceParams[slcIdx].NumLCUsInSlice - 1)) |
| { |
| pakObjData->DW1.LastCtbOfTileFlag = pakObjData->DW1.LastCtbOfSliceFlag = 1; |
| pakObjData->DW5 = 0x05000000; // Add batch buffer end flag |
| } |
| |
| auto CeilLog2 = [](uint32_t x) { |
| auto l = 0; |
| while (x > (1U << l)) l++; |
| return l; |
| }; |
| |
| // Fill CU records |
| for (unsigned int cuIdx = 0; cuIdx < nCUs; ++cuIdx, ++cuData) |
| { |
| cuData->DW7_CuPredMode = 1; // Inter |
| |
| // Note that this can work only for B slices. |
| // If P slice support appears, we need to have the 2nd skipFrameMbCodeSurface |
| // When panic mode is triggered backwards reference only should be used |
| cuData->DW7_InterPredIdcMv0 = 0; |
| cuData->DW7_InterPredIdcMv1 = 0; |
| |
| if (bCtbCrossRightBottomPicBoundary) |
| { |
| cuData->DW7_CuSize = CeilLog2(lastColRowCtb.CUs[cuIdx]->m_size) - 3; |
| } |
| else if (bCtbCrossRightPicBoundary) |
| { |
| cuData->DW7_CuSize = CeilLog2(lastColumnCtb.CUs[cuIdx]->m_size) - 3; |
| } |
| else if (bCtbCrossBottomPicBoundary) |
| { |
| cuData->DW7_CuSize = CeilLog2(lastRowCtb.CUs[cuIdx]->m_size) - 3; |
| } |
| else |
| { |
| cuData->DW7_CuSize = m_hevcSeqParams->log2_max_coding_block_size_minus3; |
| } |
| |
| if (cuData->DW7_CuSize == 3) // 64x64 |
| { |
| cuData->DW5_TuSize = 0xff; // 4 TUs 32x32 |
| cuData->DW6_TuCountMinus1 = 3; |
| } |
| else if (cuData->DW7_CuSize == 2) // 32x32 |
| { |
| cuData->DW5_TuSize = 3; // 1 TU 32x32 |
| } |
| else if (cuData->DW7_CuSize == 1) // 16x16 |
| { |
| cuData->DW5_TuSize = 2; // 1 TU 16x16 |
| } |
| else // 8x8 |
| { |
| cuData->DW5_TuSize = 1; // 1 TU 8x8 |
| } |
| } |
| cuData += (maxNumCuInCtb - nCUs); // Shift to CUs of next CTB |
| |
| |
| } |
| } |
| m_osInterface->pfnUnlockResource(m_osInterface, &skipframeInfo.m_resMbCodeSkipFrameSurface); |
| delete[] tileColumnsStartPosition; |
| |
| skipframeInfo.numSlices = m_numSlices; |
| uint32_t mbCodeSize = m_mbCodeSize + 8 * CODECHAL_CACHELINE_SIZE; |
| |
| #if USE_CODECHAL_DEBUG_TOOL |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &skipframeInfo.m_resMbCodeSkipFrameSurface, |
| CodechalDbgAttr::attrInput, |
| "SkipFrameSurface", |
| mbCodeSize, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GenerateConcurrentThreadGroupData(MOS_RESOURCE &concurrentThreadGroupData) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!Mos_ResourceIsNull(&concurrentThreadGroupData)) |
| { |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| PCONCURRENT_THREAD_GROUP_DATA concurrentTgData = (PCONCURRENT_THREAD_GROUP_DATA)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &concurrentThreadGroupData, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(concurrentTgData); |
| |
| MOS_ZeroMemory(concurrentTgData, concurrentThreadGroupData.iSize); |
| |
| uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t residual = (1 << shift) - 1; |
| |
| uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift; |
| |
| uint32_t slcCount = 0; |
| // Currently only using one thread group for each slice. Extend it to multiple soon. |
| for (uint32_t startLcu = 0; slcCount < m_numSlices; slcCount++, startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice) |
| { |
| uint32_t sliceStartLcu = m_hevcSliceParams[slcCount].slice_segment_address; |
| uint32_t sliceStartLcux = sliceStartLcu % frameWidthInLCU; |
| uint32_t sliceStartLcuy = sliceStartLcu / frameWidthInLCU; |
| |
| uint32_t sliceEndLcu = (uint16_t)(startLcu + m_hevcSliceParams[slcCount].NumLCUsInSlice); // this should be next slice start index |
| uint32_t sliceEndLcux = sliceStartLcu % frameWidthInLCU; |
| uint32_t sliceEndLcuy = sliceStartLcu / frameWidthInLCU; |
| |
| concurrentTgData->CurrSliceStartLcuX = (uint16_t)sliceStartLcux; |
| concurrentTgData->CurrSliceStartLcuY = (uint16_t)sliceStartLcuy; |
| |
| concurrentTgData->CurrSliceEndLcuX = (uint16_t)sliceEndLcux; |
| concurrentTgData->CurrSliceEndLcuY = (uint16_t)sliceEndLcuy; |
| |
| concurrentTgData->CurrTgStartLcuX = (uint16_t)sliceStartLcux; |
| concurrentTgData->CurrTgStartLcuY = (uint16_t)sliceStartLcuy; |
| |
| concurrentTgData->CurrTgEndLcuX = (uint16_t)sliceEndLcux; |
| concurrentTgData->CurrTgEndLcuY = (uint16_t)sliceEndLcuy; |
| } |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &concurrentThreadGroupData); |
| } |
| else |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n"); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeMbEncKernel( |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL); |
| |
| // Initialize DSH kernel state |
| PMHW_KERNEL_STATE kernelState; |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| CODECHAL_WALKER_DEGREE walkerDegree; |
| MHW_WALKER_PARAMS walkerParams; |
| uint32_t walkerResolutionX, walkerResolutionY; |
| bool customDispatchPattern = true; |
| uint16_t totalThreadNumPerLcu = 1; |
| |
| if (m_hevcPicParams->CodingType == I_TYPE) |
| { |
| encFunctionType = CODECHAL_MEDIA_STATE_HEVC_I_MBENC; |
| } |
| else |
| { |
| encFunctionType = m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC; |
| } |
| |
| if (m_isMaxLcu64) |
| { |
| kernelState = &m_mbEncKernelStates[MBENC_LCU64_KRNIDX]; |
| if (m_hevcSeqParams->TargetUsage == 1) |
| { |
| walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6; |
| walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6; |
| } |
| else |
| { |
| walkerResolutionX = 2 * (MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6); |
| walkerResolutionY = 2 * (MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6); |
| } |
| } |
| else |
| { |
| kernelState = &m_mbEncKernelStates[MBENC_LCU32_KRNIDX]; |
| walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; |
| walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5; |
| } |
| |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| walkerCodecParams.dwResolutionX = walkerResolutionX; |
| walkerCodecParams.dwResolutionY = walkerResolutionY; |
| walkerCodecParams.dwNumSlices = m_numSlices; |
| walkerCodecParams.usTotalThreadNumPerLcu = totalThreadNumPerLcu; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCustomDispatchPattern(&walkerParams, &walkerCodecParams)); |
| |
| // If Single Task Phase is not enabled, use BT count for the kernel state. |
| if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported) |
| { |
| uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( |
| m_stateHeapInterface, |
| maxBtCount)); |
| m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| // Set up the DSH/SSH as normal |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| // Generate Lcu Level Data |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData(m_lcuLevelInputDataSurface[m_currRecycledBufIdx])); |
| |
| // Generate Concurrent Thread Group Data |
| if (m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26Degree || |
| m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26ZDegree || |
| m_swScoreboardState->GetDependencyPattern() == dependencyWavefront26XDegree) |
| { |
| // Generate Concurrent Thread Group Data |
| uint32_t curIdx = m_currRecycledBufIdx; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateConcurrentThreadGroupData(m_encBCombinedBuffer1[curIdx].sResource)); |
| } |
| else |
| { |
| // For 45D walking patter, kernel generates the concurrent thread group by itself. No need for driver to generate. |
| } |
| |
| // setup curbe |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncBKernel()); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_DSH_TYPE, |
| kernelState)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( |
| encFunctionType, |
| kernelState)); |
| //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHEVCMbEncCurbeG12( |
| //m_debugInterface, |
| //encFunctionType, |
| //&m_encBCombinedBuffer1[m_currRecycledBufIdx].sResource)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_ISH_TYPE, |
| kernelState));) |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); |
| |
| SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| // TO DO : Remove scoreboard from VFE STATE Command |
| sendKernelCmdsParams.bEnableCustomScoreBoard = false; |
| sendKernelCmdsParams.pCustomScoreBoard = nullptr; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| // send surfaces |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesBKernel(&cmdBuffer)); |
| |
| CODECHAL_DEBUG_TOOL( |
| if (m_pictureCodingType == I_TYPE) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_lcuLevelInputDataSurface[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrOutput, |
| "HEVC_I_MBENC_LcuLevelData_In", |
| CODECHAL_MEDIA_STATE_HEVC_I_MBENC)); |
| } else { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_lcuLevelInputDataSurface[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrOutput, |
| "HEVC_B_MBENC_LcuLevelData_In", |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| }) |
| |
| if ((encFunctionType == CODECHAL_MEDIA_STATE_HEVC_B_MBENC) || (encFunctionType == CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC)) |
| { |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_encConstantTableForB.sResource, |
| "HEVC_B_MBENC_ConstantData_In", |
| CodechalDbgAttr::attrOutput, |
| m_encConstantTableForB.dwSize, |
| 0, |
| encFunctionType))); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType)); |
| |
| // Add dump for MBEnc surface state heap here |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_SSH_TYPE, |
| kernelState));) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( |
| m_stateHeapInterface)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd( |
| &cmdBuffer, |
| nullptr)); |
| } |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| encFunctionType, |
| nullptr))); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase)); |
| |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); |
| m_lastTaskInPhase = false; |
| } |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_debugSurface[0].sResource, |
| CodechalDbgAttr::attrOutput, |
| "DebugDataSurface_Out0", |
| m_debugSurface[0].dwSize, |
| 0, |
| encFunctionType)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_debugSurface[1].sResource, |
| CodechalDbgAttr::attrOutput, |
| "DebugDataSurface_Out1", |
| m_debugSurface[1].dwSize, |
| 0, |
| encFunctionType)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_debugSurface[2].sResource, |
| CodechalDbgAttr::attrOutput, |
| "DebugDataSurface_Out2", |
| m_debugSurface[2].dwSize, |
| 0, |
| encFunctionType)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_debugSurface[3].sResource, |
| CodechalDbgAttr::attrOutput, |
| "DebugDataSurface_Out3", |
| m_debugSurface[3].dwSize, |
| 0, |
| encFunctionType));); |
| |
| #if 0 // the dump should be done in the GetStatusReport. However, if ENC causes PAK hangs-up, there is no way to get them. |
| { |
| CODECHAL_DEBUG_TOOL( |
| CODEC_REF_LIST currRefList; |
| |
| currRefList = *(pRefList[m_currReconstructedPic.FrameIdx]); |
| currRefList.RefPic = m_currOriginalPic; |
| |
| m_debugInterface->CurrPic = m_currOriginalPic; |
| m_debugInterface->dwBufferDumpFrameNum = m_storeData; |
| m_debugInterface->wFrameType = m_pictureCodingType; |
| |
| //CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeMbEncMbPakOutput( |
| // m_debugInterface, |
| // this, |
| // &currRefList, |
| // (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? |
| // CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &currRefList.resRefMbCodeBuffer, |
| CodechalDbgAttr::attrOutput, |
| "MbCode", |
| m_picWidthInMb * m_frameFieldHeightInMb*64, |
| CodecHal_PictureIsBottomField(currRefList.RefPic) ? m_frameFieldHeightInMb * m_picWidthInMb * 64 : 0, |
| (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? |
| CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); |
| |
| if (m_mvDataSize) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &currRefList.resRefMvDataBuffer, |
| CodechalDbgAttr::attrOutput, |
| "MbData", |
| m_picWidthInMb * m_frameFieldHeightInMb * (32 * 4), |
| CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * (32 * 4), 0x1000) : 0, |
| (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? |
| CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); |
| } |
| if (CodecHalIsFeiEncode(m_codecFunction)) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_resDistortionBuffer, |
| CodechalDbgAttr::attrOutput, |
| "DistortionSurf", |
| m_picWidthInMb * m_frameFieldHeightInMb * 48, |
| CodecHal_PictureIsBottomField(currRefList.RefPic) ? MOS_ALIGN_CEIL(m_frameFieldHeightInMb * m_picWidthInMb * 48, 0x1000) : 0, |
| (m_codecFunction != CODECHAL_FUNCTION_HYBRIDPAK) ? |
| CODECHAL_MEDIA_STATE_ENC_NORMAL : CODECHAL_MEDIA_STATE_HYBRID_PAK_P2)); |
| } |
| |
| ) |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer( |
| this, |
| &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource, |
| m_encBCombinedBuffer2[m_currRecycledBufIdx].dwSize, |
| (const char*)"_Hevc_CombinedBuffer2", |
| false)); |
| ); |
| |
| // Dump SW scoreboard surface - Output of MBENC |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpHevcEncodeSwScoreboardSurface( |
| m_debugInterface, |
| m_swScoreboardState->GetCurSwScoreboardSurface(), false)); |
| ); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer( |
| this, |
| &m_encConstantTableForB.sResource, |
| m_encConstantTableForB.dwSize, |
| (const char*)"_Hevc_EncConstantTable", |
| true)); |
| ); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer( |
| this, |
| &m_debugSurface[0].sResource, |
| m_debugSurface[0].dwSize, |
| (const char*)"_Hevc_DebugDump0", |
| true)); |
| ); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer( |
| this, |
| &m_debugSurface[1].sResource, |
| m_debugSurface[1].dwSize, |
| (const char*)"_Hevc_DebugDump1", |
| true)); |
| ); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer( |
| this, |
| &m_debugSurface[2].sResource, |
| m_debugSurface[2].dwSize, |
| (const char*)"_Hevc_DebugDump2", |
| true)); |
| ); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgDumpEncodeCombineBuffer( |
| this, |
| &m_debugSurface[3].sResource, |
| m_debugSurface[3].dwSize, |
| (const char*)"_Hevc_DebugDump3", |
| true)); |
| ); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_currPicWithReconBoundaryPix, |
| CodechalDbgAttr::attrReconstructedSurface, |
| "ReconSurf"))); |
| } |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeBrcInitResetKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates); |
| |
| CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = m_brcInit ? CODECHAL_HEVC_BRC_INIT : CODECHAL_HEVC_BRC_RESET; |
| |
| // Initialize DSH kernel state |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx]; |
| |
| // If Single Task Phase is not enabled, use BT count for the kernel state. |
| if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported) |
| { |
| uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( |
| m_stateHeapInterface, |
| maxBtCount)); |
| m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| // Set up the DSH/SSH as normal |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| // Setup curbe for BrcInitReset kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcInitReset( |
| brcKrnIdx)); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_INIT_RESET; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_DSH_TYPE, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( |
| encFunctionType, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_ISH_TYPE, |
| kernelState));) |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); |
| |
| SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| // Send surfaces for BrcInitReset Kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcInitResetSurfaces(&cmdBuffer, brcKrnIdx)); |
| |
| MHW_MEDIA_OBJECT_PARAMS mediaObjectParams; |
| MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams)); |
| |
| MediaObjectInlineData mediaObjectInlineData; |
| MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData)); |
| mediaObjectParams.pInlineData = &mediaObjectInlineData; |
| mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject( |
| &cmdBuffer, |
| nullptr, |
| &mediaObjectParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType)); |
| |
| // Add dump for BrcInitReset surface state heap here |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_SSH_TYPE, |
| kernelState));) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( |
| m_stateHeapInterface)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd( |
| &cmdBuffer, |
| nullptr)); |
| } |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| encFunctionType, |
| nullptr))); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase)); |
| |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); |
| m_lastTaskInPhase = false; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeBrcFrameUpdateKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE); |
| |
| CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE; |
| |
| // Initialize DSH kernel state |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx]; |
| |
| // If Single Task Phase is not enabled, use BT count for the kernel state. |
| if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported) |
| { |
| uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( |
| m_stateHeapInterface, |
| maxBtCount)); |
| m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| // Set up the DSH/SSH as normal |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| // Setup curbe for BrcFrameUpdate kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate( |
| brcKrnIdx)); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_DSH_TYPE, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( |
| encFunctionType, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_ISH_TYPE, |
| kernelState));) |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); |
| |
| SendKernelCmdsParams sendKernelCmdsParams; |
| sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| // Send surfaces for BrcFrameUpdate Kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcFrameUpdateSurfaces(&cmdBuffer)); |
| |
| MHW_MEDIA_OBJECT_PARAMS mediaObjectParams; |
| MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams)); |
| |
| MediaObjectInlineData mediaObjectInlineData; |
| MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData)); |
| mediaObjectParams.pInlineData = &mediaObjectInlineData; |
| mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObject( |
| &cmdBuffer, |
| nullptr, |
| &mediaObjectParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType)); |
| |
| // Add dump for BrcFrameUpdate surface state heap here |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_SSH_TYPE, |
| kernelState));) |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( |
| m_stateHeapInterface)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd( |
| &cmdBuffer, |
| nullptr)); |
| } |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| encFunctionType, |
| nullptr))); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase)); |
| |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); |
| m_lastTaskInPhase = false; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeBrcLcuUpdateKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU); |
| |
| CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_LCU_UPDATE; |
| |
| // Initialize DSH kernel state |
| PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx]; |
| |
| // If Single Task Phase is not enabled, use BT count for the kernel state. |
| if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported) |
| { |
| uint32_t maxBtCount = m_singleTaskPhaseSupported ? m_maxBtCount : kernelState->KernelParams.iBTCount; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf( |
| m_stateHeapInterface, |
| maxBtCount)); |
| m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| // Set up the DSH/SSH as normal |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| // Setup curbe for BrcFrameUpdate kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate( |
| brcKrnIdx)); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_MB_BRC_UPDATE; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_DSH_TYPE, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe( |
| encFunctionType, |
| kernelState)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_ISH_TYPE, |
| kernelState));) |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0)); |
| |
| SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| if (m_hevcPicParams->NumROI) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupROISurface()); |
| } |
| |
| // Send surfaces for BrcFrameUpdate Kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcLcuUpdateSurfaces(&cmdBuffer)); |
| |
| // Program Media walker |
| uint32_t resolutionX, resolutionY; |
| resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth); |
| resolutionX = MOS_ROUNDUP_SHIFT(resolutionX, 4); |
| resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight); |
| resolutionY = MOS_ROUNDUP_SHIFT(resolutionY, 3); |
| CODECHAL_ENCODE_ASSERTMESSAGE("LucBRC thread space = %d x %d", resolutionX, resolutionY); |
| |
| MHW_WALKER_PARAMS walkerParams; |
| MOS_ZeroMemory(&walkerParams, sizeof(walkerParams)); |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| walkerCodecParams.dwResolutionX = resolutionX; |
| walkerCodecParams.dwResolutionY = resolutionY; |
| walkerCodecParams.bNoDependency = true; |
| walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported; |
| walkerCodecParams.ucGroupId = m_groupId; |
| walkerCodecParams.wPictureCodingType = m_pictureCodingType; |
| walkerCodecParams.bUseScoreboard = false; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetRenderInterface()->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType)); |
| |
| // Add dump for BrcFrameUpdate surface state heap here |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion( |
| encFunctionType, |
| MHW_SSH_TYPE, |
| kernelState));) |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks( |
| m_stateHeapInterface, |
| kernelState)); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId( |
| m_stateHeapInterface)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetMiInterface()->AddMiBatchBufferEnd( |
| &cmdBuffer, |
| nullptr)); |
| } |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer( |
| &cmdBuffer, |
| encFunctionType, |
| nullptr))); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase)); |
| |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0); |
| |
| if (!m_singleTaskPhaseSupported || m_lastTaskInPhase) |
| { |
| m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw); |
| m_lastTaskInPhase = false; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeKernelFunctions() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (m_pakOnlyTest) |
| { |
| // Skip ENC when PAK only mode is enabled |
| return eStatus; |
| } |
| |
| if (m_pictureCodingType == P_TYPE) |
| { |
| m_lowDelay = true; |
| } |
| |
| if (m_hevcPicParams->bUsedAsRef || m_brcEnabled) |
| { |
| m_currRefSync = &m_refSync[m_currMbCodeIdx]; |
| |
| // Check if the signal obj has been used before |
| if (!m_hevcSeqParams->ParallelBRC && (m_currRefSync->uiSemaphoreObjCount || m_currRefSync->bInUsed)) |
| { |
| MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_renderContext; |
| syncParams.presSyncResource = &m_currRefSync->resSyncObject; |
| syncParams.uiSemaphoreCount = m_currRefSync->uiSemaphoreObjCount; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineWait(m_osInterface, &syncParams)); |
| m_currRefSync->uiSemaphoreObjCount = 0; |
| m_currRefSync->bInUsed = false; |
| } |
| } |
| else |
| { |
| m_currRefSync = nullptr; |
| } |
| |
| //Reset to use a different performance tag ID |
| m_osInterface->pfnResetPerfBufferID(m_osInterface); |
| |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = false; |
| |
| m_brcInputForEncKernelBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx]; |
| |
| // BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface |
| // BRC init is called once even for CQP mode when ROI is enabled, hence also checking for first frame flag |
| if ((m_brcEnabled && (m_brcInit || m_brcReset)) || (m_firstFrame && m_hevcPicParams->NumROI)) |
| { |
| m_firstTaskInPhase = m_lastTaskInPhase = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcInitResetKernel()); |
| m_brcInit = m_brcReset = false; |
| } |
| |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = false; |
| |
| CodechalEncodeSwScoreboard::KernelParams swScoreboardKernelParames; |
| MOS_ZeroMemory(&swScoreboardKernelParames, sizeof(swScoreboardKernelParames)); |
| |
| InitSwScoreBoardParams(swScoreboardKernelParames); |
| |
| if (m_useSwInitScoreboard) |
| { |
| SetupSwScoreBoard(&swScoreboardKernelParames); |
| } |
| else |
| { |
| // Call SW scoreboard Init kernel used by MBEnc kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->Execute(&swScoreboardKernelParames)); |
| } |
| |
| // Dump SW scoreboard surface - Output of SW scoreboard Init Kernel and Input to MBENC |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| m_swScoreboardState->GetCurSwScoreboardSurface(), |
| CodechalDbgAttr::attrInput, |
| "InitSWScoreboard_In", |
| CODECHAL_MEDIA_STATE_SW_SCOREBOARD_INIT))); |
| |
| // Csc, Downscaling, and/or 10-bit to 8-bit conversion |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_cscDsState); |
| |
| CodechalEncodeCscDs::KernelParams cscScalingKernelParams; |
| MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams)); |
| cscScalingKernelParams.bLastTaskInPhaseCSC = |
| cscScalingKernelParams.bLastTaskInPhase4xDS = !(m_16xMeSupported || m_hmeEnabled || m_brcEnabled); |
| cscScalingKernelParams.bLastTaskInPhase16xDS = !(m_32xMeSupported || m_hmeEnabled || m_brcEnabled); |
| cscScalingKernelParams.bLastTaskInPhase32xDS = !(m_hmeEnabled || m_brcEnabled); |
| |
| CodechalEncodeCscDsG12::HevcExtKernelParams hevcExtCscParams; |
| MOS_ZeroMemory(&hevcExtCscParams, sizeof(hevcExtCscParams)); |
| |
| if (m_isMaxLcu64) |
| { |
| hevcExtCscParams.bHevcEncHistorySum = true; |
| hevcExtCscParams.bUseLCU32 = false; |
| hevcExtCscParams.presHistoryBuffer = &m_encBCombinedBuffer2[m_lastRecycledBufIdx].sResource; |
| hevcExtCscParams.dwSizeHistoryBuffer = m_historyOutBufferSize; |
| hevcExtCscParams.dwOffsetHistoryBuffer = m_historyOutBufferOffset; |
| hevcExtCscParams.presHistorySumBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource; |
| hevcExtCscParams.dwSizeHistorySumBuffer = sizeof(MBENC_COMBINED_BUFFER2::ucHistoryInBuffer); |
| hevcExtCscParams.dwOffsetHistorySumBuffer = sizeof(MBENC_COMBINED_BUFFER2::ucBrcCombinedEncBuffer); |
| hevcExtCscParams.presMultiThreadTaskBuffer = &m_encBCombinedBuffer2[m_currRecycledBufIdx].sResource; |
| hevcExtCscParams.dwSizeMultiThreadTaskBuffer = m_threadTaskBufferSize; |
| hevcExtCscParams.dwOffsetMultiThreadTaskBuffer = m_threadTaskBufferOffset; |
| cscScalingKernelParams.hevcExtParams = &hevcExtCscParams; |
| } |
| else |
| { |
| cscScalingKernelParams.hevcExtParams = nullptr; // LCU32 does not require history buffers |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->KernelFunctions(&cscScalingKernelParams)); |
| |
| if (m_hmeEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel()); |
| } |
| else if (m_brcEnabled && m_hevcPicParams->CodingType == I_TYPE) |
| { |
| m_lastTaskInPhase = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeIntraDistKernel()); |
| } |
| |
| // BRC + MbEnc in second task phase |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = false; |
| |
| // Wait for PAK if necessary |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak()); |
| |
| // ROI uses the BRC LCU update kernel, even in CQP. So we will call it |
| if (m_hevcPicParams->NumROI && !m_brcEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcLcuUpdateKernel()); |
| m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame; |
| |
| CODECHAL_DEBUG_TOOL( |
| if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.sBrcMbQpBuffer.OsResource, |
| CodechalDbgAttr::attrOutput, |
| "MbQp", |
| m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight, |
| m_brcBuffers.dwBrcMbQpBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcDistortion->OsResource, |
| CodechalDbgAttr::attrInput, |
| "BrcDist_AfterLcuBrc", |
| m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight, |
| m_brcBuffers.dwMeBrcDistortionBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE));) |
| } |
| |
| if (m_brcEnabled) |
| { |
| m_hevcBrcG12->m_brcNumPakPasses = m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcFrameUpdateKernel()); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcDistortion->OsResource, |
| CodechalDbgAttr::attrInput, |
| "BrcDist_AfterFrameBrc", |
| m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight, |
| m_brcBuffers.dwMeBrcDistortionBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcHistoryBuffer, |
| CodechalDbgAttr::attrOutput, |
| "HistoryWrite", |
| m_brcHistoryBufferSize, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrOutput, |
| "ImgStateWrite", |
| BRC_IMG_STATE_SIZE_PER_PASS_G11 * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(), |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE));) |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcDistortion->OsResource, |
| CodechalDbgAttr::attrInput, |
| "BrcDist_AfterFrameBrcUpdate", |
| m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight, |
| m_brcBuffers.dwMeBrcDistortionBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], |
| CodechalDbgAttr::attrOutput, |
| "ImgStateWrite", |
| BRC_IMG_STATE_SIZE_PER_PASS * m_hwInterface->GetMfxInterface()->GetBrcNumPakPasses(), |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcHistoryBuffer, |
| CodechalDbgAttr::attrOutput, |
| "HistoryWrite", |
| m_brcHistoryBufferSize, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.sBrcIntraDistortionBuffer.OsResource, |
| CodechalDbgAttr::attrOutput, |
| "Idistortion", |
| m_brcBuffers.sBrcIntraDistortionBuffer.dwWidth * m_brcBuffers.sBrcIntraDistortionBuffer.dwHeight, |
| 0, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE));) |
| |
| if (m_lcuBrcEnabled || m_hevcPicParams->NumROI) |
| { |
| // LCU-based BRC needs to have frame-based one to be call first in order to get HCP_IMG_STATE command result |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hevcBrcG12->EncodeBrcLcuUpdateKernel()); |
| m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame; |
| } |
| else |
| { |
| m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame; |
| } |
| |
| CODECHAL_DEBUG_TOOL( |
| if (!Mos_ResourceIsNull(&m_brcBuffers.sBrcMbQpBuffer.OsResource)) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.sBrcMbQpBuffer.OsResource, |
| CodechalDbgAttr::attrOutput, |
| "MbQp", |
| m_brcBuffers.sBrcMbQpBuffer.dwPitch * m_brcBuffers.sBrcMbQpBuffer.dwHeight, |
| m_brcBuffers.dwBrcMbQpBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE)); |
| } CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(&m_brcDistortion->OsResource, |
| CodechalDbgAttr::attrInput, |
| "BrcDist_AfterLcuBrcUpdate", |
| m_brcBuffers.sMeBrcDistortionBuffer.dwPitch * m_brcBuffers.sMeBrcDistortionBuffer.dwHeight, |
| m_brcBuffers.dwMeBrcDistortionBottomFieldOffset, |
| CODECHAL_MEDIA_STATE_BRC_UPDATE));) |
| } |
| |
| m_useWeightedSurfaceForL0 = false; |
| m_useWeightedSurfaceForL1 = false; |
| |
| //currently only support same weightoffset for all slices, and only support Luma weighted prediction |
| auto slicetype = m_hevcSliceParams->slice_type; |
| if (m_weightedPredictionSupported && !m_feiEnable && |
| ((slicetype == CODECHAL_HEVC_P_SLICE && m_hevcPicParams->weighted_pred_flag) || |
| (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag))) |
| { |
| uint32_t LumaWeightFlag[2] = {0}; //[L0, L1] |
| CodechalEncodeWP::SliceParams sliceWPParams; |
| MOS_FillMemory((void *)&sliceWPParams, sizeof(sliceWPParams), 0); |
| |
| //populate the slice WP parameter structure |
| sliceWPParams.luma_log2_weight_denom = m_hevcSliceParams->luma_log2_weight_denom; // luma weidht denom |
| for (auto i = 0; i < 2; i++) |
| { |
| for (auto j = 0; j < CODEC_MAX_NUM_REF_FRAME_HEVC; j++) |
| { |
| sliceWPParams.weights[i][j][0][0] = (1 << m_hevcSliceParams->luma_log2_weight_denom) + |
| m_hevcSliceParams->delta_luma_weight[i][j]; //Luma weight |
| sliceWPParams.weights[i][j][0][1] = m_hevcSliceParams->luma_offset[i][j]; //Luma offset |
| |
| if (m_hevcSliceParams->delta_luma_weight[i][j] || m_hevcSliceParams->luma_offset[i][j]) |
| { |
| LumaWeightFlag[i] |= (1 << j); |
| } |
| } |
| } |
| |
| CodechalEncodeWP::KernelParams wpKernelParams; |
| MOS_FillMemory((void *)&wpKernelParams, sizeof(wpKernelParams), 0); |
| wpKernelParams.useWeightedSurfaceForL0 = &m_useWeightedSurfaceForL0; |
| wpKernelParams.useWeightedSurfaceForL1 = &m_useWeightedSurfaceForL1; |
| wpKernelParams.slcWPParams = &sliceWPParams; |
| |
| // Weighted Prediction to be applied for L0 |
| for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1); i++) |
| { |
| if ((LumaWeightFlag[LIST_0] & (1 << i)) && (i < CODEC_MAX_FORWARD_WP_FRAME)) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][i]; |
| if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid) |
| { |
| MOS_SURFACE refFrameInput; |
| uint8_t frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| refFrameInput = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer; |
| |
| //Weighted Prediction for ith forward reference frame |
| wpKernelParams.useRefPicList1 = false; |
| wpKernelParams.wpIndex = i; |
| wpKernelParams.refFrameInput = &refFrameInput; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams)); |
| } |
| } |
| } |
| |
| // Weighted Predition to be applied for L1 |
| if (slicetype == CODECHAL_HEVC_B_SLICE && m_hevcPicParams->weighted_bipred_flag) |
| { |
| for (auto i = 0; i < (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1); i++) |
| { |
| if ((LumaWeightFlag[LIST_1] & (1 << i)) && (i < CODEC_MAX_BACKWARD_WP_FRAME)) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_1][i]; |
| if (!CodecHal_PictureIsInvalid(refPic) && m_picIdx[refPic.FrameIdx].bValid) |
| { |
| MOS_SURFACE refFrameInput; |
| uint8_t frameIndex = m_picIdx[refPic.FrameIdx].ucPicIdx; |
| refFrameInput = m_hevcPicParams->bUseRawPicForRef ? m_refList[frameIndex]->sRefRawBuffer : m_refList[frameIndex]->sRefReconBuffer; |
| |
| //Weighted Prediction for ith backward reference frame |
| wpKernelParams.useRefPicList1 = true; |
| wpKernelParams.wpIndex = i; |
| wpKernelParams.refFrameInput = &refFrameInput; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->Execute(&wpKernelParams)); |
| } |
| } |
| } |
| } |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| |
| MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData; |
| // Weighted prediction for L0 Reporting |
| userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__; |
| userFeatureWriteData.Value.i32Data = m_useWeightedSurfaceForL0; |
| userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_WEIGHTED_PREDICTION_L0_IN_USE_ID; |
| MOS_UserFeature_WriteValues_ID(NULL, &userFeatureWriteData, 1); |
| // Weighted prediction for L1 Reporting |
| userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__; |
| userFeatureWriteData.Value.i32Data = m_useWeightedSurfaceForL1; |
| userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_WEIGHTED_PREDICTION_L1_IN_USE_ID; |
| MOS_UserFeature_WriteValues_ID(NULL, &userFeatureWriteData, 1); |
| |
| #endif // _DEBUG || _RELEASE_INTERNAL |
| |
| // Reset to use a different performance tag ID |
| m_osInterface->pfnResetPerfBufferID(m_osInterface); |
| |
| m_lastTaskInPhase = true; |
| |
| if (m_hevcPicParams->CodingType == I_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(CODECHAL_MEDIA_STATE_HEVC_I_MBENC)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| } |
| |
| if (m_brcEnabled && m_enableFramePanicMode && (false == m_hevcSeqParams->DisableHRDConformance) && |
| m_skipFrameInfo.numSlices != m_numSlices) // 'numSlices != m_numSlices' check is to re-generate surface if slice layout changed from previous frame |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSkipFrameMbCodeSurface(m_skipFrameInfo)); |
| } |
| |
| // Notify PAK engine once ENC is done |
| if (!Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse)) |
| { |
| MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; |
| if (m_useMdf) |
| { |
| if (!m_computeContextEnabled) |
| { |
| syncParams.GpuContext = MOS_GPU_CONTEXT_RENDER3; //MDF uses render3 |
| } |
| else |
| { |
| syncParams.GpuContext = MOS_GPU_CONTEXT_CM_COMPUTE; |
| } |
| } |
| else |
| { |
| syncParams.GpuContext = m_renderContext; |
| } |
| syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse; |
| |
| uint32_t old_stream_index = m_osInterface->streamIndex; |
| m_osInterface->streamIndex = static_cast<CmQueueRT *>(m_cmQueue)->StreamIndex(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_osInterface->pfnEngineSignal(m_osInterface, &syncParams)); |
| m_osInterface->streamIndex = old_stream_index; |
| } |
| |
| if (m_brcEnabled) |
| { |
| if (m_hevcSeqParams->ParallelBRC) |
| { |
| m_brcBuffers.uiCurrBrcPakStasIdxForRead = |
| (m_brcBuffers.uiCurrBrcPakStasIdxForRead + 1) % CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; |
| } |
| } |
| |
| CODECHAL_DEBUG_TOOL( |
| uint8_t index; |
| CODEC_PICTURE refPic; |
| if (m_useWeightedSurfaceForL0) { |
| refPic = m_hevcSliceParams->RefPicList[LIST_0][0]; |
| index = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_refList[index]->sRefBuffer, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| "WP_In_L0"))); |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L0_START + 0), |
| CodechalDbgAttr::attrReferenceSurfaces, |
| "WP_Out_L0"))); |
| } if (m_useWeightedSurfaceForL1) { |
| refPic = m_hevcSliceParams->RefPicList[LIST_1][0]; |
| index = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_refList[index]->sRefBuffer, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| "WP_In_L1"))); |
| |
| CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| m_wpState->GetWPOutputPicList(CODEC_WP_OUTPUT_L1_START + 0), |
| CodechalDbgAttr::attrReferenceSurfaces, |
| "WP_Out_L1"))); |
| }) |
| |
| m_lastPictureCodingType = m_pictureCodingType; |
| m_lastRecycledBufIdx = m_currRecycledBufIdx; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeIntraDistKernel() |
| { |
| CodechalKernelIntraDist::CurbeParam curbeParam; |
| curbeParam.downScaledWidthInMb4x = m_downscaledWidthInMb4x; |
| curbeParam.downScaledHeightInMb4x = m_downscaledHeightInMb4x; |
| |
| CodechalKernelIntraDist::SurfaceParams surfaceParam; |
| surfaceParam.input4xDsSurface = |
| surfaceParam.input4xDsVmeSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER); |
| surfaceParam.intraDistSurface = m_brcDistortion; |
| surfaceParam.intraDistBottomFieldOffset = m_brcBuffers.dwMeBrcDistortionBottomFieldOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Execute(curbeParam, surfaceParam)); |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::InitKernelState() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // Init kernel state |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc()); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc()); |
| |
| // Create weighted prediction kernel state |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_wpState = MOS_New(CodechalEncodeWPG12, this)); |
| m_wpState->SetKernelBase(m_kernelBase); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_wpState->InitKernelState()); |
| // create intra distortion kernel |
| m_intraDistKernel = MOS_New(CodechalKernelIntraDist, this); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_intraDistKernel); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_intraDistKernel->Initialize( |
| GetCommonKernelHeaderAndSizeG12, |
| m_kernelBase, |
| m_kuidCommon)); |
| |
| // Create SW scoreboard init kernel state |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_swScoreboardState = MOS_New(CodechalEncodeSwScoreboardG12, this)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_swScoreboardState->InitKernelState()); |
| // Create Hme kernel |
| m_hmeKernel = MOS_New(CodechalKernelHmeG12, this); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_hmeKernel); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Initialize( |
| GetCommonKernelHeaderAndSizeG12, |
| m_kernelBase, |
| m_kuidCommon)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetDmemHuCPakIntegrate( |
| PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES || !m_brcEnabled) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| HucPakStitchDmemEncG12 *hucPakStitchDmem = (HucPakStitchDmemEncG12 *)m_osInterface->pfnLockResource( |
| m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem); |
| |
| MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG12)); |
| |
| // reset all the offsets to -1 |
| uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) + |
| sizeof(hucPakStitchDmem->VDENCSTAT_offset) + |
| sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) + |
| sizeof(hucPakStitchDmem->HEVC_Streamout_offset) + |
| sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) + |
| sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset); |
| MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF); |
| |
| uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4 |
| CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4 |
| uint16_t numTiles = numTileRows * numTileColumns; |
| uint16_t numTilesPerPipe = m_numTiles / m_numPipe; |
| |
| hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth; |
| hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight; |
| hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe; |
| hucPakStitchDmem->Codec = 1; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc |
| hucPakStitchDmem->MAXPass = m_brcEnabled ? (m_numPassesInOnePipe + 1) : 1; |
| hucPakStitchDmem->CurrentPass = (uint8_t)currentPass + 1; // // Current BRC pass [1..MAXPass] |
| hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| hucPakStitchDmem->CabacZeroWordFlag = true; // to do: set to true later |
| hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc; |
| hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE; |
| // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record |
| hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8; |
| hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE; |
| |
| hucPakStitchDmem->StitchEnable = false; |
| hucPakStitchDmem->StitchCommandOffset = 0; |
| hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END; |
| hucPakStitchDmem->brcUnderFlowEnable = false; //temporally disable underflow bit rate control in HUC fw since it need more tuning. |
| |
| PCODEC_ENCODER_SLCDATA slcData = m_slcData; |
| CODECHAL_ENCODE_CHK_NULL_RETURN(slcData); |
| uint32_t totalSliceHeaderSize = 0; |
| for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| totalSliceHeaderSize += (slcData->BitSize + 7) >> 3; |
| slcData++; |
| } |
| hucPakStitchDmem->SliceHeaderSizeinBits = totalSliceHeaderSize * 8; |
| hucPakStitchDmem->currFrameBRClevel = m_currFrameBrcLevel; |
| |
| //Set the kernel output offsets |
| hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord; |
| hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = m_hevcFrameStatsOffset.uiHevcPakStatistics; |
| hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF; |
| hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF; |
| |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| hucPakStitchDmem->NumTiles[i] = numTilesPerPipe; |
| |
| // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic. |
| // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region. |
| hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + |
| m_hevcTileStatsOffset.uiTileSizeRecord; |
| hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiHevcPakStatistics) + |
| m_hevcTileStatsOffset.uiHevcPakStatistics; |
| } |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass])); |
| |
| MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS)); |
| dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]); |
| dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE); |
| dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetRegionsHuCPakIntegrate( |
| PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || |
| (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) || |
| (currentPass != 0 && m_cqpEnabled)) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer()); |
| MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS)); |
| // Add Virtual addr |
| virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc |
| virtualAddrParams->regionParams[0].dwOffset = 0; |
| virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output |
| virtualAddrParams->regionParams[1].isWritable = true; |
| virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream |
| virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command |
| virtualAddrParams->regionParams[5].isWritable = true; |
| virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer; // Region 6 History Buffer (Input/Output) |
| virtualAddrParams->regionParams[6].isWritable = true; |
| virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command |
| virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data |
| virtualAddrParams->regionParams[9].isWritable = true; |
| virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation |
| virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc |
| virtualAddrParams->regionParams[10].isWritable = true; |
| virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer |
| virtualAddrParams->regionParams[15].dwOffset = 0; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetDmemHuCPakIntegrateCqp( |
| PMHW_VDBOX_HUC_DMEM_STATE_PARAMS dmemParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass != 0 || (!m_cqpEnabled && m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ)) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| HucPakStitchDmemEncG12 *hucPakStitchDmem = (HucPakStitchDmemEncG12 *)m_osInterface->pfnLockResource( |
| m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]), &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(hucPakStitchDmem); |
| |
| MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakStitchDmemEncG12)); |
| |
| // reset all the offsets to -1 |
| uint32_t TotalOffsetSize = sizeof(hucPakStitchDmem->TileSizeRecord_offset) + |
| sizeof(hucPakStitchDmem->VDENCSTAT_offset) + |
| sizeof(hucPakStitchDmem->HEVC_PAKSTAT_offset) + |
| sizeof(hucPakStitchDmem->HEVC_Streamout_offset) + |
| sizeof(hucPakStitchDmem->VP9_PAK_STAT_offset) + |
| sizeof(hucPakStitchDmem->Vp9CounterBuffer_offset); |
| MOS_FillMemory(hucPakStitchDmem, TotalOffsetSize, 0xFF); |
| |
| uint16_t numTileRows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| uint16_t numTileColumns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| CODECHAL_ENCODE_ASSERT(numTileColumns > 0 && numTileColumns % 2 == 0); //numTileColumns is nonzero and even number; 2 or 4 |
| CODECHAL_ENCODE_ASSERT(m_numPipe > 0 && m_numPipe % 2 == 0 && numTileColumns <= m_numPipe); //ucNumPipe is nonzero and even number; 2 or 4 |
| uint16_t numTiles = numTileRows * numTileColumns; |
| uint16_t numTilesPerPipe = m_numTiles / m_numPipe; |
| |
| hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_frameWidth; |
| hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_frameHeight; |
| hucPakStitchDmem->TotalNumberOfPAKs = m_numPipe; |
| hucPakStitchDmem->Codec = 2; //HEVC DP CQP |
| hucPakStitchDmem->MAXPass = 1; |
| hucPakStitchDmem->CurrentPass = 1; |
| hucPakStitchDmem->MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| hucPakStitchDmem->CabacZeroWordFlag = true; |
| hucPakStitchDmem->bitdepth_luma = m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->bitdepth_chroma = m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8 |
| hucPakStitchDmem->ChromaFormatIdc = m_hevcSeqParams->chroma_format_idc; |
| hucPakStitchDmem->TotalSizeInCommandBuffer = m_numTiles * CODECHAL_CACHELINE_SIZE; |
| // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record |
| hucPakStitchDmem->OffsetInCommandBuffer = m_tileParams[m_numTiles - 1].TileSizeStreamoutOffset * CODECHAL_CACHELINE_SIZE + 8; |
| hucPakStitchDmem->LastTileBS_StartInBytes = m_tileParams[m_numTiles - 1].BitstreamByteOffset * CODECHAL_CACHELINE_SIZE; |
| |
| hucPakStitchDmem->StitchEnable = false; |
| hucPakStitchDmem->StitchCommandOffset = 0; |
| hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END; |
| |
| //Set the kernel output offsets |
| hucPakStitchDmem->TileSizeRecord_offset[0] = m_hevcFrameStatsOffset.uiTileSizeRecord; |
| hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = 0xFFFFFFFF; |
| hucPakStitchDmem->HEVC_Streamout_offset[0] = 0xFFFFFFFF; |
| hucPakStitchDmem->VDENCSTAT_offset[0] = 0xFFFFFFFF; |
| |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| hucPakStitchDmem->NumTiles[i] = numTilesPerPipe; |
| |
| // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic. |
| // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region. |
| hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * m_hevcStatsSize.uiTileSizeRecord) + |
| m_hevcTileStatsOffset.uiTileSizeRecord; |
| } |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass])); |
| |
| MOS_ZeroMemory(dmemParams, sizeof(MHW_VDBOX_HUC_DMEM_STATE_PARAMS)); |
| dmemParams->presHucDataSource = &(m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass]); |
| dmemParams->dwDataLength = MOS_ALIGN_CEIL(sizeof(HucPakStitchDmemEncG12), CODECHAL_CACHELINE_SIZE); |
| dmemParams->dwDmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ConfigStitchDataBuffer() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || |
| (currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES && m_brcEnabled) || |
| (currentPass != 0 && m_cqpEnabled)) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = 1; |
| |
| HucCommandData *hucStitchDataBuf = (HucCommandData *)m_osInterface->pfnLockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], &lockFlagsWriteOnly); |
| |
| MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData)); |
| hucStitchDataBuf->TotalCommands = 1; |
| hucStitchDataBuf->InputCOM[0].SizeOfData = 0xF; |
| |
| HucInputCmdG12 hucInputCmd; |
| MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmdG12)); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface); |
| hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0; |
| hucInputCmd.CmdMode = HUC_CMD_LIST_MODE; |
| hucInputCmd.LengthOfTable = (uint8_t)(m_numTiles); |
| hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize; |
| ; |
| |
| PMOS_RESOURCE presSrc = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( |
| m_osInterface, |
| presSrc, |
| false, |
| false)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( |
| m_osInterface, |
| &m_resBitstreamBuffer, |
| true, |
| true)); |
| |
| uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc); |
| uint64_t destAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_resBitstreamBuffer); |
| hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF); |
| hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32); |
| |
| hucInputCmd.DestAddrBottom = (uint32_t)(destAddr & 0x00000000FFFFFFFF); |
| hucInputCmd.DestAddrTop = (uint32_t)((destAddr & 0xFFFFFFFF00000000) >> 32); |
| |
| MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmdG12), &hucInputCmd, sizeof(HucInputCmdG12)); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetRegionsHuCPakIntegrateCqp( |
| PMHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS virtualAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || |
| (m_hevcSeqParams->RateControlMethod != RATECONTROL_ICQ && m_brcEnabled) || |
| (currentPass != 0 && m_cqpEnabled)) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| MOS_ZeroMemory(virtualAddrParams, sizeof(MHW_VDBOX_HUC_VIRTUAL_ADDR_PARAMS)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer()); |
| |
| // Add Virtual addr |
| virtualAddrParams->regionParams[0].presRegion = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; // Region 0 - Tile based input statistics from PAK/ VDEnc |
| virtualAddrParams->regionParams[0].dwOffset = 0; |
| virtualAddrParams->regionParams[1].presRegion = &m_resHuCPakAggregatedFrameStatsBuffer.sResource; // Region 1 - HuC Frame statistics output |
| virtualAddrParams->regionParams[1].isWritable = true; |
| virtualAddrParams->regionParams[4].presRegion = &m_resBitstreamBuffer; // Region 4 - Last Tile bitstream |
| virtualAddrParams->regionParams[5].presRegion = &m_resBitstreamBuffer; // Region 5 - HuC modifies the last tile bitstream before stitch command |
| virtualAddrParams->regionParams[5].isWritable = true; |
| virtualAddrParams->regionParams[6].presRegion = &m_brcBuffers.resBrcHistoryBuffer; // Region 6 History Buffer (Input/Output) |
| virtualAddrParams->regionParams[6].isWritable = true; |
| virtualAddrParams->regionParams[7].presRegion = &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]; //&m_resHucPakStitchReadBatchBuffer; // Region 7 - HCP PIC state command |
| |
| virtualAddrParams->regionParams[9].presRegion = &m_resBrcDataBuffer; // Region 9 HuC outputs BRC data |
| virtualAddrParams->regionParams[9].isWritable = true; |
| virtualAddrParams->regionParams[8].presRegion = &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass]; // Region 8 - data buffer read by HUC for stitching cmd generation |
| virtualAddrParams->regionParams[10].presRegion = &m_HucStitchCmdBatchBuffer.OsResource; // Region 10 - SLB for stitching cmd output from Huc |
| virtualAddrParams->regionParams[10].isWritable = true; |
| virtualAddrParams->regionParams[15].presRegion = &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource; // Region 15 [In/Out] - Tile Record Buffer |
| virtualAddrParams->regionParams[15].dwOffset = 0; |
| |
| return eStatus; |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| MOS_STATUS CodechalEncHevcStateG12::ResetImgCtrlRegInPAKStatisticsBuffer( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| MOS_ZeroMemory(&storeDataParams, sizeof(storeDataParams)); |
| storeDataParams.pOsResource = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| storeDataParams.dwResourceOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL); |
| storeDataParams.dwValue = 0; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd( |
| cmdBuffer, |
| &storeDataParams)); |
| |
| return eStatus; |
| } |
| #endif |
| |
| MOS_STATUS CodechalEncHevcStateG12::ReadBrcPakStatisticsForScalability( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams; |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams)); |
| miCpyMemMemParams.presSrc = &m_resBrcDataBuffer; |
| miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCount); |
| miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams)); |
| |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams)); |
| miCpyMemMemParams.presSrc = &m_resBrcDataBuffer; |
| miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, FrameByteCountNoHeader); |
| miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams)); |
| |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams)); |
| miCpyMemMemParams.presSrc = &m_resBrcDataBuffer; |
| miCpyMemMemParams.dwSrcOffset = CODECHAL_OFFSETOF(PakIntegrationBrcData, HCP_ImageStatusControl); |
| miCpyMemMemParams.presDst = &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite]; |
| miCpyMemMemParams.dwDstOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams)); |
| |
| uint32_t dwOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + |
| m_encodeStatusBuf.dwNumPassesOffset + // Num passes offset |
| sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource |
| |
| MHW_MI_STORE_DATA_PARAMS storeDataParams; |
| storeDataParams.pOsResource = &m_encodeStatusBuf.resStatusBuffer; |
| storeDataParams.dwResourceOffset = dwOffset; |
| storeDataParams.dwValue = (uint8_t)GetCurrentPass(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &storeDataParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::DumpHucDebugOutputBuffers() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| //only dump HuC in/out buffers in brc scalability case |
| bool dumpDebugBuffers = IsLastPipe() && (m_numPipe >= 2) && m_brcEnabled; |
| if (m_singleTaskPhaseSupported) |
| { |
| dumpDebugBuffers = dumpDebugBuffers && IsLastPass(); |
| } |
| |
| if (dumpDebugBuffers) |
| { |
| CODECHAL_DEBUG_TOOL( |
| int32_t currentPass = GetCurrentPass(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem( |
| &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass], |
| sizeof(HucPakStitchDmemEncG12), |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 7 - HEVC PIC State Command |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], |
| 0, |
| m_hwInterface->m_vdenc2ndLevelBatchBufferSize, |
| 7, |
| "_PicState", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 5 - Last Tile PAK Bitstream Output |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_resBitstreamBuffer, |
| 0, |
| m_encodeParams.dwBitstreamSize, |
| 5, |
| "_Bitstream", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| |
| // Region 6 - BRC History buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_brcBuffers.resBrcHistoryBuffer, |
| 0, |
| m_brcHistoryBufferSize, |
| 6, |
| "_HistoryBuffer", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| // Region 9 - HCP BRC Data Output |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_resBrcDataBuffer, |
| 0, |
| CODECHAL_CACHELINE_SIZE, |
| 9, |
| "_HcpBrcData", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| // Region 1 - Output Aggregated Frame Level Statistics |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_resHuCPakAggregatedFrameStatsBuffer.sResource, |
| 0, |
| m_hwInterface->m_pakIntAggregatedFrameStatsSize, // program exact out size |
| 1, |
| "_AggregateFrameStats", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| // Region 0 - Tile Statistics Constant Buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource, |
| 0, |
| m_hwInterface->m_pakIntTileStatsSize, |
| 0, |
| "_TileBasedStats", |
| true, |
| currentPass, |
| hucRegionDumpPakIntegrate)); |
| // Region 15 - Tile Record Buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucRegion( |
| &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource, |
| 0, |
| m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize, |
| 15, |
| "_TileRecord", |
| false, |
| currentPass, |
| hucRegionDumpPakIntegrate));) |
| } |
| |
| return eStatus; |
| } |
| |
| CodechalEncHevcStateG12::CodechalEncHevcStateG12( |
| CodechalHwInterface * hwInterface, |
| CodechalDebugInterface *debugInterface, |
| PCODECHAL_STANDARD_INFO standardInfo) |
| : CodechalEncHevcState(hwInterface, debugInterface, standardInfo) |
| { |
| m_2xMeSupported = |
| m_useCommonKernel = true; |
| m_useHwScoreboard = false; |
| #if defined(ENABLE_KERNELS) && !defined(_FULL_OPEN_SOURCE) |
| m_kernelBase = (uint8_t *)IGCODECKRN_G12; |
| #else |
| m_kernelBase = nullptr; |
| #endif |
| m_kuidCommon = IDR_CODEC_HME_DS_SCOREBOARD_KERNEL; |
| m_hucPakStitchEnabled = true; |
| m_scalabilityState = nullptr; |
| |
| MOS_ZeroMemory(&m_currPicWithReconBoundaryPix, sizeof(m_currPicWithReconBoundaryPix)); |
| MOS_ZeroMemory(&m_lcuLevelInputDataSurface, sizeof(m_lcuLevelInputDataSurface)); |
| MOS_ZeroMemory(&m_encoderHistoryInputBuffer, sizeof(m_encoderHistoryInputBuffer)); |
| MOS_ZeroMemory(&m_encoderHistoryOutputBuffer, sizeof(m_encoderHistoryOutputBuffer)); |
| MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu32, sizeof(m_intermediateCuRecordSurfaceLcu32)); |
| MOS_ZeroMemory(&m_scratchSurface, sizeof(m_scratchSurface)); |
| MOS_ZeroMemory(&m_16x16QpInputData, sizeof(m_16x16QpInputData)); |
| MOS_ZeroMemory(m_debugSurface, sizeof(m_debugSurface)); |
| MOS_ZeroMemory(&m_encConstantTableForB, sizeof(m_encConstantTableForB)); |
| MOS_ZeroMemory(&m_mvAndDistortionSumSurface, sizeof(m_mvAndDistortionSumSurface)); |
| MOS_ZeroMemory(m_encBCombinedBuffer1, sizeof(m_encBCombinedBuffer1)); |
| MOS_ZeroMemory(m_encBCombinedBuffer2, sizeof(m_encBCombinedBuffer2)); |
| |
| MOS_ZeroMemory(&m_resPakcuLevelStreamoutData, sizeof(m_resPakcuLevelStreamoutData)); |
| MOS_ZeroMemory(&m_resPakSliceLevelStreamoutData, sizeof(m_resPakSliceLevelStreamoutData)); |
| MOS_ZeroMemory(m_resTileBasedStatisticsBuffer, sizeof(m_resTileBasedStatisticsBuffer)); |
| MOS_ZeroMemory(&m_resHuCPakAggregatedFrameStatsBuffer, sizeof(m_resHuCPakAggregatedFrameStatsBuffer)); |
| MOS_ZeroMemory(m_tileRecordBuffer, sizeof(m_tileRecordBuffer)); |
| MOS_ZeroMemory(&m_kmdVeOveride, sizeof(m_kmdVeOveride)); |
| MOS_ZeroMemory(&m_resHcpScalabilitySyncBuffer, sizeof(m_resHcpScalabilitySyncBuffer)); |
| |
| MOS_ZeroMemory(m_veBatchBuffer, sizeof(m_veBatchBuffer)); |
| MOS_ZeroMemory(&m_realCmdBuffer, sizeof(m_realCmdBuffer)); |
| MOS_ZeroMemory(&m_resBrcSemaphoreMem, sizeof(m_resBrcSemaphoreMem)); |
| MOS_ZeroMemory(&m_resBrcPakSemaphoreMem, sizeof(m_resBrcPakSemaphoreMem)); |
| MOS_ZeroMemory(&m_resPipeStartSemaMem, sizeof(m_resPipeStartSemaMem)); |
| MOS_ZeroMemory(&m_resPipeCompleteSemaMem, sizeof(m_resPipeCompleteSemaMem)); |
| MOS_ZeroMemory(m_resHucPakStitchDmemBuffer, sizeof(m_resHucPakStitchDmemBuffer)); |
| MOS_ZeroMemory(&m_resBrcDataBuffer, sizeof(m_resBrcDataBuffer)); |
| MOS_ZeroMemory(&m_skipFrameInfo.m_resMbCodeSkipFrameSurface, sizeof(m_skipFrameInfo.m_resMbCodeSkipFrameSurface)); |
| |
| m_hwInterface->GetStateHeapSettings()->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS; |
| m_hwInterface->GetStateHeapSettings()->dwDshSize = CODECHAL_INIT_DSH_SIZE_HEVC_ENC; |
| |
| m_kuid = IDR_CODEC_HEVC_COMBINED_KENREL_INTEL; |
| MOS_STATUS eStatus = CodecHalGetKernelBinaryAndSize( |
| m_kernelBase, |
| m_kuid, |
| &m_kernelBinary, |
| &m_combinedKernelSize); |
| CODECHAL_ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS); |
| |
| m_hwInterface->GetStateHeapSettings()->dwIshSize += |
| MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT)); |
| |
| Mos_CheckVirtualEngineSupported(m_osInterface, false, true); |
| |
| Mos_SetVirtualEngineSupported(m_osInterface, true); |
| } |
| |
| CodechalEncHevcStateG12::~CodechalEncHevcStateG12() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (m_wpState) |
| { |
| MOS_Delete(m_wpState); |
| m_wpState = nullptr; |
| } |
| |
| if (m_intraDistKernel) |
| { |
| MOS_Delete(m_intraDistKernel); |
| m_intraDistKernel = nullptr; |
| } |
| |
| if (m_swScoreboardState) |
| { |
| MOS_Delete(m_swScoreboardState); |
| m_swScoreboardState = nullptr; |
| } |
| |
| if (m_scalabilityState) |
| { |
| MOS_FreeMemAndSetNull(m_scalabilityState); |
| } |
| |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| if (m_statusReportDebugInterface != nullptr) |
| { |
| MOS_Delete(m_statusReportDebugInterface); |
| m_statusReportDebugInterface = nullptr; |
| } |
| #endif |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::Allocate(CodechalSetting *codecHalSettings) |
| { |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| if (!m_statusReportDebugInterface) |
| { |
| m_statusReportDebugInterface = MOS_New(CodechalDebugInterface); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_statusReportDebugInterface); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_statusReportDebugInterface->Initialize(m_hwInterface, codecHalSettings->codecFunction)); |
| } |
| #endif |
| |
| return CodechalEncoderState::Allocate(codecHalSettings); |
| } |
| |
| uint32_t CodechalEncHevcStateG12::CodecHalHevc_GetFileSize(char *fileName) |
| { |
| FILE * fp = nullptr; |
| uint32_t fileSize = 0; |
| MOS_SecureFileOpen(&fp, fileName, "rb"); |
| if (fp == nullptr) |
| { |
| return 0; |
| } |
| fseek(fp, 0, SEEK_END); |
| fileSize = ftell(fp); |
| fseek(fp, 0, SEEK_SET); |
| fclose(fp); |
| |
| return fileSize; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::LoadSourceAndRef2xDSFromFile( |
| PMOS_SURFACE pRef2xSurface, |
| PMOS_SURFACE pSrc2xSurface, |
| uint8_t reflist, |
| uint8_t refIdx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (m_loadKernelInput == false || (pSrc2xSurface && Mos_ResourceIsNull(&pSrc2xSurface->OsResource)) || |
| (pRef2xSurface && Mos_ResourceIsNull(&pRef2xSurface->OsResource)) || |
| (pSrc2xSurface == NULL && pRef2xSurface == NULL)) |
| { |
| return eStatus; |
| } |
| |
| char pathOfRef2xDSCmd[MOS_USER_CONTROL_MAX_DATA_SIZE]; |
| MOS_SecureStringPrint(pathOfRef2xDSCmd, |
| sizeof(pathOfRef2xDSCmd), |
| sizeof(pathOfRef2xDSCmd), |
| "%s\\Ref2xDSL%1d%1d.dat.%d", |
| m_loadKernelInputDataFolder, |
| reflist, |
| refIdx, |
| m_frameNum); |
| char pathOfSrc2xDSCmd[MOS_USER_CONTROL_MAX_DATA_SIZE]; |
| MOS_SecureStringPrint(pathOfSrc2xDSCmd, |
| sizeof(pathOfSrc2xDSCmd), |
| sizeof(pathOfSrc2xDSCmd), |
| "%s\\Src2xDS.dat.%d", |
| m_loadKernelInputDataFolder, |
| m_frameNum); |
| |
| uint32_t sizeRef2xDS = CodecHalHevc_GetFileSize(pathOfRef2xDSCmd); |
| uint32_t sizeSrc2xDS = CodecHalHevc_GetFileSize(pathOfSrc2xDSCmd); |
| if (sizeRef2xDS == 0 && sizeSrc2xDS == 0) |
| return MOS_STATUS_SUCCESS; |
| MOS_LOCK_PARAMS lockFlags; |
| |
| if (pRef2xSurface && sizeRef2xDS) |
| { |
| if (sizeRef2xDS > (pRef2xSurface->dwPitch * pRef2xSurface->dwHeight * 3 / 2)) |
| { |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, &pRef2xSurface->OsResource, &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| FILE *Ref2xDS = nullptr; |
| eStatus = MOS_SecureFileOpen(&Ref2xDS, pathOfRef2xDSCmd, "rb"); |
| if (Ref2xDS == nullptr) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource); |
| return eStatus; |
| } |
| |
| uint32_t sizeToRead = sizeRef2xDS * 2 / 3; |
| if (sizeToRead != fread((void *)data, 1, sizeToRead, Ref2xDS)) |
| { |
| fclose(Ref2xDS); |
| m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource); |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| fclose(Ref2xDS); |
| //MOS_ZeroMemory(data + sizeToRead, sizeRef2xDS-sizeToRead); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &pRef2xSurface->OsResource); |
| } |
| |
| if (pSrc2xSurface && sizeSrc2xDS) |
| { |
| if (sizeSrc2xDS > (pSrc2xSurface->dwPitch * pSrc2xSurface->dwHeight * 3 / 2)) |
| { |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, &pSrc2xSurface->OsResource, &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| FILE *Src2xDS = nullptr; |
| eStatus = MOS_SecureFileOpen(&Src2xDS, pathOfSrc2xDSCmd, "rb"); |
| if (Src2xDS == nullptr) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource); |
| return eStatus; |
| } |
| |
| uint32_t sizeToRead = sizeSrc2xDS * 2 / 3; |
| if (sizeToRead != fread((void *)data, 1, sizeToRead, Src2xDS)) |
| { |
| fclose(Src2xDS); |
| m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource); |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| fclose(Src2xDS); |
| //MOS_ZeroMemory(data + sizeToRead, sizeRef2xDS-sizeToRead); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &pSrc2xSurface->OsResource); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::LoadPakCommandAndCuRecordFromFile() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| char pathOfPakCmd[MOS_USER_CONTROL_MAX_DATA_SIZE]; |
| MOS_SecureStringPrint(pathOfPakCmd, |
| sizeof(pathOfPakCmd), |
| sizeof(pathOfPakCmd), |
| "%s\\PAKObj.dat.%d", |
| m_pakOnlyDataFolder, |
| m_frameNum); |
| |
| char pathOfCuRecord[MOS_USER_CONTROL_MAX_DATA_SIZE]; |
| MOS_SecureStringPrint(pathOfCuRecord, |
| sizeof(pathOfCuRecord), |
| sizeof(pathOfCuRecord), |
| "%s\\CURecord.dat.%d", |
| m_pakOnlyDataFolder, |
| m_frameNum); |
| |
| uint32_t sizePakObj = CodecHalHevc_GetFileSize(pathOfPakCmd); |
| if (sizePakObj == 0 || sizePakObj > m_mvOffset) |
| { |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| |
| uint32_t sizeCuRecord = CodecHalHevc_GetFileSize(pathOfCuRecord); |
| if (sizeCuRecord == 0 || sizeCuRecord > m_mbCodeSize - m_mvOffset) |
| { |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, &m_resMbCodeSurface, &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| FILE *pakObj = nullptr; |
| eStatus = MOS_SecureFileOpen(&pakObj, pathOfPakCmd, "rb"); |
| if (pakObj == nullptr) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface); |
| return eStatus; |
| } |
| |
| uint8_t *pakCmd = data; |
| if (sizePakObj != fread((void *)pakCmd, 1, sizePakObj, pakObj)) |
| { |
| fclose(pakObj); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface); |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| fclose(pakObj); |
| |
| uint8_t *record = data + m_mvOffset; |
| FILE * fRecord = nullptr; |
| eStatus = MOS_SecureFileOpen(&fRecord, pathOfCuRecord, "rb"); |
| if (fRecord == nullptr) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface); |
| return eStatus; |
| } |
| |
| if (sizeCuRecord != fread((void *)record, 1, sizeCuRecord, fRecord)) |
| { |
| fclose(fRecord); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface); |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| fclose(fRecord); |
| |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface); |
| |
| if (m_brcEnabled) |
| { |
| //Image State |
| char pathOfPicState[MOS_USER_CONTROL_MAX_DATA_SIZE]; |
| MOS_SecureStringPrint(pathOfPicState, |
| sizeof(pathOfPicState), |
| sizeof(pathOfPicState), |
| "%s\\BrcUpdate_ImgStateWrite.dat.%d", |
| m_pakOnlyDataFolder, |
| m_frameNum); |
| |
| uint32_t sizePicState = CodecHalHevc_GetFileSize(pathOfPicState); |
| if (sizePicState == 0) |
| { |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| |
| data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx], &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| FILE *fPicState = nullptr; |
| eStatus = MOS_SecureFileOpen(&fPicState, pathOfPicState, "rb"); |
| if (fPicState == nullptr) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]); |
| return eStatus; |
| } |
| |
| if (sizePicState != fread((void *)data, 1, sizePicState, fPicState)) |
| { |
| fclose(fPicState); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]); |
| return MOS_STATUS_INVALID_FILE_SIZE; |
| } |
| fclose(fPicState); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx]); |
| } |
| |
| return eStatus; |
| } |
| |
| uint8_t CodechalEncHevcStateG12::PicCodingTypeToSliceType(uint16_t pictureCodingType) |
| { |
| uint8_t sliceType = 0; |
| |
| switch (pictureCodingType) |
| { |
| case I_TYPE: |
| sliceType = CODECHAL_ENCODE_HEVC_I_SLICE; |
| break; |
| case P_TYPE: |
| sliceType = CODECHAL_ENCODE_HEVC_P_SLICE; |
| break; |
| case B_TYPE: |
| case B1_TYPE: |
| case B2_TYPE: |
| sliceType = CODECHAL_ENCODE_HEVC_B_SLICE; |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERT(false); |
| } |
| return sliceType; |
| } |
| |
| // The following code is from the kernel ULT |
| MOS_STATUS CodechalEncHevcStateG12::InitMediaObjectWalker( |
| uint32_t threadSpaceWidth, |
| uint32_t threadSpaceHeight, |
| uint32_t colorCountMinusOne, |
| DependencyPattern dependencyPattern, |
| uint32_t childThreadNumber, |
| uint32_t localLoopExecCount, |
| MHW_WALKER_PARAMS &walkerParams) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| walkerParams.ColorCountMinusOne = colorCountMinusOne; |
| walkerParams.dwGlobalLoopExecCount = 0x3ff; |
| walkerParams.dwLocalLoopExecCount = 0x3ff; |
| |
| if (dependencyPattern == dependencyWavefrontHorizontal) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = 0; |
| walkerParams.LocalInnerLoopUnit.y = 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| } |
| else if (dependencyPattern == dependencyWavefrontVertical) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 0; |
| walkerParams.LocalOutLoopStride.y = 1; |
| walkerParams.LocalInnerLoopUnit.x = 1; |
| walkerParams.LocalInnerLoopUnit.y = 0; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| } |
| else if (dependencyPattern == dependencyWavefront45Degree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -1; |
| walkerParams.LocalInnerLoopUnit.y = 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| } |
| else if (dependencyPattern == dependencyWavefront26Degree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -2; |
| walkerParams.LocalInnerLoopUnit.y = 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| } |
| else if ((dependencyPattern == dependencyWavefront45XDegree) || |
| (dependencyPattern == dependencyWavefront45XDegreeAlt)) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -1; |
| walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = childThreadNumber; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 1; |
| } |
| else if ((dependencyPattern == dependencyWavefront26XDegree) || |
| (dependencyPattern == dependencyWavefront26XDegreeAlt)) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -2; |
| walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = childThreadNumber; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 1; |
| } |
| else if (dependencyPattern == dependencyWavefront45XVp9Degree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -1; |
| walkerParams.LocalInnerLoopUnit.y = 4; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 3; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 1; |
| } |
| else if (dependencyPattern == dependencyWavefront26ZDegree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = 2; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = -4; |
| walkerParams.GlobalInnerLoopUnit.y = 2; |
| |
| // Local |
| walkerParams.BlockResolution.x = 2; |
| walkerParams.BlockResolution.y = 2; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 0; |
| walkerParams.LocalOutLoopStride.y = 1; |
| walkerParams.LocalInnerLoopUnit.x = 1; |
| walkerParams.LocalInnerLoopUnit.y = 0; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| } |
| else if (dependencyPattern == dependencyWavefront26ZigDegree) |
| { |
| int32_t size_x = threadSpaceWidth; //(threadSpaceWidth + 1)>> 1; |
| int32_t size_y = threadSpaceHeight; //threadSpaceHeight << 1; |
| |
| // Global |
| walkerParams.GlobalResolution.x = size_x; |
| walkerParams.GlobalResolution.y = size_y; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = size_x; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = size_y; |
| |
| // Local |
| walkerParams.BlockResolution.x = size_x; |
| walkerParams.BlockResolution.y = size_y; |
| walkerParams.LocalStart.x = 0; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -2; |
| walkerParams.LocalInnerLoopUnit.y = 4; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 3; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 1; |
| } |
| else if (dependencyPattern == dependencyWavefront45DDegree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = threadSpaceWidth; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -1; |
| walkerParams.LocalInnerLoopUnit.y = 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| if (colorCountMinusOne > 0) |
| { |
| walkerParams.dwLocalLoopExecCount = localLoopExecCount; |
| } |
| } |
| else if (dependencyPattern == dependencyWavefront26DDegree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = threadSpaceWidth; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -2; |
| walkerParams.LocalInnerLoopUnit.y = 1; |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = 0; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 0; |
| |
| if (colorCountMinusOne > 0) |
| { |
| walkerParams.dwLocalLoopExecCount = localLoopExecCount; |
| } |
| } |
| else if (dependencyPattern == dependencyWavefront45XDDegree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = threadSpaceWidth; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -1; |
| walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1; |
| |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = childThreadNumber; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 1; |
| if (colorCountMinusOne > 0) |
| { |
| walkerParams.dwLocalLoopExecCount = localLoopExecCount; |
| } |
| } |
| else if (dependencyPattern == dependencyWavefront26XDDegree) |
| { |
| // Global |
| walkerParams.GlobalResolution.x = threadSpaceWidth; |
| walkerParams.GlobalResolution.y = threadSpaceHeight; |
| walkerParams.GlobalStart.x = 0; |
| walkerParams.GlobalStart.y = 0; |
| walkerParams.GlobalOutlerLoopStride.x = threadSpaceWidth; |
| walkerParams.GlobalOutlerLoopStride.y = 0; |
| walkerParams.GlobalInnerLoopUnit.x = 0; |
| walkerParams.GlobalInnerLoopUnit.y = threadSpaceHeight; |
| // Local |
| walkerParams.BlockResolution.x = threadSpaceWidth; |
| walkerParams.BlockResolution.y = threadSpaceHeight; |
| walkerParams.LocalStart.x = threadSpaceWidth; |
| walkerParams.LocalStart.y = 0; |
| walkerParams.LocalOutLoopStride.x = 1; |
| walkerParams.LocalOutLoopStride.y = 0; |
| walkerParams.LocalInnerLoopUnit.x = -2; |
| walkerParams.LocalInnerLoopUnit.y = childThreadNumber + 1; |
| // Mid |
| walkerParams.MiddleLoopExtraSteps = childThreadNumber; |
| walkerParams.MidLoopUnitX = 0; |
| walkerParams.MidLoopUnitY = 1; |
| |
| if (colorCountMinusOne > 0) |
| { |
| walkerParams.dwLocalLoopExecCount = localLoopExecCount; |
| } |
| } |
| else |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported walking pattern is observed\n"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| } |
| return eStatus; |
| } |
| |
| bool CodechalEncHevcStateG12::IsDegree45Needed() |
| { |
| if (m_numberConcurrentGroup == 1 && m_numberEncKernelSubThread == 1) |
| { |
| return false; |
| } |
| return true; |
| } |
| |
| void CodechalEncHevcStateG12::DecideConcurrentGroupAndWaveFrontNumber() |
| { |
| uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t widthInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1), shift); |
| uint32_t heightInLcu = MOS_ROUNDUP_SHIFT((m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1), shift); |
| DependencyPattern walkerDegree; |
| |
| //As per kernel ULT,for all non TU1 cases m_numberEncKernelSubThread should be set to 1 |
| // LCU32 has no multiple thread support, |
| if (!m_isMaxLcu64 || m_hevcSeqParams->TargetUsage != 1) |
| { |
| m_numberEncKernelSubThread = 1; // LCU32 has no multiple thread support |
| } |
| |
| while (heightInLcu / m_numberConcurrentGroup == 0) |
| { |
| m_numberConcurrentGroup = m_numberConcurrentGroup >> 1; |
| if (m_numberConcurrentGroup == 0) |
| { |
| // Try out all values and now have to use the default ones. |
| // Concurrent group and wave-front split must be enabled together |
| m_numberConcurrentGroup = 1; |
| break; |
| } |
| } |
| |
| if (m_numberConcurrentGroup > 1) |
| { |
| m_numWavefrontInOneRegion = 0; |
| while (m_numWavefrontInOneRegion == 0) |
| { |
| uint32_t shift = m_degree45Needed ? 0 : 1; |
| |
| m_numWavefrontInOneRegion = |
| (widthInLcu + ((heightInLcu - 1) << shift) + m_numberConcurrentGroup - 1) / m_numberConcurrentGroup; |
| |
| if (m_numWavefrontInOneRegion > 0) |
| { |
| // this is a valid setting and number of regisions is greater than or equal to 1 |
| break; |
| } |
| m_numberConcurrentGroup = m_numberConcurrentGroup >> 1; |
| if (m_numberConcurrentGroup == 0) |
| { |
| // Try out all values and now have to use the default ones. |
| m_numberConcurrentGroup = 1; |
| break; |
| } |
| } |
| } |
| else |
| { |
| m_numWavefrontInOneRegion = 0; |
| } |
| |
| m_numberEncKernelSubThread = MOS_MIN(m_numberEncKernelSubThread, m_hevcThreadTaskDataNum); |
| |
| return; |
| } |
| |
| void CodechalEncHevcStateG12::InitSwScoreBoardParams(CodechalEncodeSwScoreboard::KernelParams &swScoreboardKernelParames) |
| { |
| uint32_t widthAlignedMaxLcu; |
| uint32_t heightAlignedMaxLcu; |
| uint32_t widthAlignedLcu32; |
| uint32_t heightAlignedLcu32; |
| |
| if (m_mfeEnabled && m_colorBitMfeEnabled) |
| { |
| widthAlignedMaxLcu = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxWidth, MAX_LCU_SIZE); |
| heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxHeight, MAX_LCU_SIZE); |
| widthAlignedLcu32 = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxWidth, 32); |
| heightAlignedLcu32 = MOS_ALIGN_CEIL(m_mfeEncodeParams.maxHeight, 32); |
| } |
| else |
| { |
| widthAlignedMaxLcu = m_widthAlignedMaxLcu; |
| heightAlignedMaxLcu = m_heightAlignedMaxLcu; |
| widthAlignedLcu32 = m_widthAlignedLcu32; |
| heightAlignedLcu32 = m_heightAlignedLcu32; |
| } |
| |
| // SW scoreboard Kernel Call -- to be continued - DS + HME kernel call |
| swScoreboardKernelParames.isHevc = false; // can be set to false. Need to enabled only for an optimization which is not needed for now |
| |
| m_degree45Needed = true; |
| if (m_hevcSeqParams->TargetUsage == 1) |
| { |
| m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU1, m_numberConcurrentGroup); |
| // m_numberConcurrentGroup should default to 2 here for TU1. the only other value allowed from reg key will be 1 |
| m_degree45Needed = false; |
| } |
| else if (m_hevcSeqParams->TargetUsage == 4) |
| { |
| m_numberConcurrentGroup = MOS_MIN(m_maxWavefrontsforTU4, m_numberConcurrentGroup); |
| } |
| DecideConcurrentGroupAndWaveFrontNumber(); |
| |
| DependencyPattern walkPattern; |
| if (m_hevcSeqParams->TargetUsage == 1) |
| { |
| if (m_isMaxLcu64) |
| { |
| walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26XDegreeAlt : dependencyWavefront26XDDegree; |
| } |
| else |
| { |
| walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront26Degree : dependencyWavefront26DDegree; |
| } |
| } |
| else if (m_hevcSeqParams->TargetUsage == 4) |
| { |
| walkPattern = m_numberConcurrentGroup == 1 ? dependencyWavefront45Degree : dependencyWavefront45DDegree; |
| } |
| else |
| { |
| walkPattern = dependencyWavefront45DDegree; |
| } |
| m_swScoreboardState->SetDependencyPattern(walkPattern); |
| |
| if (m_isMaxLcu64) |
| { |
| if (m_hevcSeqParams->TargetUsage == 1) |
| { |
| swScoreboardKernelParames.scoreboardWidth = (widthAlignedMaxLcu >> 6); |
| swScoreboardKernelParames.scoreboardHeight = (heightAlignedMaxLcu >> 6) * m_numberEncKernelSubThread; |
| } |
| else |
| { |
| swScoreboardKernelParames.scoreboardWidth = 2 * (widthAlignedMaxLcu >> 6); |
| swScoreboardKernelParames.scoreboardHeight = 2 * (heightAlignedMaxLcu >> 6); |
| } |
| swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup; |
| swScoreboardKernelParames.numberOfChildThread = m_numberEncKernelSubThread - 1; // child thread number is minus one of the total sub-thread for the main thread takes one. |
| } |
| else |
| { |
| swScoreboardKernelParames.scoreboardWidth = widthAlignedLcu32 >> 5; |
| swScoreboardKernelParames.scoreboardHeight = heightAlignedLcu32 >> 5; |
| swScoreboardKernelParames.numberOfWaveFrontSplit = m_numberConcurrentGroup; |
| swScoreboardKernelParames.numberOfChildThread = 0; |
| } |
| |
| swScoreboardKernelParames.swScoreboardSurfaceWidth = swScoreboardKernelParames.scoreboardWidth; |
| swScoreboardKernelParames.swScoreboardSurfaceHeight = swScoreboardKernelParames.scoreboardHeight; |
| |
| m_swScoreboardState->SetCurSwScoreboardSurfaceIndex(m_currRecycledBufIdx); |
| |
| swScoreboardKernelParames.lcuInfoSurface = &m_lcuLevelInputDataSurface[m_currRecycledBufIdx]; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::UserFeatureKeyReport() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::UserFeatureKeyReport()); |
| #if (_DEBUG || _RELEASE_INTERNAL) |
| CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, m_numberConcurrentGroup); |
| CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_SUBTHREAD_NUM_ID_G12, m_numberEncKernelSubThread); |
| CodecHalEncode_WriteKey64(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ENABLE_VE_DEBUG_OVERRIDE, m_kmdVeOveride.Value); |
| |
| if (m_pakOnlyTest) |
| { |
| CodecHalEncode_WriteStringKey(__MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_PAK_ONLY_ID, m_pakOnlyDataFolder, strlen(m_pakOnlyDataFolder)); |
| } |
| CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENCODE_USED_VDBOX_NUM_ID, m_numPipe); |
| CodecHalEncode_WriteKey(__MEDIA_USER_FEATURE_VALUE_ENABLE_ENCODE_VE_CTXSCHEDULING_ID, MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)); |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetupSwScoreBoard(CodechalEncodeSwScoreboard::KernelParams *params) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (Mos_ResourceIsNull(&m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource)) |
| { |
| MOS_ZeroMemory(m_swScoreboardState->GetCurSwScoreboardSurface(), sizeof(*m_swScoreboardState->GetCurSwScoreboardSurface())); |
| |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D; |
| MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBuffer2D.Type = MOS_GFXRES_2D; |
| allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR; |
| allocParamsForBuffer2D.Format = Format_R32U; |
| allocParamsForBuffer2D.dwWidth = params->swScoreboardSurfaceWidth; |
| allocParamsForBuffer2D.dwHeight = params->swScoreboardSurfaceHeight; |
| allocParamsForBuffer2D.pBufName = "SW Scoreboard Init buffer"; |
| |
| eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBuffer2D, |
| &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo( |
| m_osInterface, |
| m_swScoreboardState->GetCurSwScoreboardSurface())); |
| } |
| |
| if (m_swScoreboard == nullptr) |
| { |
| m_swScoreboard = (uint8_t *)MOS_AllocAndZeroMemory(params->scoreboardWidth * sizeof(uint32_t) * params->scoreboardHeight); |
| InitSWScoreboard(m_swScoreboard, params->scoreboardWidth, params->scoreboardHeight, m_swScoreboardState->GetDependencyPattern(), (char)(params->numberOfChildThread)); |
| } |
| |
| MOS_LOCK_PARAMS lockFlags; |
| |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource, |
| &lockFlags); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| for (uint32_t h = 0; h < params->scoreboardHeight; h++) |
| { |
| uint32_t s = params->scoreboardWidth * sizeof(uint32_t); |
| MOS_SecureMemcpy(data, s, &m_swScoreboard[h * s], s); |
| data += m_swScoreboardState->GetCurSwScoreboardSurface()->dwPitch; |
| } |
| |
| m_osInterface->pfnUnlockResource( |
| m_osInterface, |
| &m_swScoreboardState->GetCurSwScoreboardSurface()->OsResource); |
| |
| return eStatus; |
| } |
| |
| void CodechalEncHevcStateG12::SetDependency( |
| uint8_t &numDependencies, |
| char * scoreboardDeltaX, |
| char * scoreboardDeltaY, |
| uint32_t dependencyPattern, |
| char childThreadNumber) |
| { |
| if (dependencyPattern == dependencyWavefrontHorizontal) |
| { |
| numDependencies = m_numDependencyHorizontal; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyHorizontal, m_dxWavefrontHorizontal, m_numDependencyHorizontal); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyHorizontal, m_dyWavefrontHorizontal, m_numDependencyHorizontal); |
| } |
| else if (dependencyPattern == dependencyWavefrontVertical) |
| { |
| numDependencies = m_numDependencyVertical; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyVertical, m_dxWavefrontVertical, m_numDependencyVertical); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyVertical, m_dyWavefrontVertical, m_numDependencyVertical); |
| } |
| else if (dependencyPattern == dependencyWavefront45Degree) |
| { |
| numDependencies = m_numDependency45Degree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree); |
| } |
| else if (dependencyPattern == dependencyWavefront26Degree || |
| dependencyPattern == dependencyWavefront26DDegree) |
| { |
| numDependencies = m_numDependency26Degree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26Degree, m_dxWavefront26Degree, m_numDependency26Degree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26Degree, m_dyWavefront26Degree, m_numDependency26Degree); |
| } |
| else if (dependencyPattern == dependencyWavefront45XDegree) |
| { |
| numDependencies = m_numDependency45xDegree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegree, m_dxWavefront45xDegree, m_numDependency45xDegree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegree, m_dyWavefront45xDegree, m_numDependency45xDegree); |
| numDependencies = childThreadNumber + 2; |
| scoreboardDeltaY[0] = childThreadNumber; |
| } |
| else if (dependencyPattern == dependencyWavefront26XDegree) |
| { |
| numDependencies = m_numDependency26xDegree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegree, m_dxWavefront26xDegree, m_numDependency26xDegree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegree, m_dyWavefront26xDegree, m_numDependency26xDegree); |
| numDependencies = childThreadNumber + 3; |
| scoreboardDeltaY[0] = childThreadNumber; |
| } |
| else if ((dependencyPattern == dependencyWavefront45XDegreeAlt) || |
| (dependencyPattern == dependencyWavefront45XDDegree)) |
| { |
| numDependencies = m_numDependency45xDegreeAlt; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xDegreeAlt, m_dxWavefront45xDegreeAlt, m_numDependency45xDegreeAlt); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xDegreeAlt, m_dyWavefront45xDegreeAlt, m_numDependency45xDegreeAlt); |
| scoreboardDeltaY[0] = childThreadNumber; |
| } |
| else if ((dependencyPattern == dependencyWavefront26XDegreeAlt) || |
| (dependencyPattern == dependencyWavefront26XDDegree)) |
| { |
| numDependencies = m_numDependency26xDegreeAlt; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26xDegreeAlt, m_dxWavefront26xDegreeAlt, m_numDependency26xDegreeAlt); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26xDegreeAlt, m_dyWavefront26xDegreeAlt, m_numDependency26xDegreeAlt); |
| scoreboardDeltaY[0] = childThreadNumber; |
| } |
| else if (dependencyPattern == dependencyWavefront45XVp9Degree) |
| { |
| numDependencies = m_numDependency45xVp9Degree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45xVp9Degree, m_dxWavefront45xVp9Degree, m_numDependency45xVp9Degree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45xVp9Degree, m_dyWavefront45xVp9Degree, m_numDependency45xVp9Degree); |
| } |
| else if (dependencyPattern == dependencyWavefront26ZDegree) |
| { |
| numDependencies = m_numDependency26zDegree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26zDegree, m_dxWavefront26zDegree, m_numDependency26zDegree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26zDegree, m_dyWavefront26zDegree, m_numDependency26zDegree); |
| } |
| else if (dependencyPattern == dependencyWavefront26ZigDegree) |
| { |
| numDependencies = m_numDependency26ZigDegree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency26ZigDegree, m_dxWavefront26ZigDegree, m_numDependency26ZigDegree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency26ZigDegree, m_dyWavefront26ZigDegree, m_numDependency26ZigDegree); |
| } |
| else if (dependencyPattern == dependencyWavefront45DDegree) |
| { |
| numDependencies = m_numDependency45Degree; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependency45Degree, m_dxWavefront45Degree, m_numDependency45Degree); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependency45Degree, m_dyWavefront45Degree, m_numDependency45Degree); |
| } |
| else |
| { |
| numDependencies = m_numDependencyNone; |
| MOS_SecureMemcpy(scoreboardDeltaX, m_numDependencyNone, m_dxWavefrontNone, m_numDependencyNone); |
| MOS_SecureMemcpy(scoreboardDeltaY, m_numDependencyNone, m_dyWavefrontNone, m_numDependencyNone); |
| } |
| } |
| |
| // ======================================================================================== |
| // FUNCTION: InitSWScoreboard |
| // DESCRIPTION: Initialize software scoreboard for a specific dependency pattern. |
| // INPUTS: scoreboardWidth - Width of scoreboard in Entries |
| // scoreboardHeight - Height of scoreboard in Entries |
| // dependencyPattern - The Enumeration of the Dependency Pattern |
| // OUTPUTS: scoreboard - Pointer to scoreboard in Memory |
| // ======================================================================================== |
| void CodechalEncHevcStateG12::InitSWScoreboard(uint8_t *scoreboard, uint32_t scoreboardWidth, uint32_t scoreboardHeight, uint32_t dependencyPattern, char childThreadNumber) |
| { |
| // 1. Select Dependency Pattern |
| uint8_t numDependencies; |
| char scoreboardDeltaX[m_maxNumDependency]; |
| char scoreboardDeltaY[m_maxNumDependency]; |
| memset(scoreboardDeltaX, 0, sizeof(scoreboardDeltaX)); |
| memset(scoreboardDeltaY, 0, sizeof(scoreboardDeltaY)); |
| |
| SetDependency(numDependencies, scoreboardDeltaX, scoreboardDeltaY, dependencyPattern, childThreadNumber); |
| |
| // 2. Initialize scoreboard (CPU Based) |
| int32_t dependentLocationX = 0; |
| int32_t dependentLocationY = 0; |
| uint32_t *scoreboardInDws = (uint32_t *)scoreboard; |
| int32_t totalThreadNumber = childThreadNumber + 1; |
| for (int32_t y = 0; y < (int32_t)scoreboardHeight; y += totalThreadNumber) |
| { |
| for (int32_t x = 0; x < (int32_t)scoreboardWidth; x++) |
| { |
| scoreboardInDws[y * scoreboardWidth + x] = 0; |
| |
| // Add dependencies accordingly |
| for (int32_t i = 0; i < numDependencies; i++) |
| { |
| dependentLocationX = x + scoreboardDeltaX[i]; |
| dependentLocationY = y + scoreboardDeltaY[i]; |
| if ((dependentLocationX < 0) || (dependentLocationY < 0) || |
| (dependentLocationX >= (int32_t)scoreboardWidth) || |
| (dependentLocationY >= (int32_t)scoreboardHeight)) |
| { |
| // Do not add dependency because thread does not exist |
| } |
| else |
| { |
| scoreboardInDws[y * scoreboardWidth + x] |= (1 << i); |
| } |
| } // End NumDep |
| } // End x |
| |
| for (int32_t n = y + 1; n < y + totalThreadNumber; n++) |
| { |
| for (int32_t k = 0; k < (int32_t)scoreboardWidth; k++) |
| { |
| scoreboardInDws[n * scoreboardWidth + k] = scoreboardInDws[y * scoreboardWidth + k]; |
| } |
| } |
| |
| } // End y |
| } |
| |
| void CodechalEncHevcStateG12::CreateMhwParams() |
| { |
| m_sliceStateParams = MOS_New(MHW_VDBOX_HEVC_SLICE_STATE_G12); |
| m_pipeModeSelectParams = MOS_New(MHW_VDBOX_PIPE_MODE_SELECT_PARAMS_G12); |
| m_pipeBufAddrParams = MOS_New(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS_G12); |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::CalculatePictureStateCommandSize() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MHW_VDBOX_STATE_CMDSIZE_PARAMS_G12 stateCmdSizeParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| m_hwInterface->GetHxxStateCommandSize( |
| CODECHAL_ENCODE_MODE_HEVC, |
| &m_defaultPictureStatesSize, |
| &m_defaultPicturePatchListSize, |
| &stateCmdSizeParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AddHcpPipeBufAddrCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| *m_pipeBufAddrParams = {}; |
| SetHcpPipeBufAddrParams(*m_pipeBufAddrParams); |
| #ifdef _MMC_SUPPORTED |
| m_mmcState->SetPipeBufAddr(m_pipeBufAddrParams); |
| #endif |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpPipeBufAddrCmd(cmdBuffer, m_pipeBufAddrParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetTileData( |
| MHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 *tileCodingParams, |
| uint32_t bitstreamBufSize) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| return eStatus; |
| } |
| |
| uint32_t colBd[100] = {0}; |
| uint32_t num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| for (uint32_t i = 0; i < num_tile_columns; i++) |
| { |
| colBd[i + 1] = colBd[i] + m_hevcPicParams->tile_column_width[i]; |
| } |
| |
| uint32_t rowBd[100] = {0}; |
| uint32_t num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| for (uint32_t i = 0; i < num_tile_rows; i++) |
| { |
| rowBd[i + 1] = rowBd[i] + m_hevcPicParams->tile_row_height[i]; |
| } |
| |
| m_numTiles = num_tile_rows * num_tile_columns; |
| |
| uint32_t const uiNumCuRecordTab[] = {1, 4, 16, 64}; //LCU: 8x8->1, 16x16->4, 32x32->16, 64x64->64 |
| uint32_t numCuRecord = uiNumCuRecordTab[MOS_MIN(3, m_hevcSeqParams->log2_max_coding_block_size_minus3)]; |
| uint32_t bitstreamByteOffset = 0, saoRowstoreOffset = 0, cuLevelStreamoutOffset = 0, sseRowstoreOffset = 0; |
| int32_t frameWidthInMinCb = m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1; |
| int32_t frameHeightInMinCb = m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1; |
| int32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t NumLCUInPic = 0; |
| |
| for (uint32_t i = 0; i < num_tile_rows; i++) |
| { |
| for (uint32_t j = 0; j < num_tile_columns; j++) |
| { |
| NumLCUInPic += m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j]; |
| } |
| } |
| |
| uint32_t numSliceInTile = 0; |
| for (uint32_t uiNumLCUsInTiles = 0, i = 0; i < num_tile_rows; i++) |
| { |
| for (uint32_t j = 0; j < num_tile_columns; j++) |
| { |
| uint32_t idx = i * num_tile_columns + j; |
| uint32_t numLCUInTile = m_hevcPicParams->tile_row_height[i] * m_hevcPicParams->tile_column_width[j]; |
| |
| tileCodingParams[idx].TileStartLCUX = colBd[j]; |
| tileCodingParams[idx].TileStartLCUY = rowBd[i]; |
| |
| tileCodingParams[idx].TileColumnStoreSelect = j % 2; |
| tileCodingParams[idx].TileRowStoreSelect = i % 2; |
| |
| if (j != num_tile_columns - 1) |
| { |
| tileCodingParams[idx].TileWidthInMinCbMinus1 = (m_hevcPicParams->tile_column_width[j] << shift) - 1; |
| tileCodingParams[idx].IsLastTileofRow = false; |
| } |
| else |
| { |
| tileCodingParams[idx].TileWidthInMinCbMinus1 = (frameWidthInMinCb - (colBd[j] << shift)) - 1; |
| tileCodingParams[idx].IsLastTileofRow = true; |
| } |
| |
| if (i != num_tile_rows - 1) |
| { |
| tileCodingParams[idx].IsLastTileofColumn = false; |
| tileCodingParams[idx].TileHeightInMinCbMinus1 = (m_hevcPicParams->tile_row_height[i] << shift) - 1; |
| } |
| else |
| { |
| tileCodingParams[idx].TileHeightInMinCbMinus1 = (frameHeightInMinCb - (rowBd[i] << shift)) - 1; |
| tileCodingParams[idx].IsLastTileofColumn = true; |
| } |
| |
| tileCodingParams[idx].NumOfTilesInFrame = m_numTiles; |
| tileCodingParams[idx].NumOfTileColumnsInFrame = num_tile_columns; |
| tileCodingParams[idx].CuRecordOffset = MOS_ALIGN_CEIL(((numCuRecord * uiNumLCUsInTiles) * m_hcpInterface->GetHevcEncCuRecordSize()), |
| CODECHAL_CACHELINE_SIZE) / |
| CODECHAL_CACHELINE_SIZE; |
| tileCodingParams[idx].NumberOfActiveBePipes = (m_numPipe > 1) ? m_numPipe : 1; |
| |
| tileCodingParams[idx].PakTileStatisticsOffset = m_sizeOfHcpPakFrameStats * idx / CODECHAL_CACHELINE_SIZE; |
| tileCodingParams[idx].TileSizeStreamoutOffset = idx; |
| tileCodingParams[idx].Vp9ProbabilityCounterStreamoutOffset = 0; |
| tileCodingParams[idx].presHcpSyncBuffer = &m_resHcpScalabilitySyncBuffer.sResource; |
| tileCodingParams[idx].CuLevelStreamoutOffset = cuLevelStreamoutOffset; |
| tileCodingParams[idx].SliceSizeStreamoutOffset = numSliceInTile; |
| tileCodingParams[idx].SseRowstoreOffset = sseRowstoreOffset; |
| tileCodingParams[idx].BitstreamByteOffset = bitstreamByteOffset; |
| tileCodingParams[idx].SaoRowstoreOffset = saoRowstoreOffset; |
| |
| cuLevelStreamoutOffset += MOS_ALIGN_CEIL((tileCodingParams[idx].TileWidthInMinCbMinus1 + 1) * (tileCodingParams[idx].TileHeightInMinCbMinus1 + 1) * 16, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE; |
| sseRowstoreOffset += ((m_hevcPicParams->tile_column_width[j] + 3) * m_sizeOfSseSrcPixelRowStoreBufferPerLcu) / CODECHAL_CACHELINE_SIZE; |
| saoRowstoreOffset += (MOS_ALIGN_CEIL(m_hevcPicParams->tile_column_width[j], 4) * CODECHAL_HEVC_SAO_STRMOUT_SIZE_PERLCU) / CODECHAL_CACHELINE_SIZE; |
| uint64_t totalSizeTemp = (uint64_t)bitstreamBufSize * (uint64_t)numLCUInTile; |
| uint32_t bitStreamSizePerTile = (uint32_t)(totalSizeTemp / (uint64_t)NumLCUInPic) + ((totalSizeTemp % (uint64_t)NumLCUInPic) ? 1 : 0); |
| bitstreamByteOffset += MOS_ALIGN_CEIL(bitStreamSizePerTile, CODECHAL_CACHELINE_SIZE) / CODECHAL_CACHELINE_SIZE; |
| uiNumLCUsInTiles += numLCUInTile; |
| |
| for (uint32_t slcCount = 0; slcCount < m_numSlices; slcCount++) |
| { |
| bool lastSliceInTile = false, sliceInTile = false; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(IsSliceInTile(slcCount, |
| &tileCodingParams[idx], |
| &sliceInTile, |
| &lastSliceInTile)); |
| numSliceInTile += (sliceInTile ? 1 : 0); |
| } |
| } |
| // same row store buffer for different tile rows. |
| saoRowstoreOffset = 0; |
| sseRowstoreOffset = 0; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::IsSliceInTile( |
| uint32_t sliceNumber, |
| PMHW_VDBOX_HCP_TILE_CODING_PARAMS_G12 currentTile, |
| bool * sliceInTile, |
| bool * lastSliceInTile) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(currentTile); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(sliceInTile); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(lastSliceInTile); |
| |
| uint32_t shift = m_hevcSeqParams->log2_max_coding_block_size_minus3 - m_hevcSeqParams->log2_min_coding_block_size_minus3; |
| uint32_t residual = (1 << shift) - 1; |
| uint32_t frameWidthInLCU = (m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t frameHeightInLCU = (m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1 + residual) >> shift; |
| |
| PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = &m_hevcSliceParams[sliceNumber]; |
| uint32_t sliceStartLCU = hevcSlcParams->slice_segment_address; |
| uint32_t sliceLCUx = sliceStartLCU % frameWidthInLCU; |
| uint32_t sliceLCUy = sliceStartLCU / frameWidthInLCU; |
| |
| uint32_t tile_column_width = (currentTile->TileWidthInMinCbMinus1 + 1 + residual) >> shift; |
| uint32_t tile_row_height = (currentTile->TileHeightInMinCbMinus1 + 1 + residual) >> shift; |
| if (sliceLCUx < currentTile->TileStartLCUX || |
| sliceLCUy < currentTile->TileStartLCUY || |
| sliceLCUx >= currentTile->TileStartLCUX + tile_column_width || |
| sliceLCUy >= currentTile->TileStartLCUY + tile_row_height) |
| { |
| // slice start is not in the tile boundary |
| *lastSliceInTile = *sliceInTile = false; |
| return eStatus; |
| } |
| |
| sliceLCUx += (hevcSlcParams->NumLCUsInSlice - 1) % tile_column_width; |
| sliceLCUy += (hevcSlcParams->NumLCUsInSlice - 1) / tile_column_width; |
| |
| if (sliceLCUx >= currentTile->TileStartLCUX + tile_column_width) |
| { |
| sliceLCUx -= tile_column_width; |
| sliceLCUy++; |
| } |
| |
| if (sliceLCUx < currentTile->TileStartLCUX || |
| sliceLCUy < currentTile->TileStartLCUY || |
| sliceLCUx >= currentTile->TileStartLCUX + tile_column_width || |
| sliceLCUy >= currentTile->TileStartLCUY + tile_row_height) |
| { |
| // last LCU of the slice is out of the tile boundary |
| *lastSliceInTile = *sliceInTile = false; |
| return eStatus; |
| } |
| |
| *sliceInTile = true; |
| |
| sliceLCUx++; |
| sliceLCUy++; |
| |
| // the end of slice is at the boundary of tile |
| *lastSliceInTile = (sliceLCUx == currentTile->TileStartLCUX + tile_column_width && |
| sliceLCUy == currentTile->TileStartLCUY + tile_row_height); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AddHcpRefIdxCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| PMHW_BATCH_BUFFER batchBuffer, |
| PMHW_VDBOX_HEVC_SLICE_STATE params) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcSliceParams); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params->pEncodeHevcPicParams); |
| |
| if (cmdBuffer == nullptr && batchBuffer == nullptr) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("There was no valid buffer to add the HW command to."); |
| return MOS_STATUS_NULL_POINTER; |
| } |
| |
| PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = params->pEncodeHevcPicParams; |
| PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = params->pEncodeHevcSliceParams; |
| |
| if (hevcSlcParams->slice_type != CODECHAL_ENCODE_HEVC_I_SLICE) |
| { |
| MHW_VDBOX_HEVC_REF_IDX_PARAMS_G12 refIdxParams; |
| |
| refIdxParams.CurrPic = hevcPicParams->CurrReconstructedPic; |
| refIdxParams.isEncode = true; |
| refIdxParams.ucList = LIST_0; |
| refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l0_active_minus1 + 1; |
| eStatus = MOS_SecureMemcpy(&refIdxParams.RefPicList, sizeof(refIdxParams.RefPicList), &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList)); |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Failed to copy memory."); |
| return eStatus; |
| } |
| |
| refIdxParams.hevcRefList = (void **)m_refList; |
| refIdxParams.poc_curr_pic = hevcPicParams->CurrPicOrderCnt; |
| for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| refIdxParams.poc_list[i] = hevcPicParams->RefFramePOCList[i]; |
| } |
| |
| refIdxParams.pRefIdxMapping = params->pRefIdxMapping; |
| refIdxParams.RefFieldPicFlag = 0; // there is no interlaced support in encoder |
| refIdxParams.RefBottomFieldFlag = 0; // there is no interlaced support in encoder |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams)); |
| |
| if (hevcSlcParams->slice_type == CODECHAL_ENCODE_HEVC_B_SLICE) |
| { |
| refIdxParams.ucList = LIST_1; |
| refIdxParams.ucNumRefForList = hevcSlcParams->num_ref_idx_l1_active_minus1 + 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpRefIdxStateCmd(cmdBuffer, batchBuffer, &refIdxParams)); |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SendPrologWithFrameTracking( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool frameTrackingRequested, |
| MHW_MI_MMIOREGISTERS *mmioRegister) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| MOS_GPU_CONTEXT gpuContext = m_osInterface->pfnGetGpuContext(m_osInterface); |
| |
| MHW_MI_FORCE_WAKEUP_PARAMS forceWakeupParams; |
| MOS_ZeroMemory(&forceWakeupParams, sizeof(MHW_MI_FORCE_WAKEUP_PARAMS)); |
| forceWakeupParams.bMFXPowerWellControl = false; |
| forceWakeupParams.bMFXPowerWellControlMask = true; |
| forceWakeupParams.bHEVCPowerWellControl = true; |
| forceWakeupParams.bHEVCPowerWellControlMask = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiForceWakeupCmd( |
| cmdBuffer, |
| &forceWakeupParams)); |
| |
| if (UseRenderCommandBuffer()) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncoderState::SendPrologWithFrameTracking(cmdBuffer, frameTrackingRequested, mmioRegister)); |
| return eStatus; |
| } |
| |
| #ifdef _MMC_SUPPORTED |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(m_miInterface, cmdBuffer, gpuContext)); |
| #endif |
| |
| if (!IsLastPipe()) |
| { |
| return eStatus; |
| } |
| |
| PMOS_COMMAND_BUFFER commandBufferInUse; |
| if (m_realCmdBuffer.pCmdBase) |
| { |
| commandBufferInUse = &m_realCmdBuffer; |
| } |
| else if (cmdBuffer && cmdBuffer->pCmdBase) |
| { |
| commandBufferInUse = cmdBuffer; |
| } |
| else |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| // initialize command buffer attributes |
| commandBufferInUse->Attributes.bTurboMode = m_hwInterface->m_turboMode; |
| commandBufferInUse->Attributes.dwNumRequestedEUSlices = m_hwInterface->m_numRequestedEuSlices; |
| commandBufferInUse->Attributes.dwNumRequestedSubSlices = m_hwInterface->m_numRequestedSubSlices; |
| commandBufferInUse->Attributes.dwNumRequestedEUs = m_hwInterface->m_numRequestedEus; |
| commandBufferInUse->Attributes.bValidPowerGatingRequest = true; |
| |
| if (frameTrackingRequested && m_frameTrackingEnabled) |
| { |
| commandBufferInUse->Attributes.bEnableMediaFrameTracking = true; |
| commandBufferInUse->Attributes.resMediaFrameTrackingSurface = |
| m_encodeStatusBuf.resStatusBuffer; |
| commandBufferInUse->Attributes.dwMediaFrameTrackingTag = m_storeData; |
| // Set media frame tracking address offset(the offset from the encoder status buffer page) |
| commandBufferInUse->Attributes.dwMediaFrameTrackingAddrOffset = 0; |
| } |
| |
| MHW_GENERIC_PROLOG_PARAMS genericPrologParams; |
| MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams)); |
| genericPrologParams.pOsInterface = m_hwInterface->GetOsInterface(); |
| genericPrologParams.pvMiInterface = m_hwInterface->GetMiInterface(); |
| genericPrologParams.bMmcEnabled = CodecHalMmcState::IsMmcEnabled(); |
| genericPrologParams.dwStoreDataValue = m_storeData - 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(commandBufferInUse, &genericPrologParams)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::InitMmcState() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| #ifdef _MMC_SUPPORTED |
| m_mmcState = MOS_New(CodechalMmcEncodeHevcG12, m_hwInterface, this); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState); |
| #endif |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| #if USE_CODECHAL_DEBUG_TOOL |
| |
| //MOS_STATUS CodechalEncHevcStateG12::CodecHal_DbgDumpHEVCMbEncCurbeG12( |
| // CodechalDebugInterface *pDebugInterface, |
| // CODECHAL_MEDIA_STATE_TYPE Function, |
| // PMOS_RESOURCE presDBuffer) |
| //{ |
| //#define WRITE_CURBE_FIELD_TO_FILE(field) {\ |
| // MOS_SecureStringPrint(sOutBuf, sizeof(sOutBuf), sizeof(sOutBuf), "field = %d\n", pCurbeData->field);\ |
| // CodecHal_DbgAddStringToBufferNewLine(&FileParams, sOutBuf);} |
| // |
| // PMOS_INTERFACE m_osInterface = nullptr; |
| // PCCHAR pcFunction = nullptr; |
| // char sAttrib[125]; |
| // char sOutBuf[MAX_FIELD_LENGTH]; |
| // CODECHAL_DBG_FILE_PARAMS FileParams; |
| // MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| // MOS_LOCK_PARAMS LockFlags; |
| // CodechalEncHevcStateG12::MBENC_COMBINED_BUFFER1 *pEncComBuf1 = nullptr; |
| // |
| // CODECHAL_DEBUG_FUNCTION_ENTER; |
| // |
| // CODECHAL_DEBUG_CHK_NULL(pDebugInterface); |
| // CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pOsInterface); |
| // CODECHAL_DEBUG_CHK_NULL(pDebugInterface->pHwInterface); |
| // m_osInterface = pDebugInterface->pOsInterface; |
| // |
| // pcFunction = CodecHal_DbgGetFunctionType( |
| // pDebugInterface, Function, DBG_CMD_BUFFER_DUMP_DEFAULT); |
| // CODECHAL_DEBUG_CHK_NULL(pcFunction); |
| // |
| // MOS_SecureStringPrint(sAttrib, sizeof(sAttrib), sizeof(sAttrib), "%s%s", pcFunction, CODECHAL_DBG_STRING_CURBE); |
| // |
| // MOS_ZeroMemory(&LockFlags, sizeof(MOS_LOCK_PARAMS)); |
| // LockFlags.ReadOnly = 1; |
| // |
| // pEncComBuf1 = (CodechalEncHevcStateG12::MBENC_COMBINED_BUFFER1*)m_osInterface->pfnLockResource( |
| // m_osInterface, |
| // presDBuffer, |
| // &LockFlags); |
| // |
| // FileParams = g_cInitDbgFileParams; |
| // |
| // if (!CodecHal_DbgAttribIsEnabled(pDebugInterface, sAttrib)) |
| // { |
| // return eStatus; |
| // } |
| // |
| // MOS_ZeroMemory(pDebugInterface->sPath, sizeof(pDebugInterface->sPath)); |
| // |
| // CODECHAL_DEBUG_CHK_STATUS(CodecHal_DbgConstructFilenameString( |
| // pDebugInterface, |
| // pcFunction, |
| // CODECHAL_DBG_STRING_CURBE, |
| // CODECHAL_DBG_STRING_TXT)); |
| // |
| // if (CodecHal_DbgAttribIsEnabled(pDebugInterface, CODECHAL_DBG_STRING_DUMPDATAINBINARY)) |
| // { |
| // CODECHAL_DEBUG_CHK_STATUS(CodecHal_DbgDumpBufferInHexDwords( |
| // pDebugInterface, |
| // (uint8_t*)&pEncComBuf1->Curbe, |
| // sizeof(pEncComBuf1->Curbe))); |
| // } |
| // else |
| // { |
| // CodechalEncHevcStateG12::MBENC_CURBE* pCurbeData = &pEncComBuf1->Curbe; |
| // |
| // FileParams.lRemaining = sizeof(char)* MAX_FIELD_LENGTH * MAX_NUM_ATTRIBUTES; |
| // FileParams.psWriteToFile = (char*)MOS_AllocAndZeroMemory(FileParams.lRemaining); |
| // CODECHAL_DEBUG_CHK_NULL(FileParams.psWriteToFile); |
| // FileParams.dwOffset = 0; |
| // |
| // memset(sOutBuf, 0, sizeof(sOutBuf)); |
| // |
| // MOS_SecureStringPrint(sOutBuf, sizeof(sOutBuf), sizeof(sOutBuf), "# CURBE Parameters:"); |
| // CodecHal_DbgAddStringToBufferNewLine(&FileParams, sOutBuf); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(FrameWidthInSamples); |
| // WRITE_CURBE_FIELD_TO_FILE(FrameHeightInSamples); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(Log2MaxCUSize); |
| // WRITE_CURBE_FIELD_TO_FILE(Log2MinCUSize); |
| // WRITE_CURBE_FIELD_TO_FILE(Log2MaxTUSize); |
| // WRITE_CURBE_FIELD_TO_FILE(Log2MinTUSize); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxIntraRdeIter); |
| // WRITE_CURBE_FIELD_TO_FILE(QPType); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthInter); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxTransformDepthIntra); |
| // WRITE_CURBE_FIELD_TO_FILE(Log2ParallelMergeLevel); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(CornerNeighborPixel); |
| // WRITE_CURBE_FIELD_TO_FILE(IntraNeighborAvailFlags); |
| // WRITE_CURBE_FIELD_TO_FILE(ChromaFormatType); |
| // WRITE_CURBE_FIELD_TO_FILE(SubPelMode); |
| // WRITE_CURBE_FIELD_TO_FILE(InterSADMeasure); |
| // WRITE_CURBE_FIELD_TO_FILE(IntraSADMeasure); |
| // WRITE_CURBE_FIELD_TO_FILE(IntraPrediction); |
| // WRITE_CURBE_FIELD_TO_FILE(RefIDCostMode); |
| // WRITE_CURBE_FIELD_TO_FILE(TUBasedCostSetting); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(ExplictModeEn); |
| // WRITE_CURBE_FIELD_TO_FILE(AdaptiveEn); |
| // WRITE_CURBE_FIELD_TO_FILE(EarlyImeSuccessEn); |
| // WRITE_CURBE_FIELD_TO_FILE(IntraSpeedMode); |
| // WRITE_CURBE_FIELD_TO_FILE(IMECostCentersSel); |
| // WRITE_CURBE_FIELD_TO_FILE(RDEQuantRoundValue); |
| // WRITE_CURBE_FIELD_TO_FILE(IMERefWindowSize); |
| // WRITE_CURBE_FIELD_TO_FILE(IntraComputeType); |
| // WRITE_CURBE_FIELD_TO_FILE(Depth0IntraPredition); |
| // WRITE_CURBE_FIELD_TO_FILE(TUDepthControl); |
| // WRITE_CURBE_FIELD_TO_FILE(IntraTuRecFeedbackDisable); |
| // WRITE_CURBE_FIELD_TO_FILE(MergeListBiDisable); |
| // WRITE_CURBE_FIELD_TO_FILE(EarlyImeStop); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(FrameQP); |
| // WRITE_CURBE_FIELD_TO_FILE(FrameQPSign); |
| // WRITE_CURBE_FIELD_TO_FILE(ConcurrentGroupNum); |
| // WRITE_CURBE_FIELD_TO_FILE(NumofUnitInWaveFront); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(LoadBalenceEnable); |
| // WRITE_CURBE_FIELD_TO_FILE(NumberofMultiFrame); |
| // WRITE_CURBE_FIELD_TO_FILE(Degree45); |
| // WRITE_CURBE_FIELD_TO_FILE(Break12Dependency); |
| // WRITE_CURBE_FIELD_TO_FILE(ThreadNumber); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_B); |
| // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_P); |
| // WRITE_CURBE_FIELD_TO_FILE(Pic_init_qp_I); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(NumofRowTile); |
| // WRITE_CURBE_FIELD_TO_FILE(NumofColumnTile); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(TransquantBypassEnableFlag); |
| // WRITE_CURBE_FIELD_TO_FILE(PCMEnabledFlag); |
| // WRITE_CURBE_FIELD_TO_FILE(CuQpDeltaEnabledFlag); |
| // WRITE_CURBE_FIELD_TO_FILE(Stepping); |
| // WRITE_CURBE_FIELD_TO_FILE(WaveFrontSplitsEnable); |
| // WRITE_CURBE_FIELD_TO_FILE(HMEFlag); |
| // WRITE_CURBE_FIELD_TO_FILE(SuperHME); |
| // WRITE_CURBE_FIELD_TO_FILE(UltraHME); |
| // WRITE_CURBE_FIELD_TO_FILE(Cu64SkipCheckOnly); |
| // WRITE_CURBE_FIELD_TO_FILE(EnableCu64Check); |
| // WRITE_CURBE_FIELD_TO_FILE(Cu642Nx2NCheckOnly); |
| // WRITE_CURBE_FIELD_TO_FILE(EnableCu64AmpCheck); |
| // WRITE_CURBE_FIELD_TO_FILE(DisablePIntra); |
| // WRITE_CURBE_FIELD_TO_FILE(DisableIntraTURec); |
| // WRITE_CURBE_FIELD_TO_FILE(InheritIntraModeFromTU0); |
| // WRITE_CURBE_FIELD_TO_FILE(CostScalingForRA); |
| // WRITE_CURBE_FIELD_TO_FILE(DisableIntraNxN); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL0); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxRefIdxL1); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxBRefIdxL0); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermination); |
| // WRITE_CURBE_FIELD_TO_FILE(SkipEarlyTermSize); |
| // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Enable); |
| // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Order); |
| // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Th); |
| // WRITE_CURBE_FIELD_TO_FILE(DynamicOrderTh); |
| // WRITE_CURBE_FIELD_TO_FILE(PerBFrameQPOffset); |
| // WRITE_CURBE_FIELD_TO_FILE(IncreaseExitThresh); |
| // WRITE_CURBE_FIELD_TO_FILE(Dynamic64Min32); |
| // WRITE_CURBE_FIELD_TO_FILE(LastFrameIsIntra); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(LenSP); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxNumSU); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(CostTableIndex); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(SliceType); |
| // WRITE_CURBE_FIELD_TO_FILE(TemporalMvpEnableFlag); |
| // WRITE_CURBE_FIELD_TO_FILE(CollocatedFromL0Flag); |
| // WRITE_CURBE_FIELD_TO_FILE(theSameRefList); |
| // WRITE_CURBE_FIELD_TO_FILE(IsLowDelay); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxNumMergeCand); |
| // WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL0); |
| // WRITE_CURBE_FIELD_TO_FILE(NumRefIdxL1); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_0); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_0); |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_1); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_1); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_2); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_2); |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_3); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_3); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_4); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_4); |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_5); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_5); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_6); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_6); |
| // WRITE_CURBE_FIELD_TO_FILE(FwdPocNumber_L0_mTb_7); |
| // WRITE_CURBE_FIELD_TO_FILE(BwdPocNumber_L1_mTb_7); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L0); |
| // WRITE_CURBE_FIELD_TO_FILE(LongTermReferenceFlags_L1); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(RefFrameWinWidth); |
| // WRITE_CURBE_FIELD_TO_FILE(RefFrameWinHeight); |
| // |
| // WRITE_CURBE_FIELD_TO_FILE(RoundingInter); |
| // WRITE_CURBE_FIELD_TO_FILE(RoundingIntra); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxThreadWidth); |
| // WRITE_CURBE_FIELD_TO_FILE(MaxThreadHeight); |
| // |
| // CODECHAL_DEBUG_CHK_STATUS(MOS_WriteFileFromPtr( |
| // pDebugInterface->sPath, |
| // FileParams.psWriteToFile, |
| // FileParams.dwOffset)); |
| // } |
| // |
| //finish: |
| // if (m_osInterface && pEncComBuf1) |
| // { |
| // m_osInterface->pfnUnlockResource( |
| // m_osInterface, |
| // presDBuffer); |
| // } |
| // |
| // if (FileParams.psWriteToFile) |
| // { |
| // MOS_FreeMemory(FileParams.psWriteToFile); |
| // } |
| // return eStatus; |
| //} |
| |
| #endif |
| MOS_STATUS CodechalEncHevcStateG12::VerifyCommandBufferSize() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode & resize CommandBuffer Size for every BRC pass |
| if (!m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| return eStatus; |
| } |
| |
| // virtual engine |
| uint32_t requestedSize = |
| m_pictureStatesSize + |
| m_extraPictureStatesSize + |
| (m_sliceStatesSize * m_numSlices); |
| |
| requestedSize += (requestedSize * m_numPassesInOnePipe + m_hucCommandsSize); |
| |
| // Running in the multiple VDBOX mode |
| int currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0 || currentPipe >= m_numPipe) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (IsFirstPipe() && m_osInterface->bUsesPatchList) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable()); |
| } |
| |
| PMOS_COMMAND_BUFFER pCmdBuffer = m_singleTaskPhaseSupported ? &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : &m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass]; |
| |
| if (Mos_ResourceIsNull(&pCmdBuffer->OsResource) || |
| m_sizeOfVeBatchBuffer < requestedSize) |
| { |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = requestedSize; |
| allocParamsForBufferLinear.pBufName = "Batch buffer for each VDBOX"; |
| |
| if (!Mos_ResourceIsNull(&pCmdBuffer->OsResource)) |
| { |
| if (pCmdBuffer->pCmdBase) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &pCmdBuffer->OsResource); |
| } |
| m_osInterface->pfnFreeResource(m_osInterface, &pCmdBuffer->OsResource); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &pCmdBuffer->OsResource)); |
| |
| m_sizeOfVeBatchBuffer = requestedSize; |
| } |
| |
| if (pCmdBuffer->pCmdBase == nullptr) |
| { |
| MOS_LOCK_PARAMS lockParams; |
| MOS_ZeroMemory(&lockParams, sizeof(lockParams)); |
| lockParams.WriteOnly = true; |
| pCmdBuffer->pCmdPtr = pCmdBuffer->pCmdBase = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, &pCmdBuffer->OsResource, &lockParams); |
| pCmdBuffer->iRemaining = m_sizeOfVeBatchBuffer; |
| pCmdBuffer->iOffset = 0; |
| |
| if (pCmdBuffer->pCmdBase == nullptr) |
| { |
| eStatus = MOS_STATUS_NULL_POINTER; |
| return eStatus; |
| } |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::GetCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface); |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode |
| m_realCmdBuffer.pCmdBase = m_realCmdBuffer.pCmdPtr = nullptr; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, cmdBuffer, 0)); |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &m_realCmdBuffer, 0)); |
| |
| int currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0 || currentPipe >= m_numPipe) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| *cmdBuffer = m_singleTaskPhaseSupported ? m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][0] : m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][currentPass]; |
| |
| if (m_osInterface->osCpInterface->IsCpEnabled() && cmdBuffer->iOffset == 0) |
| { |
| // Insert CP Prolog |
| CODECHAL_ENCODE_NORMALMESSAGE("Adding cp prolog for secure scalable encode"); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetCpInterface()->AddProlog(m_osInterface, cmdBuffer)); |
| } |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, cmdBuffer, 0); |
| return eStatus; |
| } |
| |
| int currentPipe = GetCurrentPipe(); |
| if (currentPipe < 0 || currentPipe >= m_numPipe) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass; |
| m_veBatchBuffer[m_virtualEngineBbIndex][currentPipe][passIndex] = *cmdBuffer; |
| m_osInterface->pfnReturnCommandBuffer(m_osInterface, &m_realCmdBuffer, 0); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SubmitCommandBuffer( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool nullRendering) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| |
| if (UseRenderCommandBuffer() || m_numPipe == 1) |
| { |
| // legacy mode |
| if (!UseRenderCommandBuffer()) // Set VE Hints for video contexts only |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(cmdBuffer)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, cmdBuffer, nullRendering)); |
| return eStatus; |
| } |
| |
| bool cmdBufferReadyForSubmit = IsLastPipe(); |
| |
| // In STF, Hold the command buffer submission till last pass |
| if (m_singleTaskPhaseSupported) |
| { |
| cmdBufferReadyForSubmit = cmdBufferReadyForSubmit && IsLastPass(); |
| } |
| |
| if (!cmdBufferReadyForSubmit) |
| { |
| return eStatus; |
| } |
| |
| int currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass; |
| |
| for (uint32_t i = 0; i < m_numPipe; i++) |
| { |
| PMOS_COMMAND_BUFFER cmdBuffer = &m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex]; |
| |
| if (cmdBuffer->pCmdBase) |
| { |
| m_osInterface->pfnUnlockResource(m_osInterface, &cmdBuffer->OsResource); |
| } |
| |
| cmdBuffer->pCmdBase = 0; |
| cmdBuffer->iOffset = cmdBuffer->iRemaining = 0; |
| } |
| m_sizeOfVeBatchBuffer = 0; |
| |
| if (eStatus == MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetAndPopulateVEHintParams(&m_realCmdBuffer)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &m_realCmdBuffer, nullRendering)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetSliceStructs() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| eStatus = CodechalEncodeHevcBase::SetSliceStructs(); |
| m_numPassesInOnePipe = m_numPasses; |
| m_numPasses = (m_numPasses + 1) * m_numPipe - 1; |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AllocateTileStatistics() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (!m_hevcPicParams->tiles_enabled_flag) |
| { |
| return eStatus; |
| } |
| |
| auto num_tile_rows = m_hevcPicParams->num_tile_rows_minus1 + 1; |
| auto num_tile_columns = m_hevcPicParams->num_tile_columns_minus1 + 1; |
| auto num_tiles = num_tile_rows * num_tile_columns; |
| |
| MOS_ZeroMemory(&m_hevcFrameStatsOffset, sizeof(HEVC_TILE_STATS_INFO)); |
| MOS_ZeroMemory(&m_hevcTileStatsOffset, sizeof(HEVC_TILE_STATS_INFO)); |
| MOS_ZeroMemory(&m_hevcStatsSize, sizeof(HEVC_TILE_STATS_INFO)); |
| |
| MOS_LOCK_PARAMS lockFlagsWriteOnly; |
| MOS_ZeroMemory(&lockFlagsWriteOnly, sizeof(MOS_LOCK_PARAMS)); |
| lockFlagsWriteOnly.WriteOnly = true; |
| |
| // Set the maximum size based on frame level statistics. |
| m_hevcStatsSize.uiTileSizeRecord = CODECHAL_CACHELINE_SIZE; |
| m_hevcStatsSize.uiHevcPakStatistics = m_sizeOfHcpPakFrameStats; |
| m_hevcStatsSize.uiVdencStatistics = 0; |
| m_hevcStatsSize.uiHevcSliceStreamout = CODECHAL_CACHELINE_SIZE; |
| |
| // Maintain the offsets to use for patching addresses in to the HuC Pak Integration kernel Aggregated Frame Statistics Output Buffer |
| // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions |
| m_hevcFrameStatsOffset.uiTileSizeRecord = 0; // Tile Size Record is not present in resHuCPakAggregatedFrameStatsBuffer |
| m_hevcFrameStatsOffset.uiHevcPakStatistics = 0; |
| m_hevcFrameStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcPakStatistics + m_hevcStatsSize.uiHevcPakStatistics, CODECHAL_PAGE_SIZE); |
| m_hevcFrameStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiVdencStatistics + m_hevcStatsSize.uiVdencStatistics, CODECHAL_PAGE_SIZE); |
| |
| // Frame level statistics |
| m_hwInterface->m_pakIntAggregatedFrameStatsSize = MOS_ALIGN_CEIL(m_hevcFrameStatsOffset.uiHevcSliceStreamout + (m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6), CODECHAL_PAGE_SIZE); |
| |
| // HEVC Frame Statistics Buffer - Output from HuC PAK Integration kernel |
| if (Mos_ResourceIsNull(&m_resHuCPakAggregatedFrameStatsBuffer.sResource)) |
| { |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntAggregatedFrameStatsSize; |
| allocParamsForBufferLinear.pBufName = "GEN11 HCP Aggregated Frame Statistics Streamout Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resHuCPakAggregatedFrameStatsBuffer.sResource)); |
| m_resHuCPakAggregatedFrameStatsBuffer.dwSize = m_hwInterface->m_pakIntAggregatedFrameStatsSize; |
| |
| uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resHuCPakAggregatedFrameStatsBuffer.sResource, |
| &lockFlagsWriteOnly); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(pData); |
| MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resHuCPakAggregatedFrameStatsBuffer.sResource); |
| } |
| |
| // Maintain the offsets to use for patching addresses in to the Tile Based Statistics Buffer |
| // Each offset needs to be page aligned as the combined region is fed into different page aligned HuC regions |
| m_hevcTileStatsOffset.uiTileSizeRecord = 0; // TileReord is in a separated resource |
| m_hevcTileStatsOffset.uiHevcPakStatistics = 0; // PakStaticstics is head of m_resTileBasedStatisticsBuffer |
| m_hevcTileStatsOffset.uiVdencStatistics = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcPakStatistics + (m_hevcStatsSize.uiHevcPakStatistics * num_tiles), CODECHAL_PAGE_SIZE); |
| m_hevcTileStatsOffset.uiHevcSliceStreamout = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiVdencStatistics + (m_hevcStatsSize.uiVdencStatistics * num_tiles), CODECHAL_PAGE_SIZE); |
| // Combined statistics size for all tiles |
| m_hwInterface->m_pakIntTileStatsSize = MOS_ALIGN_CEIL(m_hevcTileStatsOffset.uiHevcSliceStreamout + m_hevcStatsSize.uiHevcSliceStreamout * CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6, CODECHAL_PAGE_SIZE); |
| |
| // Tile size record size for all tiles |
| m_hwInterface->m_tileRecordSize = m_hevcStatsSize.uiTileSizeRecord * num_tiles; |
| |
| if (Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource) || m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_pakIntTileStatsSize) |
| { |
| if (!Mos_ResourceIsNull(&m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource)) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = m_hwInterface->m_pakIntTileStatsSize; |
| allocParamsForBufferLinear.pBufName = "GEN11 HCP Tile Level Statistics Streamout Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource)); |
| m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_pakIntTileStatsSize; |
| |
| uint8_t *pData = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource, |
| &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(pData); |
| |
| MOS_ZeroMemory(pData, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| |
| if (Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource) || m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize < m_hwInterface->m_tileRecordSize) |
| { |
| if (!Mos_ResourceIsNull(&m_tileRecordBuffer[m_virtualEngineBbIndex].sResource)) |
| { |
| m_osInterface->pfnFreeResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; |
| MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); |
| allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; |
| allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; |
| allocParamsForBufferLinear.Format = Format_Buffer; |
| allocParamsForBufferLinear.dwBytes = m_hwInterface->m_tileRecordSize; |
| allocParamsForBufferLinear.pBufName = "Tile Record Buffer"; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource( |
| m_osInterface, |
| &allocParamsForBufferLinear, |
| &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource)); |
| m_tileRecordBuffer[m_virtualEngineBbIndex].dwSize = m_hwInterface->m_tileRecordSize; |
| |
| uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( |
| m_osInterface, |
| &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource, |
| &lockFlagsWriteOnly); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(data); |
| |
| MOS_ZeroMemory(data, allocParamsForBufferLinear.dwBytes); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_tileRecordBuffer[m_virtualEngineBbIndex].sResource); |
| } |
| |
| return eStatus; |
| } |
| |
| void CodechalEncHevcStateG12::SetHcpPipeBufAddrParams(MHW_VDBOX_PIPE_BUF_ADDR_PARAMS &pipeBufAddrParams) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CodechalEncodeHevcBase::SetHcpPipeBufAddrParams(pipeBufAddrParams); |
| |
| // SAO Row Store is GEN12 specific |
| pipeBufAddrParams.presSaoRowStoreBuffer = &m_SAORowStoreBuffer; |
| |
| PCODECHAL_ENCODE_BUFFER tileStatisticsBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex]; |
| if (!Mos_ResourceIsNull(&tileStatisticsBuffer->sResource) && (m_numPipe > 1)) |
| { |
| pipeBufAddrParams.presLcuBaseAddressBuffer = &tileStatisticsBuffer->sResource; |
| pipeBufAddrParams.dwLcuStreamOutOffset = m_hevcTileStatsOffset.uiHevcSliceStreamout; |
| pipeBufAddrParams.presFrameStatStreamOutBuffer = &tileStatisticsBuffer->sResource; |
| pipeBufAddrParams.dwFrameStatStreamOutOffset = m_hevcTileStatsOffset.uiHevcPakStatistics; |
| } |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::ReadSseStatistics(PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| if (!m_sseEnabled) |
| { |
| return eStatus; |
| } |
| |
| // encodeStatus is offset by 2 DWs in the resource |
| uint32_t sseOffsetinBytes = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2 + m_encodeStatusBuf.dwSumSquareErrorOffset; |
| for (auto i = 0; i < 6; i++) // 64 bit SSE values for luma/ chroma channels need to be copied |
| { |
| MHW_MI_COPY_MEM_MEM_PARAMS miCpyMemMemParams; |
| MOS_ZeroMemory(&miCpyMemMemParams, sizeof(miCpyMemMemParams)); |
| miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_numPipe > 1) ? &m_resHuCPakAggregatedFrameStatsBuffer.sResource : &m_resFrameStatStreamOutBuffer; |
| miCpyMemMemParams.dwSrcOffset = (HEVC_PAK_STATISTICS_SSE_OFFSET + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma |
| miCpyMemMemParams.presDst = &m_encodeStatusBuf.resStatusBuffer; |
| miCpyMemMemParams.dwDstOffset = sseOffsetinBytes + i * sizeof(uint32_t); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiCopyMemMemCmd(cmdBuffer, &miCpyMemMemParams)); |
| } |
| return eStatus; |
| } |
| |
| void CodechalEncHevcStateG12::SetHcpIndObjBaseAddrParams(MHW_VDBOX_IND_OBJ_BASE_ADDR_PARAMS &indObjBaseAddrParams) |
| { |
| PCODECHAL_ENCODE_BUFFER tileRecordBuffer = &m_tileRecordBuffer[m_virtualEngineBbIndex]; |
| bool useTileRecordBuffer = !Mos_ResourceIsNull(&tileRecordBuffer->sResource); |
| |
| MOS_ZeroMemory(&indObjBaseAddrParams, sizeof(indObjBaseAddrParams)); |
| indObjBaseAddrParams.Mode = CODECHAL_ENCODE_MODE_HEVC; |
| indObjBaseAddrParams.presMvObjectBuffer = IsPanicModePass() ? &m_skipFrameInfo.m_resMbCodeSkipFrameSurface : &m_resMbCodeSurface; |
| indObjBaseAddrParams.dwMvObjectOffset = m_mvOffset; |
| indObjBaseAddrParams.dwMvObjectSize = m_mbCodeSize - m_mvOffset; |
| indObjBaseAddrParams.presPakBaseObjectBuffer = &m_resBitstreamBuffer; |
| indObjBaseAddrParams.dwPakBaseObjectSize = m_bitstreamUpperBound; |
| indObjBaseAddrParams.presPakTileSizeStasBuffer = useTileRecordBuffer ? &tileRecordBuffer->sResource : nullptr; |
| indObjBaseAddrParams.dwPakTileSizeStasBufferSize = useTileRecordBuffer ? m_hwInterface->m_tileRecordSize : 0; |
| indObjBaseAddrParams.dwPakTileSizeRecordOffset = useTileRecordBuffer ? m_hevcTileStatsOffset.uiTileSizeRecord : 0; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::UpdateCmdBufAttribute( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| bool renderEngineInUse) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| // should not be there. Will remove it in the next change |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| if (MOS_VE_SUPPORTED(m_osInterface) && cmdBuffer->Attributes.pAttriVe) |
| { |
| PMOS_CMD_BUF_ATTRI_VE attriExt = |
| (PMOS_CMD_BUF_ATTRI_VE)(cmdBuffer->Attributes.pAttriVe); |
| |
| memset(attriExt, 0, sizeof(MOS_CMD_BUF_ATTRI_VE)); |
| attriExt->bUseVirtualEngineHint = |
| attriExt->VEngineHintParams.NeedSyncWithPrevious = !renderEngineInUse; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::SetAndPopulateVEHintParams( |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (!MOS_VE_SUPPORTED(m_osInterface)) |
| { |
| return eStatus; |
| } |
| |
| CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS scalSetParms; |
| MOS_ZeroMemory(&scalSetParms, sizeof(CODECHAL_ENCODE_SCALABILITY_SETHINT_PARMS)); |
| |
| if (!MOS_VE_CTXBASEDSCHEDULING_SUPPORTED(m_osInterface)) |
| { |
| scalSetParms.bNeedSyncWithPrevious = true; |
| } |
| |
| if (m_numPipe >= 2) |
| { |
| int32_t currentPass = GetCurrentPass(); |
| if (currentPass < 0 || currentPass >= CODECHAL_HEVC_MAX_NUM_BRC_PASSES) |
| { |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| uint8_t passIndex = m_singleTaskPhaseSupported ? 0 : currentPass; |
| for (auto i = 0; i < m_numPipe; i++) |
| { |
| scalSetParms.veBatchBuffer[i] = m_veBatchBuffer[m_virtualEngineBbIndex][i][passIndex].OsResource; |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_SetHintParams(this, m_scalabilityState, &scalSetParms)); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalEncodeScalability_PopulateHintParams(m_scalabilityState, cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::AddMediaVfeCmd( |
| PMOS_COMMAND_BUFFER cmdBuffer, |
| SendKernelCmdsParams *params) |
| { |
| CODECHAL_ENCODE_CHK_NULL_RETURN(params); |
| |
| MHW_VFE_PARAMS_G12 vfeParams = {}; |
| vfeParams.pKernelState = params->pKernelState; |
| vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL; |
| vfeParams.dwMaximumNumberofThreads = m_encodeVfeMaxThreads; |
| vfeParams.bFusedEuDispatch = false; // legacy mode |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams)); |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| #if USE_CODECHAL_DEBUG_TOOL |
| MOS_STATUS CodechalEncHevcStateG12::DumpFrameStatsBuffer(CodechalDebugInterface *debugInterface) |
| { |
| CODECHAL_ENCODE_CHK_NULL_RETURN(debugInterface); |
| |
| PMOS_RESOURCE resBuffer = &m_resFrameStatStreamOutBuffer; |
| uint32_t offset = 0; |
| uint32_t num_tiles = 1; |
| //In scalable mode, HEVC PAK Frame Statistics gets dumped out for each tile |
| if (m_numPipe > 1) |
| { |
| resBuffer = &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource; |
| offset = m_hevcTileStatsOffset.uiHevcPakStatistics; |
| num_tiles = (m_hevcPicParams->num_tile_rows_minus1 + 1) * (m_hevcPicParams->num_tile_columns_minus1 + 1); |
| } |
| uint32_t size = MOS_ALIGN_CEIL(m_sizeOfHcpPakFrameStats * num_tiles, CODECHAL_CACHELINE_SIZE); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( |
| resBuffer, |
| CodechalDbgAttr::attrFrameState, |
| "FrameStatus", |
| size, |
| offset, |
| CODECHAL_NUM_MEDIA_STATES)); |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_STATUS CodechalEncHevcStateG12::DumpPakOutput() |
| { |
| std::string currPassName = "PAK_PASS" + std::to_string((int)m_currPass); |
| |
| CODECHAL_DEBUG_TOOL( |
| int32_t currentPass = GetCurrentPass(); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_resPakcuLevelStreamoutData.sResource, |
| CodechalDbgAttr::attrCUStreamout, |
| currPassName.data(), |
| m_resPakcuLevelStreamoutData.dwSize, |
| 0, |
| CODECHAL_NUM_MEDIA_STATES)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].sResource, |
| CodechalDbgAttr::attrTileBasedStats, |
| currPassName.data(), |
| m_resTileBasedStatisticsBuffer[m_virtualEngineBbIndex].dwSize, |
| 0, |
| CODECHAL_NUM_MEDIA_STATES)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForWrite], |
| CodechalDbgAttr::attrBrcPakStats, |
| currPassName.data(), |
| m_hevcBrcPakStatisticsSize, |
| 0, |
| CODECHAL_NUM_MEDIA_STATES)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_HucStitchCmdBatchBuffer.OsResource, |
| CodechalDbgAttr::attr2ndLvlBatchMfx, |
| currPassName.data(), |
| m_hwInterface->m_HucStitchCmdBatchBufferSize, |
| 0, |
| CODECHAL_NUM_MEDIA_STATES)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_resHucStitchDataBuffer[m_currRecycledBufIdx][currentPass], |
| CodechalDbgAttr::attrHuCStitchDataBuf, |
| currPassName.data(), |
| sizeof(HucCommandData), |
| 0, |
| CODECHAL_NUM_MEDIA_STATES)); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpHucDmem( |
| &m_resHucPakStitchDmemBuffer[m_currRecycledBufIdx][currentPass], |
| sizeof(HucPakStitchDmemEncG12), |
| currentPass, |
| hucRegionDumpPakIntegrate));) |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| #endif |
| |
| MOS_STATUS CodechalEncHevcStateG12::EncodeMeKernel() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| if (m_hmeKernel && m_hmeKernel->Is4xMeEnabled()) |
| { |
| CodechalKernelHme::CurbeParam curbeParam; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeCurbeParams(curbeParam)); |
| |
| CodechalKernelHme::SurfaceParams surfaceParam; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMeSurfaceParams(surfaceParam)); |
| |
| m_hmeKernel->setnoMEKernelForPFrame(m_lowDelay); |
| |
| if (m_hmeKernel->Is16xMeEnabled()) |
| { |
| if (m_hmeKernel->Is32xMeEnabled()) |
| { |
| surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb32x; |
| surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb32x; |
| surfaceParam.downScaledBottomFieldOffset = m_scaled32xBottomFieldOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel32x)); |
| } |
| surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb16x; |
| surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb16x; |
| surfaceParam.downScaledBottomFieldOffset = m_scaled16xBottomFieldOffset; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel16x)); |
| } |
| surfaceParam.downScaledWidthInMb = m_downscaledWidthInMb4x; |
| surfaceParam.downScaledHeightInMb = m_downscaledFrameFieldHeightInMb4x; |
| surfaceParam.downScaledBottomFieldOffset = m_scaledBottomFieldOffset; |
| surfaceParam.meBrcDistortionSurface = m_brcBuffers.meBrcDistortionSurface; |
| |
| curbeParam.sumMVThreshold = m_sumMVThreshold; |
| |
| m_lastTaskInPhase = true; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hmeKernel->Execute(curbeParam, surfaceParam, CodechalKernelHme::HmeLevel::hmeLevel4x)); |
| } |
| |
| return MOS_STATUS_SUCCESS; |
| } |
| |
| void CodechalEncHevcStateG12::ResizeBufferOffset() |
| { |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| //Re-calculate aligned frame width/height + aligned Max LCU width/height when resolution reset occurs |
| uint32_t frameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH; |
| uint32_t frameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT; |
| |
| uint32_t widthAlignedMaxLcu = MOS_ALIGN_CEIL(frameWidth, MAX_LCU_SIZE); |
| uint32_t heightAlignedMaxLcu = MOS_ALIGN_CEIL(frameHeight, MAX_LCU_SIZE); |
| |
| uint32_t size = 0; |
| const uint32_t numLcu64 = widthAlignedMaxLcu * heightAlignedMaxLcu / 64 / 64; |
| MBENC_COMBINED_BUFFER2 fixedBuf; |
| |
| //Re-Calculate m_encBCombinedBuffer2 Size and Offsets |
| m_historyOutBufferSize = MOS_ALIGN_CEIL(32 * numLcu64, CODECHAL_CACHELINE_SIZE); |
| m_threadTaskBufferSize = MOS_ALIGN_CEIL(96 * numLcu64, CODECHAL_CACHELINE_SIZE); |
| |
| size = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE) + m_historyOutBufferSize + m_threadTaskBufferSize; |
| |
| m_historyOutBufferOffset = MOS_ALIGN_CEIL(sizeof(fixedBuf), CODECHAL_CACHELINE_SIZE); |
| m_threadTaskBufferOffset = m_historyOutBufferOffset + m_historyOutBufferSize; |
| } |