| /* |
| * Copyright (c) 2018, Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| //! |
| //! \file codechal_fei_hevc_g9_skl.cpp |
| //! \brief HEVC FEI dual-pipe encoder for GEN9 SKL. |
| //! |
| |
| #include "codechal_fei_hevc_g9_skl.h" |
| #include "igcodeckrn_g9.h" |
| #include "codeckrnheader.h" |
| |
// Padding size in bytes: 64 * 1024 = 65536.
// NOTE(review): judging by the name this is extra allocation padding for a
// GPU MMU workaround — its users are not visible in this part of the file.
#define GPUMMU_WA_PADDING (64 * 1024)
| |
| //! HEVC encoder kernel header structure for G9 SKL |
| struct CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL |
| { |
| int nKernelCount; //!< Total number of kernels |
| |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_2xDownSampling_Kernel; //!< 2x down sampling kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_32x32_PU_ModeDecision_Kernel; //!< Intra 32x32 PU mode decision kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_16x16_PU_SADComputation_Kernel; //!< Intra 16x16 PU SAD computation kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_16x16_PU_ModeDecision_Kernel; //!< Intra 16x16 PU mode decision kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_8x8_PU_Kernel; //!< Intra 8x8 PU mode decision kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_8x8_PU_FMode_Kernel; //!< Intra 8x8 PU final mode decision kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_PB_32x32_PU_IntraCheck; //!< P/B 32x32 PU intra mode check kernel |
| CODECHAL_KERNEL_HEADER HEVC_FEI_LCUEnc_PB_MB; //!< P/B MbEnc Kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_DS4HME; //!< 4x Scaling kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_P_HME; //!< P frame HME kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_B_HME; //!< B frame HME kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_I_COARSE; //!< Intra coarse kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_PB_Pak; //!< P/B frame PAK kernel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_BRC_Blockcopy; //!< BRC blockcopy kerenel |
| CODECHAL_KERNEL_HEADER Hevc_FEI_LCUEnc_DS_Combined; //!< Down scale and format conversion kernel for 10 bit for KBL |
| CODECHAL_KERNEL_HEADER HEVC_FEI_LCUEnc_P_MB; //!< P frame MbEnc kernel |
| }; |
| |
| using PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL = struct CODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL*; |
| |
| //! HEVC encoder FEI intra 8x8 PU final mode decision kernel curbe for GEN9 |
| struct CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 |
| { |
| union { |
| struct { |
| uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW0; |
| |
| union { |
| struct { |
| uint32_t SliceType : MOS_BITFIELD_RANGE(0, 1); |
| uint32_t PuType : MOS_BITFIELD_RANGE(2, 3); |
| uint32_t PakReordingFlag : MOS_BITFIELD_BIT(4); |
| uint32_t ReservedMBZ : MOS_BITFIELD_BIT(5); |
| uint32_t LCUType : MOS_BITFIELD_BIT(6); |
| uint32_t ScreenContentFlag : MOS_BITFIELD_BIT(7); |
| uint32_t IntraRefreshEn : MOS_BITFIELD_RANGE(8, 9); |
| uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(10); |
| uint32_t HalfUpdateMixedLCU : MOS_BITFIELD_BIT(11); |
| uint32_t Reserved_12_23 : MOS_BITFIELD_RANGE(12, 23); |
| uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(24); |
| uint32_t BRCEnable : MOS_BITFIELD_BIT(25); |
| uint32_t LCUBRCEnable : MOS_BITFIELD_BIT(26); |
| uint32_t ROIEnable : MOS_BITFIELD_BIT(27); |
| uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(28); |
| uint32_t EnableFlexibleParam : MOS_BITFIELD_BIT(29); |
| uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(30); |
| uint32_t EnableDebugDump : MOS_BITFIELD_BIT(31); |
| }; |
| uint32_t Value; |
| } DW1; |
| |
| union { |
| struct { |
| uint32_t LambdaForLuma; |
| }; |
| uint32_t Value; |
| } DW2; |
| |
| union { |
| // For inter frame or enable statictics data dump |
| struct { |
| uint32_t LambdaForDistCalculation; |
| }; |
| uint32_t Value; |
| } DW3; |
| |
| union { |
| struct { |
| uint32_t ModeCostFor8x8PU_TU8; |
| }; |
| uint32_t Value; |
| } DW4; |
| |
| union { |
| struct { |
| uint32_t ModeCostFor8x8PU_TU4; |
| }; |
| uint32_t Value; |
| } DW5; |
| |
| union { |
| struct { |
| uint32_t SATD16x16PuThreshold : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t BiasFactorToward8x8 : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW6; |
| |
| union { |
| struct { |
| uint32_t Qp : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t QpForInter : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW7; |
| |
| union { |
| struct { |
| uint32_t SimplifiedFlagForInter : MOS_BITFIELD_BIT(0); |
| uint32_t EnableStatsDataDump : MOS_BITFIELD_BIT(1); |
| uint32_t Reserved_2_7 : MOS_BITFIELD_RANGE(2, 7); |
| uint32_t KBLControlFlag : MOS_BITFIELD_BIT(8); |
| uint32_t Reserved_9_31 : MOS_BITFIELD_RANGE(9, 31); |
| }; |
| uint32_t Value; |
| } DW8; |
| |
| union { |
| struct { |
| uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| uint32_t Value; |
| } DW9; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW10; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW11; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW12; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW13; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW14; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW15; |
| |
| union { |
| struct { |
| uint32_t BTI_PAK_Object; |
| }; |
| uint32_t Value; |
| } DW16; |
| |
| union { |
| struct { |
| uint32_t BTI_VME_8x8_Mode; |
| }; |
| uint32_t Value; |
| } DW17; |
| |
| union { |
| struct { |
| uint32_t BTI_Intra_Mode; |
| }; |
| uint32_t Value; |
| } DW18; |
| |
| union { |
| struct { |
| uint32_t BTI_PAK_Command; |
| }; |
| uint32_t Value; |
| } DW19; |
| |
| union { |
| struct { |
| uint32_t BTI_Slice_Map; |
| }; |
| uint32_t Value; |
| } DW20; |
| |
| union { |
| struct { |
| uint32_t BTI_IntraDist; |
| }; |
| uint32_t Value; |
| } DW21; |
| |
| union { |
| struct { |
| uint32_t BTI_BRC_Input; |
| }; |
| uint32_t Value; |
| } DW22; |
| |
| union { |
| struct { |
| uint32_t BTI_Simplest_Intra; |
| }; |
| uint32_t Value; |
| } DW23; |
| |
| union { |
| struct { |
| uint32_t BTI_LCU_Qp_Surface; |
| }; |
| uint32_t Value; |
| } DW24; |
| |
| union { |
| struct { |
| uint32_t BTI_BRC_Data; |
| }; |
| uint32_t Value; |
| } DW25; |
| |
| union { |
| //Output (for inter and statictics data dump only) |
| struct { |
| uint32_t BTI_Haar_Dist16x16; |
| }; |
| uint32_t Value; |
| } DW26; |
| |
| union { |
| // This surface should take the statistics surface from Hevc_LCUEnc_I_32x32_PU_ModeDecision as input |
| struct { |
| uint32_t BTI_Stats_Data; |
| }; |
| uint32_t Value; |
| } DW27; |
| |
| union { |
| // Frame level Statistics data surface |
| struct { |
| uint32_t BTI_Frame_Stats_Data; |
| }; |
| uint32_t Value; |
| } DW28; |
| |
| union { |
| // Frame level CTB Distortion data surface |
| struct { |
| uint32_t BTI_CTB_Distortion_Surface; |
| }; |
| uint32_t Value; |
| } DW29; |
| |
| union { |
| struct { |
| uint32_t BTI_Debug; |
| }; |
| uint32_t Value; |
| } DW30; |
| }; |
| |
| using PCODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 = struct CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9*; |
| C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9)) == 31); |
| |
| //! HEVC encoder FEI B 32x32 PU intra check kernel curbe for GEN9 |
| struct CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 |
| { |
| union { |
| struct { |
| uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW0; |
| |
| union { |
| struct { |
| uint32_t SliceType : MOS_BITFIELD_RANGE(0, 1); |
| uint32_t Reserved : MOS_BITFIELD_RANGE(2, 7); |
| uint32_t Log2MinTUSize : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t Flags : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(24); |
| uint32_t HMEEnable : MOS_BITFIELD_BIT(25); |
| uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(26); |
| uint32_t Res_27_30 : MOS_BITFIELD_RANGE(27, 30); |
| uint32_t EnableDebugDump : MOS_BITFIELD_BIT(31); |
| }; |
| uint32_t Value; |
| } DW1; |
| |
| union { |
| struct { |
| uint32_t QpValue : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t QpMultiplier : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW2; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW3; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW4; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW5; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW6; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW7; |
| |
| union { |
| struct { |
| uint32_t BTI_Per32x32PuIntraCheck; |
| }; |
| uint32_t Value; |
| } DW8; |
| |
| union { |
| struct { |
| uint32_t BTI_Src_Y; |
| }; |
| uint32_t Value; |
| } DW9; |
| |
| union { |
| struct { |
| uint32_t BTI_Src_Y2X; |
| }; |
| uint32_t Value; |
| } DW10; |
| |
| union { |
| struct { |
| uint32_t BTI_Slice_Map; |
| }; |
| uint32_t Value; |
| } DW11; |
| |
| union { |
| struct { |
| uint32_t BTI_VME_Y2X; |
| }; |
| uint32_t Value; |
| } DW12; |
| |
| union { |
| struct { |
| uint32_t BTI_Simplest_Intra; // output only |
| }; |
| uint32_t Value; |
| } DW13; |
| |
| union { |
| struct { |
| uint32_t BTI_HME_MVPred; |
| }; |
| uint32_t Value; |
| } DW14; |
| |
| union { |
| struct { |
| uint32_t BTI_HME_Dist; |
| }; |
| uint32_t Value; |
| } DW15; |
| |
| union { |
| struct { |
| uint32_t BTI_LCU_Skip; |
| }; |
| uint32_t Value; |
| } DW16; |
| |
| union { |
| struct { |
| uint32_t BTI_Debug; |
| }; |
| uint32_t Value; |
| } DW17; |
| }; |
| |
| using PCODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9; |
| C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9)) == 18); |
| |
| //! HEVC encoder FEI B Pak kernel curbe for GEN9 |
| struct CODECHAL_FEI_HEVC_B_PAK_CURBE_G9 |
| { |
| union |
| { |
| struct |
| { |
| uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW0; |
| |
| union |
| { |
| struct |
| { |
| uint32_t Qp : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t Res_8_15 : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t MaxVmvR : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW1; |
| |
| union |
| { |
| struct |
| { |
| uint32_t SliceType : MOS_BITFIELD_RANGE(0, 1); |
| uint32_t EnableWA : MOS_BITFIELD_BIT( 2); |
| uint32_t Res_3_7 : MOS_BITFIELD_RANGE(3, 7); |
| uint32_t SimplestIntraEnable : MOS_BITFIELD_BIT(8); |
| uint32_t BrcEnable : MOS_BITFIELD_BIT(9); |
| uint32_t LcuBrcEnable : MOS_BITFIELD_BIT(10); |
| uint32_t ROIEnable : MOS_BITFIELD_BIT(11); |
| uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(12); |
| uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(13); |
| uint32_t Res_14 : MOS_BITFIELD_BIT(14); |
| uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(15); |
| uint32_t KBLControlFlag : MOS_BITFIELD_BIT(16); |
| uint32_t Res_17_30 : MOS_BITFIELD_RANGE(17, 30); |
| uint32_t ScreenContent : MOS_BITFIELD_BIT(31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW2; |
| |
| union |
| { |
| struct |
| { |
| uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW3; |
| |
| union |
| { |
| struct |
| { |
| uint32_t Reserved; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW4_15[12]; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_CU_Record; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW16; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_PAK_Obj; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW17; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_Slice_Map; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW18; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_Brc_Input; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW19; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_LCU_Qp; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW20; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_Brc_Data; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW21; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_MB_Data; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW22; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_MVP_Surface; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW23; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_WA_PAK_Data; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW24; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_WA_PAK_Obj; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW25; |
| |
| union |
| { |
| struct |
| { |
| uint32_t BTI_Debug; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW26; |
| |
| }; |
| |
| using PCODECHAL_FEI_HEVC_B_PAK_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_PAK_CURBE_G9*; |
| C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_PAK_CURBE_G9)) == 27); |
| |
| //! HEVC encoder B MBEnc kernel curbe for GEN9 |
| struct CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 |
| { |
| // DW0 |
| union |
| { |
| struct |
| { |
| uint32_t SkipModeEn : MOS_BITFIELD_BIT(0); |
| uint32_t AdaptiveEn : MOS_BITFIELD_BIT(1); |
| uint32_t BiMixDis : MOS_BITFIELD_BIT(2); |
| uint32_t : MOS_BITFIELD_RANGE(3, 4); |
| uint32_t EarlyImeSuccessEn : MOS_BITFIELD_BIT(5); |
| uint32_t : MOS_BITFIELD_BIT(6); |
| uint32_t T8x8FlagForInterEn : MOS_BITFIELD_BIT(7); |
| uint32_t : MOS_BITFIELD_RANGE(8, 23); |
| uint32_t EarlyImeStop : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW0; |
| |
| // DW1 |
| union |
| { |
| struct |
| { |
| uint32_t MaxNumMVs : MOS_BITFIELD_RANGE(0, 5); |
| uint32_t : MOS_BITFIELD_RANGE(6, 15); |
| uint32_t BiWeight : MOS_BITFIELD_RANGE(16, 21); |
| uint32_t : MOS_BITFIELD_RANGE(22, 27); |
| uint32_t UniMixDisable : MOS_BITFIELD_BIT(28); |
| uint32_t : MOS_BITFIELD_RANGE(29, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW1; |
| |
| // DW2 |
| union |
| { |
| struct |
| { |
| uint32_t LenSP : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t MaxNumSU : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t PicWidth : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW2; |
| |
| // DW3 |
| union |
| { |
| struct |
| { |
| uint32_t SrcSize : MOS_BITFIELD_RANGE(0, 1); |
| uint32_t : MOS_BITFIELD_RANGE(2, 3); |
| uint32_t MbTypeRemap : MOS_BITFIELD_RANGE(4, 5); |
| uint32_t SrcAccess : MOS_BITFIELD_BIT(6); |
| uint32_t RefAccess : MOS_BITFIELD_BIT(7); |
| uint32_t SearchCtrl : MOS_BITFIELD_RANGE(8, 10); |
| uint32_t DualSearchPathOption : MOS_BITFIELD_BIT(11); |
| uint32_t SubPelMode : MOS_BITFIELD_RANGE(12, 13); |
| uint32_t SkipType : MOS_BITFIELD_BIT(14); |
| uint32_t DisableFieldCacheAlloc : MOS_BITFIELD_BIT(15); |
| uint32_t InterChromaMode : MOS_BITFIELD_BIT(16); |
| uint32_t FTEnable : MOS_BITFIELD_BIT(17); |
| uint32_t BMEDisableFBR : MOS_BITFIELD_BIT(18); |
| uint32_t BlockBasedSkipEnable : MOS_BITFIELD_BIT(19); |
| uint32_t InterSAD : MOS_BITFIELD_RANGE(20, 21); |
| uint32_t IntraSAD : MOS_BITFIELD_RANGE(22, 23); |
| uint32_t SubMbPartMask : MOS_BITFIELD_RANGE(24, 30); |
| uint32_t : MOS_BITFIELD_BIT(31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW3; |
| |
| union |
| { |
| struct |
| { |
| uint32_t PicHeightMinus1 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t Res_16_22 : MOS_BITFIELD_RANGE(16,22); |
| uint32_t EnableQualityImprovement : MOS_BITFIELD_BIT(23); |
| uint32_t EnableDebug : MOS_BITFIELD_BIT(24); |
| uint32_t EnableFlexibleParam : MOS_BITFIELD_BIT(25); |
| uint32_t EnableStatsDataDump : MOS_BITFIELD_BIT(26); |
| uint32_t Res_27 : MOS_BITFIELD_BIT(27); |
| uint32_t HMEEnable : MOS_BITFIELD_BIT(28); |
| uint32_t SliceType : MOS_BITFIELD_RANGE(29, 30); |
| uint32_t UseActualRefQPValue : MOS_BITFIELD_BIT(31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW4; |
| |
| // DW5 |
| union |
| { |
| struct |
| { |
| uint32_t Res_0_15 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t RefWidth : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t RefHeight : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW5; |
| |
| union |
| { |
| struct |
| { |
| uint32_t FrameWidth : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t FrameHeight : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW6; |
| |
| // DW7 |
| union |
| { |
| struct |
| { |
| uint32_t IntraPartMask : MOS_BITFIELD_RANGE(0, 4); |
| uint32_t NonSkipZMvAdded : MOS_BITFIELD_BIT(5); |
| uint32_t NonSkipModeAdded : MOS_BITFIELD_BIT(6); |
| uint32_t LumaIntraSrcCornerSwap : MOS_BITFIELD_BIT(7); |
| uint32_t : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t MVCostScaleFactor : MOS_BITFIELD_RANGE(16, 17); |
| uint32_t BilinearEnable : MOS_BITFIELD_BIT(18); |
| uint32_t Res_19 : MOS_BITFIELD_BIT(19); |
| uint32_t WeightedSADHAAR : MOS_BITFIELD_BIT(20); |
| uint32_t AConlyHAAR : MOS_BITFIELD_BIT(21); |
| uint32_t RefIDCostMode : MOS_BITFIELD_BIT(22); |
| uint32_t : MOS_BITFIELD_BIT(23); |
| uint32_t SkipCenterMask : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW7; |
| |
| // DW8 |
| union |
| { |
| struct |
| { |
| uint32_t Mode0Cost : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t Mode1Cost : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t Mode2Cost : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t Mode3Cost : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW8; |
| |
| // DW9 |
| union |
| { |
| struct |
| { |
| uint32_t Mode4Cost : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t Mode5Cost : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t Mode6Cost : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t Mode7Cost : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW9; |
| |
| // DW10 |
| union |
| { |
| struct |
| { |
| uint32_t Mode8Cost : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t Mode9Cost : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t RefIDCost : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t ChromaIntraModeCost : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW10; |
| |
| // DW11 |
| union |
| { |
| struct |
| { |
| uint32_t MV0Cost : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t MV1Cost : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t MV2Cost : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t MV3Cost : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW11; |
| |
| // DW12 |
| union |
| { |
| struct |
| { |
| uint32_t MV4Cost : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t MV5Cost : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t MV6Cost : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t MV7Cost : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW12; |
| |
| // DW13 |
| union |
| { |
| struct |
| { |
| uint32_t QpPrimeY : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t QpPrimeCb : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t QpPrimeCr : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t TargetSizeInWord : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW13; |
| |
| // DW14 |
| union |
| { |
| struct |
| { |
| uint32_t SICFwdTransCoeffThreshold_0 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t SICFwdTransCoeffThreshold_1 : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t SICFwdTransCoeffThreshold_2 : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW14; |
| |
| // DW15 |
| union |
| { |
| struct |
| { |
| uint32_t SICFwdTransCoeffThreshold_3 : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t SICFwdTransCoeffThreshold_4 : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t SICFwdTransCoeffThreshold_5 : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t SICFwdTransCoeffThreshold_6 : MOS_BITFIELD_RANGE(24, 31); // Highest Freq |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW15; |
| |
| // DW16 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_0; |
| SearchPathDelta SPDelta_1; |
| SearchPathDelta SPDelta_2; |
| SearchPathDelta SPDelta_3; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW16; |
| |
| // DW17 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_4; |
| SearchPathDelta SPDelta_5; |
| SearchPathDelta SPDelta_6; |
| SearchPathDelta SPDelta_7; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW17; |
| |
| // DW18 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_8; |
| SearchPathDelta SPDelta_9; |
| SearchPathDelta SPDelta_10; |
| SearchPathDelta SPDelta_11; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW18; |
| |
| // DW19 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_12; |
| SearchPathDelta SPDelta_13; |
| SearchPathDelta SPDelta_14; |
| SearchPathDelta SPDelta_15; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW19; |
| |
| // DW20 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_16; |
| SearchPathDelta SPDelta_17; |
| SearchPathDelta SPDelta_18; |
| SearchPathDelta SPDelta_19; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW20; |
| |
| // DW21 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_20; |
| SearchPathDelta SPDelta_21; |
| SearchPathDelta SPDelta_22; |
| SearchPathDelta SPDelta_23; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW21; |
| |
| // DW22 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_24; |
| SearchPathDelta SPDelta_25; |
| SearchPathDelta SPDelta_26; |
| SearchPathDelta SPDelta_27; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW22; |
| |
| // DW23 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_28; |
| SearchPathDelta SPDelta_29; |
| SearchPathDelta SPDelta_30; |
| SearchPathDelta SPDelta_31; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW23; |
| |
| // DW24 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_32; |
| SearchPathDelta SPDelta_33; |
| SearchPathDelta SPDelta_34; |
| SearchPathDelta SPDelta_35; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW24; |
| |
| // DW25 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_36; |
| SearchPathDelta SPDelta_37; |
| SearchPathDelta SPDelta_38; |
| SearchPathDelta SPDelta_39; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW25; |
| |
| // DW26 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_40; |
| SearchPathDelta SPDelta_41; |
| SearchPathDelta SPDelta_42; |
| SearchPathDelta SPDelta_43; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW26; |
| |
| // DW27 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_44; |
| SearchPathDelta SPDelta_45; |
| SearchPathDelta SPDelta_46; |
| SearchPathDelta SPDelta_47; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW27; |
| |
| // DW28 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_48; |
| SearchPathDelta SPDelta_49; |
| SearchPathDelta SPDelta_50; |
| SearchPathDelta SPDelta_51; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW28; |
| |
| // DW29 |
| union |
| { |
| struct |
| { |
| SearchPathDelta SPDelta_52; |
| SearchPathDelta SPDelta_53; |
| SearchPathDelta SPDelta_54; |
| SearchPathDelta SPDelta_55; |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW29; |
| |
| // DW30 |
| union |
| { |
| struct |
| { |
| uint32_t Intra4x4ModeMask : MOS_BITFIELD_RANGE(0, 8); |
| uint32_t : MOS_BITFIELD_RANGE(9, 15); |
| uint32_t Intra8x8ModeMask : MOS_BITFIELD_RANGE(16, 24); |
| uint32_t : MOS_BITFIELD_RANGE(25, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW30; |
| |
| // DW31 |
| union |
| { |
| struct |
| { |
| uint32_t Intra16x16ModeMask : MOS_BITFIELD_RANGE(0, 3); |
| uint32_t IntraChromaModeMask : MOS_BITFIELD_RANGE(4, 7); |
| uint32_t IntraComputeType : MOS_BITFIELD_RANGE(8, 9); |
| uint32_t : MOS_BITFIELD_RANGE(10, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW31; |
| |
| // DW32 |
| union |
| { |
| struct |
| { |
| uint32_t SkipVal : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t MultiPredL0Disable : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t MultiPredL1Disable : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW32; |
| |
| // DW33 |
| union |
| { |
| struct |
| { |
| uint32_t Intra16x16NonDCPredPenalty : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t Intra8x8NonDCPredPenalty : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t Intra4x4NonDCPredPenalty : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| struct |
| { |
| uint32_t Value; |
| }; |
| } DW33; |
| |
| union { |
| struct { |
| uint32_t LambdaME; |
| }; |
| uint32_t Value; |
| } DW34; |
| |
| union { |
| struct { |
| uint32_t SimpIntraInterThreshold : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t ModeCostSp : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t IntraRefreshEn : MOS_BITFIELD_RANGE(24, 25); |
| uint32_t FirstIntraRefresh : MOS_BITFIELD_BIT(26); |
| uint32_t EnableRollingIntra : MOS_BITFIELD_BIT(27); |
| uint32_t HalfUpdateMixedLCU : MOS_BITFIELD_BIT(28); |
| uint32_t Res_29_31 : MOS_BITFIELD_RANGE(29, 31); |
| }; |
| uint32_t Value; |
| } DW35; |
| |
| union { |
| struct { |
| uint32_t NumRefIdxL0MinusOne : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t HMECombinedExtraSUs : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t NumRefIdxL1MinusOne : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t PowerSaving : MOS_BITFIELD_BIT(24); |
| uint32_t BRCEnable : MOS_BITFIELD_BIT(25); |
| uint32_t LCUBRCEnable : MOS_BITFIELD_BIT(26); |
| uint32_t ROIEnable : MOS_BITFIELD_BIT(27); |
| uint32_t FASTSurveillanceFlag : MOS_BITFIELD_BIT(28); |
| uint32_t CheckAllFractionalEnable : MOS_BITFIELD_BIT(29); |
| uint32_t HMECombinedOverlap : MOS_BITFIELD_RANGE(30, 31); |
| }; |
| uint32_t Value; |
| } DW36; |
| |
| union { |
| struct { |
| uint32_t ActualQpRefID0List0 : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t ActualQpRefID1List0 : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t ActualQpRefID2List0 : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t ActualQpRefID3List0 : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| uint32_t Value; |
| } DW37; |
| |
| union { |
| struct { |
| uint32_t NumIntraRefreshOffFrames : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t NumFrameInGOB : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW38; |
| |
| union { |
| struct { |
| uint32_t ActualQpRefID0List1 : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t ActualQpRefID1List1 : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t RefCost : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW39; |
| |
| union { |
| struct { |
| uint32_t TransformThreshold0 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t TransformThreshold1 : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW40; |
| |
| union { |
| struct { |
| uint32_t TransformThreshold2 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t TextureIntraCostThreshold : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW41; |
| |
| union { |
| struct |
| { |
| uint32_t NumMVPredictorsL0 : MOS_BITFIELD_RANGE(0, 3); |
| uint32_t NumMVPredictorsL1 : MOS_BITFIELD_RANGE(4, 7); |
| uint32_t Res_8 : MOS_BITFIELD_BIT(8); |
| uint32_t PerLCUQP : MOS_BITFIELD_BIT(9); |
| uint32_t PerCTBInput : MOS_BITFIELD_BIT(10); |
| uint32_t CTBDistortionOutput : MOS_BITFIELD_BIT(11); |
| uint32_t MVPredictorBlockSize : MOS_BITFIELD_RANGE(12, 14); |
| uint32_t Res_15 : MOS_BITFIELD_BIT(15); |
| uint32_t MultiPredL0 : MOS_BITFIELD_RANGE(16, 19); |
| uint32_t MultiPredL1 : MOS_BITFIELD_RANGE(20, 23); |
| uint32_t Res_24_31 : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| uint32_t Value; |
| } DW42; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW43; |
| |
| union { |
| struct { |
| uint32_t MaxNumMergeCandidates : MOS_BITFIELD_RANGE(0, 3); |
| uint32_t MaxNumRefList0 : MOS_BITFIELD_RANGE(4, 7); |
| uint32_t MaxNumRefList1 : MOS_BITFIELD_RANGE(8, 11); |
| uint32_t Res_12_15 : MOS_BITFIELD_RANGE(12, 15); |
| uint32_t MaxVmvR : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW44; |
| |
| union { |
| struct { |
| uint32_t TemporalMvpEnableFlag : MOS_BITFIELD_BIT(0); |
| uint32_t Res_1_7 : MOS_BITFIELD_RANGE(1, 7); |
| uint32_t Log2ParallelMergeLevel : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t HMECombineLenPslice : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t HMECombineLenBslice : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| uint32_t Value; |
| } DW45; |
| |
| union { |
| struct { |
| uint32_t Log2MinTUSize : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t Log2MaxTUSize : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t Log2MinCUSize : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t Log2MaxCUSize : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| uint32_t Value; |
| } DW46; |
| |
| union { |
| struct { |
| uint32_t NumRegionsInSlice : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t TypeOfWalkingPattern : MOS_BITFIELD_RANGE(8, 11); |
| uint32_t ChromaFlatnessCheckFlag : MOS_BITFIELD_BIT(12); |
| uint32_t EnableIntraEarlyExit : MOS_BITFIELD_BIT(13); |
| uint32_t SkipIntraKrnFlag : MOS_BITFIELD_BIT(14); |
| uint32_t ScreenContentFlag : MOS_BITFIELD_BIT(15); |
| uint32_t IsLowDelay : MOS_BITFIELD_BIT(16); |
| uint32_t CollocatedFromL0Flag : MOS_BITFIELD_BIT(17); |
| uint32_t ArbitarySliceFlag : MOS_BITFIELD_BIT(18); |
| uint32_t MultiSliceFlag : MOS_BITFIELD_BIT(19); |
| uint32_t Res_20_23 : MOS_BITFIELD_RANGE(20, 23); |
| uint32_t isCurrRefL0LongTerm : MOS_BITFIELD_BIT(24); |
| uint32_t isCurrRefL1LongTerm : MOS_BITFIELD_BIT(25); |
| uint32_t NumRegionMinus1 : MOS_BITFIELD_RANGE(26, 31); |
| }; |
| uint32_t Value; |
| } DW47; |
| |
| union { |
| struct { |
| uint32_t CurrentTdL0_0 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t CurrentTdL0_1 : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW48; |
| |
| union { |
| struct { |
| uint32_t CurrentTdL0_2 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t CurrentTdL0_3 : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW49; |
| |
| union { |
| struct { |
| uint32_t CurrentTdL1_0 : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t CurrentTdL1_1 : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW50; |
| |
| union { |
| struct { |
| uint32_t IntraRefreshMBNum : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t IntraRefreshUnitInMB : MOS_BITFIELD_RANGE(16, 23); |
| uint32_t IntraRefreshQPDelta : MOS_BITFIELD_RANGE(24, 31); |
| }; |
| uint32_t Value; |
| } DW51; |
| |
| union { |
| struct { |
| uint32_t NumofUnitInRegion : MOS_BITFIELD_RANGE(0, 15); |
| uint32_t MaxHeightInRegion : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW52; |
| |
| union { |
| struct { |
| uint32_t IntraRefreshRefWidth : MOS_BITFIELD_RANGE(0, 7); |
| uint32_t IntraRefreshRefHeight : MOS_BITFIELD_RANGE(8, 15); |
| uint32_t Res_16_31 : MOS_BITFIELD_RANGE(16, 31); |
| }; |
| uint32_t Value; |
| } DW53; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW54; |
| |
| union { |
| struct { |
| uint32_t Reserved; |
| }; |
| uint32_t Value; |
| } DW55; |
| |
| union { |
| struct { |
| uint32_t BTI_CU_Record; |
| }; |
| uint32_t Value; |
| } DW56; |
| |
| union { |
| struct { |
| uint32_t BTI_PAK_Cmd; |
| }; |
| uint32_t Value; |
| } DW57; |
| |
| union { |
| struct { |
| uint32_t BTI_Src_Y; |
| }; |
| uint32_t Value; |
| } DW58; |
| |
| union { |
| struct { |
| uint32_t BTI_Intra_Dist; |
| }; |
| uint32_t Value; |
| } DW59; |
| |
| union { |
| struct { |
| uint32_t BTI_Min_Dist; |
| }; |
| uint32_t Value; |
| } DW60; |
| |
| union { |
| struct { |
| uint32_t BTI_HMEMVPredFwdBwdSurfIndex; |
| }; |
| uint32_t Value; |
| } DW61; |
| |
| union { |
| struct { |
| uint32_t BTI_HMEDistSurfIndex; |
| }; |
| uint32_t Value; |
| } DW62; |
| |
| union { |
| struct { |
| uint32_t BTI_Slice_Map; |
| }; |
| uint32_t Value; |
| } DW63; |
| |
| union { |
| struct { |
| uint32_t BTI_VME_Saved_UNI_SIC; |
| }; |
| uint32_t Value; |
| } DW64; |
| |
| union { |
| struct { |
| uint32_t BTI_Simplest_Intra; |
| }; |
| uint32_t Value; |
| } DW65; |
| |
| union { |
| struct { |
| uint32_t BTI_Collocated_RefFrame; |
| }; |
| uint32_t Value; |
| } DW66; |
| |
| union { |
| struct { |
| uint32_t BTI_Reserved; |
| }; |
| uint32_t Value; |
| } DW67; |
| |
| union { |
| struct { |
| uint32_t BTI_BRC_Input; |
| }; |
| uint32_t Value; |
| } DW68; |
| |
| union { |
| struct { |
| uint32_t BTI_LCU_QP; |
| }; |
| uint32_t Value; |
| } DW69; |
| |
| union { |
| struct { |
| uint32_t BTI_BRC_Data; |
| }; |
| uint32_t Value; |
| } DW70; |
| |
| union { |
| struct { |
| uint32_t BTI_VMEInterPredictionSurfIndex; |
| }; |
| uint32_t Value; |
| } DW71; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_VMEInterPredictionBSurfIndex; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_ConcurrentThreadMap; |
| }; |
| uint32_t Value; |
| } DW72; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_ConcurrentThreadMap; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_MB_Data_CurFrame; |
| }; |
| uint32_t Value; |
| } DW73; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_MB_Data_CurFrame; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_MVP_CurFrame; |
| }; |
| uint32_t Value; |
| } DW74; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_MVP_CurFrame; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_Haar_Dist16x16; |
| }; |
| uint32_t Value; |
| } DW75; |
| |
| union { |
| // this surface need to take same surface name from Hevc_LCUEnc_I_8x8_PU_FMode_inLCU as input |
| //For B frame |
| struct { |
| uint32_t BTI_Haar_Dist16x16; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_Stats_Data; |
| }; |
| uint32_t Value; |
| } DW76; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_Stats_Data; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_Frame_Stats_Data; |
| }; |
| uint32_t Value; |
| } DW77; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_Frame_Stats_Data; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_MVPredictor_Surface; |
| }; |
| uint32_t Value; |
| } DW78; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_MVPredictor_Surface; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_CTB_Input_Surface; |
| }; |
| uint32_t Value; |
| } DW79; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_CTB_Input_Surface; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_CTB_Distortion_Output_Surface; |
| }; |
| uint32_t Value; |
| } DW80; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_CTB_Distortion_Output_Surface; |
| }; |
| //For P frame |
| struct { |
| uint32_t BTI_Debug; |
| }; |
| uint32_t Value; |
| } DW81; |
| |
| union { |
| //For B frame |
| struct { |
| uint32_t BTI_Debug; |
| }; |
| uint32_t Value; |
| } DW82; |
| }; |
| |
| using PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 = struct CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9*; |
| C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9)) == 83 ); |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::SetMbEncKernelParams(MHW_KERNEL_PARAM* kernelParams, uint32_t idx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams); |
| |
| auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment(); |
| |
| kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads; |
| kernelParams->iIdCount = 1; |
| |
| switch (idx) |
| { |
| case CODECHAL_HEVC_MBENC_2xSCALING: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_SCALING_FRAME_END - CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_32x32MD: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_32x32_PU_END - CODECHAL_HEVC_FEI_32x32_PU_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_16x16SAD: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_16x16_PU_SAD_END - CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 16; |
| kernelParams->iBlockHeight = 16; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_16x16MD: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_16x16_PU_MD_END - CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_8x8PU: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_8x8_PU_END - CODECHAL_HEVC_FEI_8x8_PU_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 8; |
| kernelParams->iBlockHeight = 8; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_8x8FMODE: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_8x8_PU_FMODE_END - CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_32x32INTRACHECK: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_32x32_PU_END - CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_BENC: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_MBENC_END - CODECHAL_HEVC_FEI_B_MBENC_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 16; |
| kernelParams->iBlockHeight = 16; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_BPAK: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_B_PAK_END - CODECHAL_HEVC_FEI_B_PAK_BEGIN; |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_ENC_HEVC_B_PAK_CURBE_G9), curbeAlignment); |
| kernelParams->iBlockWidth = 32; |
| kernelParams->iBlockHeight = 32; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED: |
| if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit)) |
| { |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_DS_COMBINED_END - CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN; |
| uint32_t uiDSCombinedKernelCurbeSize = sizeof(CODECHAL_ENC_HEVC_DS_COMBINED_CURBE_G9); |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(uiDSCombinedKernelCurbeSize, curbeAlignment); |
| kernelParams->iBlockWidth = 8; |
| kernelParams->iBlockHeight = 8; |
| } |
| else |
| { |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| } |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_PENC: |
| kernelParams->iBTCount = CODECHAL_HEVC_FEI_P_MBENC_END - CODECHAL_HEVC_FEI_P_MBENC_BEGIN; |
| //P MBEnc curbe has one less DWord than B MBEnc curbe |
| kernelParams->iCurbeLength = MOS_ALIGN_CEIL(sizeof(CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9) - sizeof(uint32_t), (size_t)curbeAlignment); |
| kernelParams->iBlockWidth = 16; |
| kernelParams->iBlockHeight = 16; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::SetMbEncBindingTable(PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable, uint32_t idx) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable); |
| |
| MOS_ZeroMemory(bindingTable, sizeof(*bindingTable)); |
| bindingTable->dwMediaState = ConvertKrnOpsToMediaState(ENC_MBENC, idx); |
| |
| switch (idx) |
| { |
| case CODECHAL_HEVC_MBENC_2xSCALING: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_SCALING_FRAME_END - CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_SCALING_FRAME_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_32x32MD: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_32x32_PU_END - CODECHAL_HEVC_FEI_32x32_PU_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_32x32_PU_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_16x16SAD: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_16x16_PU_SAD_END - CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_16x16_PU_SAD_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_16x16MD: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_16x16_PU_MD_END - CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_16x16_PU_MD_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_8x8PU: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_8x8_PU_END - CODECHAL_HEVC_FEI_8x8_PU_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_8x8_PU_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_8x8FMODE: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_8x8_PU_FMODE_END - CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_8x8_PU_FMODE_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_MBENC_32x32INTRACHECK: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_32x32_PU_END - CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_32x32_PU_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_BENC: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_MBENC_END - CODECHAL_HEVC_FEI_B_MBENC_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_MBENC_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_BPAK: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_B_PAK_END - CODECHAL_HEVC_FEI_B_PAK_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_B_PAK_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_DS_COMBINED_END - CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_DS_COMBINED_BEGIN; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_PENC: |
| bindingTable->dwNumBindingTableEntries = CODECHAL_HEVC_FEI_P_MBENC_END - CODECHAL_HEVC_FEI_P_MBENC_BEGIN; |
| bindingTable->dwBindingTableStartOffset = CODECHAL_HEVC_FEI_P_MBENC_BEGIN; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERT(false); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| for (uint32_t i = 0; i < bindingTable->dwNumBindingTableEntries; i++) |
| { |
| bindingTable->dwBindingTableEntries[i] = i; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::EndKernelCall( |
| CODECHAL_MEDIA_STATE_TYPE mediaStateType, |
| PMHW_KERNEL_STATE kernelState, |
| PMOS_COMMAND_BUFFER cmdBuffer) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcStateG9::EndKernelCall(mediaStateType, kernelState, cmdBuffer)); |
| |
| // skip haar distortion surface, statstics data dump surface |
| // and frame level statstics data surface because they are not used |
| #if 0 |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_encStatsBuffers.m_puStatsSurface, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_B_MBENC_PU_StatsSurface", |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_encStatsBuffers.m_8x8PuHaarDist, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_B_MBENC_8X8_PU_HaarDistSurface", |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_encStatsBuffers.m_8x8PuFrameStats.sResource, |
| "HEVC_B_MBENC_ConstantData_In", |
| CodechalDbgAttr::attrOutput, |
| m_encStatsBuffers.m_8x8PuFrameStats.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_encStatsBuffers.m_mbEncStatsSurface, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_B_MBENC_MB_ENC_StatsSurface", |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_encStatsBuffers.m_mbEncFrameStats.sResource, |
| "HEVC_B_MBENC_ConstantData_In", |
| CodechalDbgAttr::attrOutput, |
| m_encStatsBuffers.m_mbEncFrameStats.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| ) |
| #endif |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::InitKernelState() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // InitKernelStateMbEnc |
| m_numMbEncEncKrnStates = CODECHAL_HEVC_FEI_MBENC_NUM_BXT_SKL; |
| |
| m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates); |
| |
| m_mbEncKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory( |
| sizeof(GenericBindingTable) * m_numMbEncEncKrnStates); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable); |
| |
| auto krnStateIdx = m_mbEncKernelStates; |
| |
| for (uint32_t KrnStateIdx = 0; KrnStateIdx < m_numMbEncEncKrnStates; KrnStateIdx++) |
| { |
| auto kernelSize = m_combinedKernelSize; |
| CODECHAL_KERNEL_HEADER currKrnHeader; |
| |
| if (KrnStateIdx == CODECHAL_HEVC_FEI_MBENC_DS_COMBINED && |
| m_numMbEncEncKrnStates == CODECHAL_HEVC_FEI_MBENC_NUM_BXT_SKL) //Ignore. It isn't used on BXT. |
| { |
| krnStateIdx++; |
| continue; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize( |
| m_kernelBinary, |
| ENC_MBENC, |
| KrnStateIdx, |
| &currKrnHeader, |
| &kernelSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncKernelParams( |
| &krnStateIdx->KernelParams, |
| KrnStateIdx)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetMbEncBindingTable( |
| &m_mbEncKernelBindingTable[KrnStateIdx], KrnStateIdx)); |
| |
| krnStateIdx->dwCurbeOffset = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData(); |
| krnStateIdx->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| krnStateIdx->KernelParams.iSize = kernelSize; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested( |
| m_stateHeapInterface, |
| krnStateIdx->KernelParams.iBTCount, |
| &krnStateIdx->dwSshSize, |
| &krnStateIdx->dwBindingTableSize)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, krnStateIdx)); |
| |
| krnStateIdx++; |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::GetKernelHeaderAndSize( |
| void *binary, |
| EncOperation operation, |
| uint32_t krnStateIdx, |
| void *krnHeader, |
| uint32_t *krnSize) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(binary); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize); |
| |
| PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL kernelHeaderTable = (PCODECHAL_ENC_HEVC_KERNEL_HEADER_FEI_G9_SKL)binary; |
| PCODECHAL_KERNEL_HEADER currKrnHeader = nullptr; |
| |
| if (operation == ENC_SCALING4X) |
| { |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_DS4HME; |
| } |
| else if (operation == ENC_ME) |
| { |
| // SKL supports P frame. P HME index CODECHAL_ENCODE_ME_IDX_P is 0 and B HME index CODECHAL_ENCODE_ME_IDX_B is 1 |
| if (krnStateIdx == 0) |
| { |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_P_HME; |
| } |
| else |
| { |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_B_HME; |
| } |
| } |
| else if (operation == ENC_BRC) |
| { |
| switch (krnStateIdx) |
| { |
| case CODECHAL_HEVC_BRC_COARSE_INTRA: |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_COARSE; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| } |
| else if (operation == ENC_MBENC) |
| { |
| switch (krnStateIdx) |
| { |
| case CODECHAL_HEVC_MBENC_2xSCALING: |
| case CODECHAL_HEVC_MBENC_32x32MD: |
| case CODECHAL_HEVC_MBENC_16x16SAD: |
| case CODECHAL_HEVC_MBENC_16x16MD: |
| case CODECHAL_HEVC_MBENC_8x8PU: |
| case CODECHAL_HEVC_MBENC_8x8FMODE: |
| case CODECHAL_HEVC_MBENC_32x32INTRACHECK: |
| case CODECHAL_HEVC_FEI_MBENC_BENC: |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_I_2xDownSampling_Kernel; |
| currKrnHeader += krnStateIdx; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_BPAK: |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_PB_Pak; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_DS_COMBINED: |
| currKrnHeader = &kernelHeaderTable->Hevc_FEI_LCUEnc_DS_Combined; |
| break; |
| |
| case CODECHAL_HEVC_FEI_MBENC_PENC: |
| currKrnHeader = &kernelHeaderTable->HEVC_FEI_LCUEnc_P_MB; |
| break; |
| |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| } |
| else |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested"); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| *((PCODECHAL_KERNEL_HEADER)krnHeader) = *currKrnHeader; |
| |
| PCODECHAL_KERNEL_HEADER nextKrnHeader = (currKrnHeader + 1); |
| PCODECHAL_KERNEL_HEADER invalidEntry = (PCODECHAL_KERNEL_HEADER)(((uint8_t*)binary) + sizeof(*kernelHeaderTable)); |
| uint32_t nextKrnOffset = *krnSize; |
| |
| if (nextKrnHeader < invalidEntry) |
| { |
| nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT; |
| } |
| *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT); |
| |
| return eStatus; |
| } |
| |
| #ifndef HEVC_FEI_ENABLE_CMRT |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode2xScalingKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_2xSCALING; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto pScalingBindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo( |
| m_osInterface, |
| &m_scaled2xSurface)); |
| |
| // Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| //Setup CURBE |
| MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.PicWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.PicHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| uint32_t startBTI = 0; |
| curbe->DW8.BTI_Src_Y = pScalingBindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW9.BTI_Dst_Y = pScalingBindingTable->dwBindingTableEntries[startBTI++]; |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| // Add surface states, 2X scaling uses U16Norm surface format |
| startBTI = 0; |
| |
| // Source surface/s |
| auto surfaceCodecParams = &m_surfaceParams[SURFACE_RAW_Y]; |
| surfaceCodecParams->bUse16UnormSurfaceFormat = true; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y, |
| &pScalingBindingTable->dwBindingTableEntries[startBTI++] |
| )); |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_mmcState); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_mmcState->SetSurfaceParams(surfaceCodecParams)); |
| |
| // Destination surface/s |
| m_scaled2xSurface.dwWidth = MOS_ALIGN_CEIL((m_frameWidth / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_WIDTH); |
| m_scaled2xSurface.dwHeight = MOS_ALIGN_CEIL((m_frameHeight / SCALE_FACTOR_2x), CODECHAL_MACROBLOCK_HEIGHT); |
| |
| m_surfaceParams[SURFACE_Y_2X].bUse16UnormSurfaceFormat = |
| m_surfaceParams[SURFACE_Y_2X].bIsWritable = |
| m_surfaceParams[SURFACE_Y_2X].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_Y_2X, |
| &pScalingBindingTable->dwBindingTableEntries[startBTI++] |
| )); |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| // check kernel of Downscaling 2x kernels for Ultra HME. |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; |
| // The frame kernel process 32x32 input pixels and output 16x16 down sampled pixels |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5; |
| /* Enforce no dependency dispatch order for Scaling kernel, */ |
| walkerCodecParams.bNoDependency = true; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32x32PuModeDecisionKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32MD; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| // Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| //Setup CURBE |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| |
| CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR); |
| int32_t sliceQp = CalSliceQp(); |
| |
| double lambdaScalingFactor = 1.0; |
| double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp]; |
| double squaredQpLambda = qpLambda * qpLambda; |
| m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10)); |
| |
| CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/; |
| curbe->DW1.PuType = 0; // 32x32 PU |
| curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW1.SliceQp = sliceQp; |
| curbe->DW1.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled; |
| curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| |
| curbe->DW2.Lambda = m_fixedPointLambda; |
| |
| curbe->DW3.ModeCost32x32 = 0; |
| |
| curbe->DW4.EarlyExit = (uint32_t)-1; |
| if (curbe->DW1.EnableStatsDataDump) |
| { |
| double lambdaMd; |
| float hadBias = 2.0f; |
| |
| lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp]; |
| lambdaMd = lambdaMd * hadBias; |
| curbe->DW5.NewLambdaForHaarTransform = (uint32_t)(lambdaMd*(1<<10)); |
| } |
| |
| uint32_t startIndex = 0; |
| curbe->DW8.BTI_32x32PU_Output = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW9.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++]; |
| startIndex++; // skip one BTI for Y and UV have the same BTI |
| curbe->DW10.BTI_Src_Y2x = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW12.BTI_Src_Y2x_VME = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW13.BTI_Brc_Input = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW15.BTI_Brc_Data = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW16.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW17.BTI_Kernel_Debug = bindingTable->dwBindingTableEntries[startIndex++]; |
| |
| CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| //Add surface states |
| startIndex = 0; |
| |
| // 32x32 PU output |
| m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable = |
| m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_32x32_PU_OUTPUT, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Source Y and UV |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y_UV, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| startIndex ++; // UV index |
| |
| // Source Y2x |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_Y_2X, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Slice map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Source Y2x for VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_Y_2X_VME, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // BRC Input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_INPUT, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // LCU Qp surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // BRC data surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_DATA, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // skip statstics data dump surface because it is not used |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; /* looping for Walker is needed at 8x8 block level */ |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5; |
| walkerCodecParams.bNoDependency = true; /* Enforce no dependency dispatch order for 32x32 MD kernel */ |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16SadPuComputationKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16SAD; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| //Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| // Setup CURBE |
| CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd; |
| |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| |
| curbe->DW2.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW2.SimFlagForInter = false; |
| if(m_hevcPicParams->CodingType != I_TYPE) |
| { |
| curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance; |
| } |
| |
| uint32_t startIndex = 0; |
| curbe->DW8.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++]; |
| startIndex++; // skip UV BTI |
| curbe->DW9.BTI_Sad_16x16_PU_Output = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW10.BTI_32x32_Pu_ModeDecision = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW12.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW13.BTI_Debug = bindingTable->dwBindingTableEntries[startIndex++]; |
| |
| CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| //Add surface states |
| startIndex = 0; |
| |
| // Source Y and UV |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y_UV, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| startIndex++; |
| |
| // 16x16 PU SAD output |
| m_surfaceParams[SURFACE_16x16PU_SAD].bIsWritable = |
| m_surfaceParams[SURFACE_16x16PU_SAD].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_16x16PU_SAD, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // 32x32 PU MD data |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_32x32_PU_OUTPUT, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Slice map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Simplest Intra |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SIMPLIFIED_INTRA, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| /* looping for Walker is needed at 16x16 block level */ |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 16) >> 4; |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 16) >> 4; |
| /* Enforce no dependency dispatch order for the 16x16 SAD kernel */ |
| walkerCodecParams.bNoDependency = true; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16PuModeDecisionKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_16x16MD; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| // Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| // Setup CURBE |
| int32_t sliceQp = CalSliceQp(); |
| uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| |
| double lambdaScaleFactor = 0.46 + sliceQp - 22; |
| if (lambdaScaleFactor < 0) |
| { |
| lambdaScaleFactor = 0.46; |
| } |
| |
| if (lambdaScaleFactor > 15) |
| { |
| lambdaScaleFactor = 15; |
| } |
| |
| double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6); |
| m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10)); |
| |
| double lambdaScalingFactor = 1.0; |
| double qpLambda = m_qpLambdaMd[sliceType][sliceQp]; |
| double squaredQpLambda = qpLambda * qpLambda; |
| m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10)); |
| |
| LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR); |
| |
| CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.Log2MaxCUSize = log2MaxCUSize; |
| curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW1.SliceQp = sliceQp; |
| |
| curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma; |
| |
| curbe->DW3.LambdaScalingFactor = 1; |
| curbe->DW3.SliceType = sliceType; |
| curbe->DW3.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| curbe->DW3.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW3.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW3.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW3.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW3.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh; |
| //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel |
| curbe->DW3.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW3.HalfUpdateMixedLCU = 0; |
| curbe->DW3.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| |
| curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0; |
| curbe->DW4.IntraComputeType = 1; |
| curbe->DW4.AVCIntra8x8Mask = 0; |
| curbe->DW4.IntraSadAdjust = 2; |
| |
| double lambdaMd = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3)); |
| squredLambda = lambdaMd * lambdaMd; |
| uint32_t newLambda = (uint32_t)(squredLambda*(1<<10)); |
| curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda; |
| |
| curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| |
| curbe->DW7.ModeCostIntraNonPred = m_modeCost[0]; |
| curbe->DW7.ModeCostIntra16x16 = m_modeCost[1]; |
| curbe->DW7.ModeCostIntra8x8 = m_modeCost[2]; |
| curbe->DW7.ModeCostIntra4x4 = m_modeCost[3]; |
| |
| curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma; |
| |
| if (m_hevcPicParams->bEnableRollingIntraRefresh) |
| { |
| curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| } |
| |
| curbe->DW10.SimplifiedFlagForInter = 0; |
| if (m_encodeParams.bReportStatisticsEnabled) |
| { |
| curbe->DW10.HaarTransformMode = true; |
| } |
| else |
| { |
| curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE)? false: true; |
| } |
| |
| uint32_t startBTI = 0; |
| curbe->DW16.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++]; |
| startBTI++; // skip UV BTI |
| curbe->DW17.BTI_Sad_16x16_PU = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW18.BTI_PAK_Object = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW19.BTI_SAD_32x32_PU_mode = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW20.BTI_VME_Mode_8x8 = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW21.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW22.BTI_VME_Src = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW23.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW24.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW25.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW26.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW27.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++]; |
| |
| CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| //Add surface states |
| startBTI = 0; |
| |
| // Source Y and UV: |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y_UV, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| startBTI++; |
| |
| // 16x16 PU SAD output |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_16x16PU_SAD, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // PAK object output |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_CU_RECORD, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 32x32 PU MD data |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_32x32_PU_OUTPUT, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // VME 8x8 mode |
| m_surfaceParams[SURFACE_VME_8x8].bIsWritable = |
| m_surfaceParams[SURFACE_VME_8x8].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_VME_8x8, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Slice map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Source Y for VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_VME, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // BRC Input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_INPUT, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Simplest Intra |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SIMPLIFIED_INTRA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // LCU Qp surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // BRC data surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_DATA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| /* looping for Walker is needed at 32x32 block level in OPT case*/ |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5; |
| walkerCodecParams.bNoDependency = true; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8PU; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| // Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| // Setup CURBE |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW1.PuType = 2; // 8x8 |
| curbe->DW1.DcFilterFlag = true; |
| curbe->DW1.AngleRefineFlag = true; |
| curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/; |
| curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW1.QPValue = CalSliceQp(); |
| if (m_hevcPicParams->bEnableRollingIntraRefresh) |
| { |
| curbe->DW1.EnableRollingIntra = true; |
| curbe->DW1.IntraRefreshEn = true; |
| curbe->DW1.HalfUpdateMixedLCU = 0; |
| |
| curbe->DW5.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW5.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| int32_t qp = CalSliceQp(); |
| curbe->DW1.QPValue = (uint32_t)qp; |
| } |
| |
| curbe->DW2.LumaLambda = m_fixedPointLambdaForLuma; |
| |
| curbe->DW3.ChromaLambda = m_fixedPointLambdaForChroma; |
| |
| if (m_encodeParams.bReportStatisticsEnabled) |
| { |
| curbe->DW4.HaarTransformFlag = true; |
| } |
| else |
| { |
| curbe->DW4.HaarTransformFlag = (m_hevcPicParams->CodingType == I_TYPE) ? false : true; |
| } |
| curbe->DW4.SimplifiedFlagForInter = false; |
| |
| uint32_t startBTI = 0; |
| curbe->DW8.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++]; |
| startBTI++; // skip one BTI for Y and UV have the same BTI |
| curbe->DW9.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW10.BTI_VME_8x8_Mode = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW11.BTI_Intra_Mode = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW12.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW14.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW15.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW16.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++]; |
| |
| CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| if(m_numMb8x8IntraKernelSplit == 0) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| } |
| |
| //Add surface states |
| startBTI = 0; |
| |
| // Source Y and UV |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y_UV, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| startBTI++; |
| |
| // Slice Map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // VME 8x8 mode |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_VME_8x8, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Intra mode |
| m_surfaceParams[SURFACE_INTRA_MODE].bIsWritable = |
| m_surfaceParams[SURFACE_INTRA_MODE].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_INTRA_MODE, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // BRC Input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_INPUT, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Simplest Intra |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SIMPLIFIED_INTRA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // LCU Qp surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // BRC data surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_DATA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| // each EU is based on one 8x8 block |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) >> 3; |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT) >> 3; |
| /* Enforce no dependency dispatch order for 8x8 PU kernel */ |
| walkerCodecParams.bNoDependency = true; |
| |
| if(m_numMb8x8IntraKernelSplit == 0) |
| { |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| } |
| else |
| { |
| uint32_t numRowPerSplit = (walkerCodecParams.dwResolutionY + m_numMb8x8IntraKernelSplit - 1) / m_numMb8x8IntraKernelSplit; |
| uint32_t currentNumRow = 0; |
| |
| for(uint32_t i = 0; i < m_numMb8x8IntraKernelSplit; i++) |
| { |
| // Program render engine pipe commands |
| SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| sendKernelCmdsParams.bEnableCustomScoreBoard= true; |
| sendKernelCmdsParams.pCustomScoreBoard = &m_walkingPatternParam.ScoreBoard; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| if(currentNumRow + numRowPerSplit >= walkerCodecParams.dwResolutionY) |
| { |
| // the last split may not have the same number of rows as previous splits |
| numRowPerSplit = walkerCodecParams.dwResolutionY - currentNumRow; |
| } |
| |
| walkerParams.LocalStart.y = currentNumRow; |
| walkerParams.dwLocalLoopExecCount = numRowPerSplit * walkerCodecParams.dwResolutionX; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| currentNumRow += numRowPerSplit; |
| if(currentNumRow >= walkerCodecParams.dwResolutionY) |
| { |
| break; |
| } |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUFMODEKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_8x8FMODE; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| // Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| // Setup CURBE |
| int32_t qp = CalSliceQp(); |
| uint32_t sliceQp = (uint32_t)qp; |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| |
| CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW1.PuType = 2; |
| curbe->DW1.PakReordingFlag = (m_hevcPicParams->CodingType == I_TYPE)? true : false; |
| curbe->DW1.LCUType = (log2MaxCUSize == 6)? 0 /*64x64*/: 1 /*32x32*/; |
| curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW1.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW1.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW1.HalfUpdateMixedLCU = 0; |
| curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW2.LambdaForLuma = m_fixedPointLambdaForLuma; |
| |
| if (m_hevcPicParams->CodingType != I_TYPE || |
| m_encodeParams.bReportStatisticsEnabled) |
| { |
| float hadBias = 2.0f; |
| |
| double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp]; |
| lambdaMd = lambdaMd * hadBias; |
| curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10)); |
| } |
| curbe->DW4.ModeCostFor8x8PU_TU8 = 0; |
| curbe->DW5.ModeCostFor8x8PU_TU4 = 0; |
| curbe->DW6.SATD16x16PuThreshold = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0); |
| curbe->DW6.BiasFactorToward8x8 = (m_hevcPicParams->bScreenContent) ? 1024 : 1126+102; |
| curbe->DW7.Qp = sliceQp; |
| curbe->DW7.QpForInter = 0; |
| curbe->DW8.SimplifiedFlagForInter = false; |
| curbe->DW8.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled; |
| // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+ |
| curbe->DW8.KBLControlFlag = UsePlatformControlFlag(); |
| curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| uint32_t startBTI = 0; |
| curbe->DW16.BTI_PAK_Object = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW17.BTI_VME_8x8_Mode = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW18.BTI_Intra_Mode = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW19.BTI_PAK_Command = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW20.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW21.BTI_IntraDist = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW22.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW23.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW24.BTI_LCU_Qp_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW25.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW26.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW27.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW28.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW29.BTI_CTB_Distortion_Surface = 0; |
| startBTI++; |
| curbe->DW30.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++]; |
| |
| CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| //Add surface states |
| startBTI = 0; |
| |
| // PAK object |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_CU_RECORD, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // VME 8x8 mode |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_VME_8x8, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Intra mode |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_INTRA_MODE, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // PAK command |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_HCP_PAK, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Slice Map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Intra dist |
| m_surfaceParams[SURFACE_INTRA_DIST].bIsWritable = |
| m_surfaceParams[SURFACE_INTRA_DIST].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_INTRA_DIST, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // BRC Input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_INPUT, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // Simplest Intra |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SIMPLIFIED_INTRA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // LCU Qp surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // BRC data surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_DATA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // skip haar distortion surface, statstics data dump surface |
| // and frame level statstics data surface because they are not used |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| // each EU is based on one LCU |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, (1<<log2MaxCUSize)) >> log2MaxCUSize; |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, (1<<log2MaxCUSize)) >> log2MaxCUSize; |
| /* Enforce no dependency dispatch order for 8x8 PU FMODE kernel */ |
| walkerCodecParams.bNoDependency = true; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32X32BIntraCheckKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_MBENC_32x32INTRACHECK; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| // Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| // Setup CURBE |
| if (m_pictureCodingType == P_TYPE) |
| { |
| CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR); |
| } |
| else |
| { |
| CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR); |
| } |
| int32_t sliceQp = CalSliceQp(); |
| |
| double lambdaScalingFactor = 1.0; |
| double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp]; |
| double squaredQpLambda = qpLambda * qpLambda; |
| m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10)); |
| |
| CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1; |
| curbe->DW1.Flags = 0; |
| curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW1.SliceType = m_hevcSliceParams->slice_type; |
| curbe->DW1.HMEEnable = 0; |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| |
| curbe->DW2.QpMultiplier = 100; |
| curbe->DW2.QpValue = 0; // MBZ |
| |
| uint32_t startIndex = 0; |
| curbe->DW8.BTI_Per32x32PuIntraCheck = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW9.BTI_Src_Y = bindingTable->dwBindingTableEntries[startIndex++]; |
| startIndex++; // skip one BTI for Y and UV have the same BTI |
| curbe->DW10.BTI_Src_Y2X = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW11.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW12.BTI_VME_Y2X = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW13.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW14.BTI_HME_MVPred = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW15.BTI_HME_Dist = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW16.BTI_LCU_Skip = bindingTable->dwBindingTableEntries[startIndex++]; |
| curbe->DW17.BTI_Debug = bindingTable->dwBindingTableEntries[startIndex++]; |
| |
| CODECHAL_ENCODE_ASSERT(startIndex == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| //Add surface states |
| startIndex = 0; |
| |
| // 32x32 PU B Intra Check Output |
| m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bIsWritable = |
| m_surfaceParams[SURFACE_32x32_PU_OUTPUT].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_32x32_PU_OUTPUT, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Source Y and UV |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y_UV, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| startIndex++; |
| |
| // Source Y2x |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_Y_2X, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Slice map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Source Y2x for VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_Y_2X_VME, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // Simplest Intra |
| m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bIsWritable = |
| m_surfaceParams[SURFACE_SIMPLIFIED_INTRA].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SIMPLIFIED_INTRA, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| // skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME |
| startIndex += 2; |
| |
| // LCU Qp/Skip surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startIndex++])); |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| /* looping for Walker is needed at 8x8 block level */ |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5; |
| /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel */ |
| walkerCodecParams.bNoDependency = true; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8BPakKernel( |
| PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(pEncBCurbe); |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_FEI_MBENC_BPAK; |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| //Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| //Setup CURBE |
| CODECHAL_FEI_HEVC_B_PAK_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.MaxVmvR = pEncBCurbe->DW44.MaxVmvR; |
| curbe->DW1.Qp = pEncBCurbe->DW13.QpPrimeY; |
| curbe->DW2.BrcEnable = pEncBCurbe->DW36.BRCEnable; |
| curbe->DW2.LcuBrcEnable = pEncBCurbe->DW36.LCUBRCEnable; |
| curbe->DW2.ScreenContent = pEncBCurbe->DW47.ScreenContentFlag; |
| curbe->DW2.SimplestIntraEnable = pEncBCurbe->DW47.SkipIntraKrnFlag; |
| curbe->DW2.SliceType = pEncBCurbe->DW4.SliceType; |
| curbe->DW2.EnableWA = 0; |
| curbe->DW2.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW2.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+ |
| curbe->DW2.KBLControlFlag = UsePlatformControlFlag(); |
| curbe->DW2.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW2.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW3.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW3.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW3.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| uint32_t startBTI = 0; |
| curbe->DW16.BTI_CU_Record = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW17.BTI_PAK_Obj = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW18.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW19.BTI_Brc_Input = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW20.BTI_LCU_Qp = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW21.BTI_Brc_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW22.BTI_MB_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW23.BTI_MVP_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW24.BTI_WA_PAK_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW25.BTI_WA_PAK_Obj = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW26.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++]; |
| |
| CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable( |
| &cmdBuffer, |
| kernelState, |
| encFunctionType, |
| nullptr)); |
| |
| //Add surface states |
| startBTI = 0; |
| //0: CU record |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_CU_RECORD, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //1: PAK command |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_HCP_PAK, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //2: slice map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 3: BRC Input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_INPUT, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 4: LCU Qp |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 5: LCU BRC constant |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_DATA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 6: MV index buffer or MB data |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_MB_MV_INDEX, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 7: MVP index buffer |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_MVP_INDEX, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // skip 8 and 9 for SURFACE_WA_CU_RECORD and SURFACE_WA_HCP_PAK |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams; |
| MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams)); |
| walkerCodecParams.WalkerMode = m_walkerMode; |
| /* looping for Walker is needed at 8x8 block level */ |
| walkerCodecParams.dwResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5; |
| walkerCodecParams.dwResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5; |
| /* Enforce no dependency dispatch order for 32x32 B Intra Check kernel */ |
| walkerCodecParams.bNoDependency = true; |
| walkerCodecParams.wPictureCodingType = m_pictureCodingType; |
| walkerCodecParams.bUseScoreboard = m_useHwScoreboard; |
| |
| MHW_WALKER_PARAMS walkerParams; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams( |
| m_hwInterface, |
| &walkerParams, |
| &walkerCodecParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PBMbEncKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL); |
| |
| uint32_t krnIdx = CODECHAL_HEVC_FEI_MBENC_BENC; |
| if (m_pictureCodingType == P_TYPE) |
| { |
| //krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_FEI_MBENC_ADV_P : CODECHAL_HEVC_FEI_MBENC_PENC; |
| krnIdx = CODECHAL_HEVC_FEI_MBENC_PENC; |
| } |
| else if (m_pictureCodingType == B_TYPE) |
| { |
| // In TU7, we still need the original ENC B kernel to process the I frame |
| //krnIdx = m_hevcPicParams->bEnableRollingIntraRefresh ? CODECHAL_HEVC_FEI_MBENC_ADV : CODECHAL_HEVC_FEI_MBENC_BENC; |
| krnIdx = CODECHAL_HEVC_FEI_MBENC_BENC; |
| } |
| |
| auto kernelState = &m_mbEncKernelStates[krnIdx]; |
| auto bindingTable = &m_mbEncKernelBindingTable[krnIdx]; |
| if (m_firstTaskInPhase || !m_singleTaskPhaseSupported) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(RequestSshAndVerifyCommandBufferSize(kernelState)); |
| } |
| |
| int32_t sliceQp = CalSliceQp(); |
| uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType); |
| |
| if (m_feiPicParams->FastIntraMode) |
| { |
| // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped. |
| CalcLambda(sliceType, INTRA_TRANSFORM_HAAR); |
| } |
| LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR); |
| |
| uint8_t mbCodeIdxForTempMVP = 0xFF; |
| if(m_pictureCodingType != I_TYPE) |
| { |
| if(m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC) |
| { |
| uint8_t FrameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx; |
| |
| mbCodeIdxForTempMVP = m_refList[FrameIdx]->ucScalingIdx; |
| } |
| |
| if(mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag) |
| { |
| // Temporal reference MV index is invalid and so disable the temporal MVP |
| CODECHAL_ENCODE_ASSERT(false); |
| m_hevcSliceParams->slice_temporal_mvp_enable_flag = false; |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion()); |
| |
| //Setup DSH |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace( |
| m_stateHeapInterface, |
| kernelState, |
| false, |
| 0, |
| false, |
| m_storeData)); |
| |
| //Setup CURBE |
| uint8_t forwardTransformThd[7] = { 0 }; |
| CalcForwardCoeffThd(forwardTransformThd, sliceQp); |
| |
| uint32_t curbeSize = 0; |
| void *defaultCurbe = (void *)GetDefaultCurbeEncBKernel(curbeSize); |
| CODECHAL_ENCODE_ASSERT(defaultCurbe); |
| |
| CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize)); |
| |
| bool transform_8x8_mode_flag = true; |
| uint32_t SearchPath = (m_feiPicParams->SearchWindow == 5) ? 2 : 1; // 2 means full search, 1 means diamand search |
| uint32_t LenSP = m_feiPicParams->LenSP; |
| uint32_t RefWidth = m_feiPicParams->RefWidth; |
| uint32_t RefHeight = m_feiPicParams->RefHeight; |
| |
| switch (m_feiPicParams->SearchWindow) |
| { |
| case 0: |
| // not use predefined search window |
| if((m_feiPicParams->SearchPath != 0) && (m_feiPicParams->SearchPath != 1) && (m_feiPicParams->SearchPath != 2)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input SearchPath for SearchWindow=0 case!!!."); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| SearchPath = m_feiPicParams->SearchPath; |
| if(((RefWidth * RefHeight) > 2048) || (RefWidth > 64) || (RefHeight > 64)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input RefWidth/RefHeight size for SearchWindow=0 case!!!."); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| break; |
| case 1: |
| // Tiny SUs 24x24 window |
| RefWidth = 24; |
| RefHeight = 24; |
| LenSP = 4; |
| break; |
| case 2: |
| // Small SUs 28x28 window |
| RefWidth = 28; |
| RefHeight = 28; |
| LenSP = 9; |
| break; |
| case 3: |
| // Diamond SUs 48x40 window |
| RefWidth = 48; |
| RefHeight = 40; |
| LenSP = 16; |
| break; |
| case 4: |
| // Large Diamond SUs 48x40 window |
| RefWidth = 48; |
| RefHeight = 40; |
| LenSP = 32; |
| break; |
| case 5: |
| // Exhaustive SUs 48x40 window |
| RefWidth = 48; |
| RefHeight = 40; |
| LenSP = 48; |
| if (m_hevcSeqParams->TargetUsage != 7) |
| { |
| if (m_pictureCodingType == B_TYPE) |
| { |
| LenSP = 48; |
| } else { |
| LenSP = 57; |
| } |
| } else { |
| LenSP = 25; |
| } |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC SearchWindow value for HEVC FEI on SKL!!!."); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if((m_pictureCodingType == B_TYPE) && (curbe->DW3.BMEDisableFBR == 0)) |
| { |
| if(RefWidth > 32) |
| { |
| RefWidth = 32; |
| } |
| if(RefHeight > 32) |
| { |
| RefHeight = 32; |
| } |
| } |
| |
| curbe->DW0.AdaptiveEn = m_feiPicParams->AdaptiveSearch; |
| curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag; |
| curbe->DW2.PicWidth = m_picWidthInMb; |
| curbe->DW2.LenSP = LenSP; |
| curbe->DW3.SrcAccess = curbe->DW3.RefAccess = 0; |
| if (m_feiPicParams->FastIntraMode) |
| { |
| curbe->DW3.FTEnable = (m_ftqBasedSkip[0x07] >> 1) & 0x01; |
| } |
| else |
| { |
| curbe->DW3.FTEnable = (m_ftqBasedSkip[0x04] >> 1) & 0x01; |
| } |
| curbe->DW3.SubPelMode = m_feiPicParams->SubPelMode; |
| |
| curbe->DW4.PicHeightMinus1 = m_picHeightInMb - 1; |
| curbe->DW4.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled; |
| curbe->DW4.HMEEnable = 0; |
| curbe->DW4.SliceType = sliceType; |
| curbe->DW4.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW4.UseActualRefQPValue = false; |
| |
| curbe->DW5.RefWidth = RefWidth; |
| curbe->DW5.RefHeight = RefHeight; |
| |
| curbe->DW7.IntraPartMask = 0x3; |
| |
| curbe->DW6.FrameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH; |
| curbe->DW6.FrameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT; |
| |
| curbe->DW8.Mode0Cost = m_modeCost[0]; |
| curbe->DW8.Mode1Cost = m_modeCost[1]; |
| curbe->DW8.Mode2Cost = m_modeCost[2]; |
| curbe->DW8.Mode3Cost = m_modeCost[3]; |
| |
| curbe->DW9.Mode4Cost = m_modeCost[4]; |
| curbe->DW9.Mode5Cost = m_modeCost[5]; |
| curbe->DW9.Mode6Cost = m_modeCost[6]; |
| curbe->DW9.Mode7Cost = m_modeCost[7]; |
| |
| curbe->DW10.Mode8Cost= m_modeCost[8]; |
| curbe->DW10.Mode9Cost= m_modeCost[9]; |
| curbe->DW10.RefIDCost = m_modeCost[10]; |
| curbe->DW10.ChromaIntraModeCost = m_modeCost[11]; |
| |
| curbe->DW11.MV0Cost = m_mvCost[0]; |
| curbe->DW11.MV1Cost = m_mvCost[1]; |
| curbe->DW11.MV2Cost = m_mvCost[2]; |
| curbe->DW11.MV3Cost = m_mvCost[3]; |
| |
| curbe->DW12.MV4Cost = m_mvCost[4]; |
| curbe->DW12.MV5Cost = m_mvCost[5]; |
| curbe->DW12.MV6Cost = m_mvCost[6]; |
| curbe->DW12.MV7Cost = m_mvCost[7]; |
| |
| curbe->DW13.QpPrimeY = sliceQp; |
| uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only |
| int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8; |
| int32_t qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset)); |
| int32_t QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30]; |
| curbe->DW13.QpPrimeCb= QPc + qpBdOffsetC; |
| qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset)); |
| QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30]; |
| curbe->DW13.QpPrimeCr= QPc; |
| |
| curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0]; |
| curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1]; |
| curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2]; |
| |
| curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3]; |
| curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4]; |
| curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5]; |
| curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6]; |
| |
| if (SearchPath == 1) |
| { |
| // diamond search |
| if (m_pictureCodingType == P_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7PCurbeInit[16]), 14 * sizeof(uint32_t))); |
| } |
| else if (m_pictureCodingType == B_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7BCurbeInit[16]), 14 * sizeof(uint32_t))); |
| } |
| } |
| else if((SearchPath != 0) && (SearchPath != 2)) |
| { |
| // default 0 and 2 are full spiral search |
| CODECHAL_ENCODE_ASSERT(false); |
| } |
| |
| curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp]; |
| |
| if(m_pictureCodingType == I_TYPE) |
| { |
| *(float*)&(curbe->DW34.LambdaME) = 0.0; |
| } |
| else if (m_pictureCodingType == P_TYPE) |
| { |
| *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp]; |
| } |
| else |
| { |
| *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp]; |
| } |
| |
| curbe->DW35.ModeCostSp = m_modeCostSp; |
| curbe->DW35.SimpIntraInterThreshold = m_simplestIntraInterThreshold; |
| |
| curbe->DW36.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1; |
| curbe->DW36.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1; |
| curbe->DW36.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW36.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW36.PowerSaving = m_powerSavingEnabled; |
| curbe->DW36.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW36.FASTSurveillanceFlag= (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| |
| if(m_pictureCodingType != I_TYPE) |
| { |
| curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0); |
| curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1); |
| curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2); |
| curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3); |
| curbe->DW41.TextureIntraCostThreshold = 500; |
| |
| if(m_pictureCodingType == B_TYPE) { |
| curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0); |
| curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1); |
| float lambda_me = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp]; |
| if (m_encodeParams.bQualityImprovementEnable) |
| { |
| curbe->DW40.TransformThreshold0 = (uint16_t) (lambda_me * 56.25 + 0.5); |
| curbe->DW40.TransformThreshold1 = (uint16_t) (lambda_me * 21 + 0.5); |
| curbe->DW41.TransformThreshold2 = (uint16_t) (lambda_me * 9 + 0.5); |
| } |
| } |
| } |
| |
| curbe->DW42.NumMVPredictorsL0 = m_feiPicParams->NumMVPredictorsL0; |
| curbe->DW42.NumMVPredictorsL1 = m_feiPicParams->NumMVPredictorsL1; |
| curbe->DW42.PerLCUQP = m_encodeParams.bMbQpDataEnabled; |
| curbe->DW42.PerCTBInput = m_feiPicParams->bPerCTBInput; |
| curbe->DW42.CTBDistortionOutput = m_feiPicParams->bDistortionEnable; |
| curbe->DW42.MultiPredL0 = m_feiPicParams->MultiPredL0; |
| curbe->DW42.MultiPredL1 = m_feiPicParams->MultiPredL1; |
| curbe->DW42.MVPredictorBlockSize = m_feiPicParams->MVPredictorInput; |
| |
| curbe->DW44.MaxVmvR = 511 * 4; |
| curbe->DW44.MaxNumMergeCandidates = m_hevcSliceParams->MaxNumMergeCand; |
| |
| if(m_pictureCodingType != I_TYPE) |
| { |
| curbe->DW44.MaxNumRefList0 = curbe->DW36.NumRefIdxL0MinusOne + 1; |
| |
| curbe->DW45.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag; |
| curbe->DW45.HMECombineLenPslice = 8; |
| if(m_pictureCodingType == B_TYPE) |
| { |
| curbe->DW44.MaxNumRefList1 = curbe->DW36.NumRefIdxL1MinusOne + 1; |
| curbe->DW45.HMECombineLenBslice = 8; |
| } |
| } |
| |
| curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2; |
| |
| curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2; |
| curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| |
| curbe->DW47.NumRegionsInSlice = m_numRegionsInSlice; |
| curbe->DW47.TypeOfWalkingPattern = m_enable26WalkingPattern; |
| curbe->DW47.ChromaFlatnessCheckFlag= (m_feiPicParams->FastIntraMode) ? 0 : 1; |
| curbe->DW47.EnableIntraEarlyExit = (m_feiPicParams->FastIntraMode) ? 0 : 1; |
| curbe->DW47.SkipIntraKrnFlag = (m_feiPicParams->FastIntraMode) ? 1 : 0; |
| curbe->DW47.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag; |
| curbe->DW47.IsLowDelay = m_lowDelay; |
| curbe->DW47.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| curbe->DW47.MultiSliceFlag = (m_numSlices > 1); |
| curbe->DW47.ArbitarySliceFlag = m_arbitraryNumMbsInSlice; |
| curbe->DW47.NumRegionMinus1 = m_walkingPatternParam.dwNumRegion - 1; |
| |
| if(m_pictureCodingType != I_TYPE) |
| { |
| curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]); |
| curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]); |
| curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]); |
| curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]); |
| if(m_pictureCodingType == B_TYPE) { |
| curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]); |
| curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]); |
| } |
| } |
| |
| curbe->DW52.NumofUnitInRegion = m_walkingPatternParam.dwNumUnitsInRegion; |
| curbe->DW52.MaxHeightInRegion = m_walkingPatternParam.dwMaxHeightInRegion; |
| |
| uint32_t startBTI = 0; |
| curbe->DW56.BTI_CU_Record = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW57.BTI_PAK_Cmd = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW58.BTI_Src_Y = bindingTable->dwBindingTableEntries[startBTI++]; |
| startBTI++; //skip UV index |
| curbe->DW59.BTI_Intra_Dist = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW60.BTI_Min_Dist = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW61.BTI_HMEMVPredFwdBwdSurfIndex = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW62.BTI_HMEDistSurfIndex = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW63.BTI_Slice_Map = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW64.BTI_VME_Saved_UNI_SIC = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW65.BTI_Simplest_Intra = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW66.BTI_Collocated_RefFrame = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW67.BTI_Reserved = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW68.BTI_BRC_Input = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW69.BTI_LCU_QP = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW70.BTI_BRC_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW71.BTI_VMEInterPredictionSurfIndex= bindingTable->dwBindingTableEntries[startBTI++]; |
| if(m_pictureCodingType == P_TYPE) |
| { |
| //P MBEnc curbe 72~75 are different from B frame. |
| startBTI += (CODECHAL_HEVC_P_MBENC_CONCURRENT_THD_MAP - CODECHAL_HEVC_P_MBENC_VME_FORWARD_0); |
| curbe->DW72.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW73.BTI_MB_Data_CurFrame = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW74.BTI_MVP_CurFrame = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW75.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW76.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW77.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW78.BTI_MVPredictor_Surface= bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW79.BTI_CTB_Input_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW80.BTI_CTB_Distortion_Output_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW81.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++]; |
| } |
| else |
| { |
| startBTI += (CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_VME_FORWARD_0 + 1); |
| |
| curbe->DW72.BTI_VMEInterPredictionBSurfIndex = bindingTable->dwBindingTableEntries[startBTI++]; |
| startBTI += (CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_VME_MUL_BACKWARD_0 + 1); |
| |
| curbe->DW73.BTI_ConcurrentThreadMap= bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW74.BTI_MB_Data_CurFrame = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW75.BTI_MVP_CurFrame = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW76.BTI_Haar_Dist16x16 = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW77.BTI_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW78.BTI_Frame_Stats_Data = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW79.BTI_MVPredictor_Surface= bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW80.BTI_CTB_Input_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW81.BTI_CTB_Distortion_Output_Surface = bindingTable->dwBindingTableEntries[startBTI++]; |
| curbe->DW82.BTI_Debug = bindingTable->dwBindingTableEntries[startBTI++]; |
| } |
| |
| // Intra refresh is enabled. Program related CURBE fields |
| if (m_hevcPicParams->bEnableRollingIntraRefresh) |
| { |
| curbe->DW35.IntraRefreshEn = true; |
| curbe->DW35.FirstIntraRefresh = m_firstIntraRefresh; |
| curbe->DW35.HalfUpdateMixedLCU = 0; |
| curbe->DW35.EnableRollingIntra = true; |
| |
| curbe->DW38.NumFrameInGOB = m_frameNumInGob; |
| curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh; |
| |
| curbe->DW51.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW51.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| curbe->DW53.IntraRefreshRefHeight = 40; |
| curbe->DW53.IntraRefreshRefWidth = 48; |
| |
| m_firstIntraRefresh = false; |
| m_frameNumWithoutIntraRefresh = 0; |
| } |
| else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames |
| { |
| m_frameNumWithoutIntraRefresh++; |
| } |
| |
| CODECHAL_ENCODE_ASSERT(startBTI == bindingTable->dwNumBindingTableEntries); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC; |
| if (m_pictureCodingType == P_TYPE) |
| { |
| //P frame curbe only use the DW0~DW75 |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd) - sizeof(uint32_t))); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN( |
| AddCurbeToStateHeap(kernelState, encFunctionType, &cmd, sizeof(cmd))); |
| } |
| |
| MOS_COMMAND_BUFFER cmdBuffer; |
| if(m_numMbBKernelSplit == 0) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendKernelCmdsAndBindingTable(&cmdBuffer, |
| kernelState, |
| encFunctionType, |
| &m_walkingPatternParam.ScoreBoard)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCommandBuffer(&cmdBuffer)); |
| |
| MHW_INTERFACE_DESCRIPTOR_PARAMS idParams; |
| MOS_ZeroMemory(&idParams, sizeof(idParams)); |
| idParams.pKernelState = kernelState; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor( |
| m_stateHeapInterface, |
| 1, |
| &idParams)); |
| |
| // Add binding table |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable( |
| m_stateHeapInterface, |
| kernelState)); |
| } |
| |
| //Add surface states |
| startBTI = 0; |
| |
| //0: CU record |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_CU_RECORD, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //1: PAK command |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_HCP_PAK, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //2 and 3 Source Y and UV |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_Y_UV, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| startBTI++; |
| |
| //4: Intra dist |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_INTRA_DIST, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //5: min distortion |
| m_surfaceParams[SURFACE_MIN_DIST].bIsWritable = |
| m_surfaceParams[SURFACE_MIN_DIST].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_MIN_DIST, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 6 and 7, skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME |
| startBTI += 2; |
| |
| //8: slice map |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SLICE_MAP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //9: VME UNI and SIC data |
| m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bIsWritable = |
| m_surfaceParams[SURFACE_VME_UNI_SIC_DATA].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_VME_UNI_SIC_DATA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| //10: Simplest Intra |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_SIMPLIFIED_INTRA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 11: Reference frame col-located data surface |
| if(mbCodeIdxForTempMVP == 0xFF) |
| { |
| startBTI++; |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_COL_MB_MV, |
| &bindingTable->dwBindingTableEntries[startBTI++], |
| m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP))); |
| } |
| |
| // 12: Current frame col-located data surface -- reserved now |
| startBTI++; |
| |
| // 13: BRC Input |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_INPUT, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 14: LCU Qp |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_LCU_QP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 15: LCU BRC constant |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_BRC_DATA, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // 16 - 32 Current plus forward and backward surface 0-7 |
| //16: Source Y for VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_VME, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx]; |
| if(!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_REF_FRAME_VME, |
| &bindingTable->dwBindingTableEntries[startBTI++], |
| &m_refList[idx]->sRefBuffer, |
| curbe->DW6.FrameWidth, |
| curbe->DW6.FrameHeight)); |
| |
| } |
| else |
| { |
| // Skip the binding table index because it is not used |
| startBTI++; |
| } |
| |
| refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx]; |
| if(!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_REF_FRAME_VME, |
| &bindingTable->dwBindingTableEntries[startBTI++], |
| &m_refList[idx]->sRefBuffer, |
| curbe->DW6.FrameWidth, |
| curbe->DW6.FrameHeight)); |
| |
| } |
| else |
| { |
| // Skip the binding table index because it is not used |
| startBTI++; |
| } |
| } |
| CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_BACKWARD_7 - CODECHAL_HEVC_B_MBENC_BEGIN + 1); |
| |
| if (m_pictureCodingType != P_TYPE) |
| { |
| //33-41 VME multi-ref BTI -- Current plus [backward, nil][0..3] |
| //33: Current Y VME surface |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_RAW_VME, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| for(uint32_t surfaceIdx = 0; surfaceIdx < 4; surfaceIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[1][surfaceIdx]; |
| if(!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| |
| // Picture Y VME |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_REF_FRAME_VME, |
| &bindingTable->dwBindingTableEntries[startBTI++], |
| &m_refList[idx]->sRefBuffer, |
| curbe->DW6.FrameWidth, |
| curbe->DW6.FrameHeight)); |
| } |
| else |
| { |
| // Skip the binding table index because it is not used |
| startBTI++; |
| } |
| |
| // Skip the binding table index because it is not used |
| startBTI++; |
| } |
| CODECHAL_ENCODE_ASSERT(startBTI == CODECHAL_HEVC_B_MBENC_VME_MUL_NOUSE_3 - CODECHAL_HEVC_B_MBENC_BEGIN + 1); |
| } |
| |
| // B 42 or P 33: Concurrent thread |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| (SURFACE_ID)(SURFACE_CONCURRENT_THREAD + m_concurrentThreadIndex), |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD) |
| { |
| m_concurrentThreadIndex = 0; |
| } |
| |
| // B 43 or P 34: MV index buffer |
| m_surfaceParams[SURFACE_MB_MV_INDEX].bIsWritable = |
| m_surfaceParams[SURFACE_MB_MV_INDEX].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_MB_MV_INDEX, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // B 44: or P 35: MVP index buffer |
| m_surfaceParams[SURFACE_MVP_INDEX].bIsWritable = |
| m_surfaceParams[SURFACE_MVP_INDEX].bRenderTarget = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_MVP_INDEX, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| |
| // skip three BTI for haar distortion surface, statstics data dump surface |
| // and frame level statstics data surface because they are not used |
| startBTI += 3; |
| |
| // 48: FEI external MVPredictor surface |
| if (m_feiPicParams->MVPredictorInput) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_FEI_EXTERNAL_MVP, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| } |
| else |
| { |
| startBTI++; |
| } |
| |
| if (m_feiPicParams->bPerCTBInput) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetSurfacesState( |
| kernelState, |
| &cmdBuffer, |
| SURFACE_FEI_PER_CTB_CTRL, |
| &bindingTable->dwBindingTableEntries[startBTI++])); |
| } |
| else |
| { |
| startBTI ++; |
| } |
| startBTI += 1; |
| |
| if (!m_hwWalker) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("Currently HW walker shall not be disabled for CM based down scaling kernel."); |
| return eStatus; |
| } |
| |
| if(m_numMbBKernelSplit == 0) |
| { |
| // always use customized media walker |
| MHW_WALKER_PARAMS walkerParams; |
| MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker)); |
| walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| } |
| else |
| { |
| int32_t localOuterLoopExecCount = m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount; |
| int32_t localInitialStartPointY = m_walkingPatternParam.MediaWalker.LocalStart.y; |
| int32_t phase = MOS_MIN(m_numMbBKernelSplit, MAX_NUM_KERNEL_SPLIT); |
| int32_t totalExecCount = localOuterLoopExecCount + 1; |
| int32_t deltaExecCount = (((totalExecCount+phase - 1) / phase) + 1) & 0xfffe; |
| int32_t remainExecCount = totalExecCount; |
| |
| int32_t deltaY = 0; |
| if (m_enable26WalkingPattern) |
| { |
| deltaY = deltaExecCount / 2; |
| } |
| else |
| { |
| deltaY = deltaExecCount * 2; |
| } |
| |
| int32_t startPointY[MAX_NUM_KERNEL_SPLIT] = { 0 }; |
| int32_t currentExecCount[MAX_NUM_KERNEL_SPLIT] = { -1 }; |
| currentExecCount[0] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) : (remainExecCount-1); |
| startPointY[0] = localInitialStartPointY; |
| |
| for (auto i = 1; i < phase; i++) |
| { |
| remainExecCount -= deltaExecCount; |
| if (remainExecCount < 1) |
| { |
| remainExecCount = 1; |
| } |
| |
| currentExecCount[i] = (remainExecCount > deltaExecCount)?(deltaExecCount-1) : (remainExecCount-1); |
| startPointY[i] = startPointY[i-1] + deltaY; |
| } |
| |
| for(auto i = 0; i < phase; i++) |
| { |
| if(currentExecCount[i] < 0) |
| { |
| break; |
| } |
| |
| // Program render engine pipe commands |
| SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams(); |
| sendKernelCmdsParams.EncFunctionType = encFunctionType; |
| sendKernelCmdsParams.pKernelState = kernelState; |
| sendKernelCmdsParams.bEnableCustomScoreBoard= true; |
| sendKernelCmdsParams.pCustomScoreBoard = &m_walkingPatternParam.ScoreBoard; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams)); |
| |
| // Change walker execution count and local start Y for different phases |
| m_walkingPatternParam.MediaWalker.dwLocalLoopExecCount = currentExecCount[i]; |
| m_walkingPatternParam.MediaWalker.LocalStart.y = startPointY[i]; |
| |
| // always use customized media walker |
| MHW_WALKER_PARAMS walkerParams; |
| MOS_SecureMemcpy(&walkerParams, sizeof(walkerParams), &m_walkingPatternParam.MediaWalker, sizeof(m_walkingPatternParam.MediaWalker)); |
| walkerParams.ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd( |
| &cmdBuffer, |
| &walkerParams)); |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EndKernelCall( |
| encFunctionType, |
| kernelState, |
| &cmdBuffer)); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_mvIndex.sResource, |
| CodechalDbgAttr::attrOutput, |
| "MbData", |
| m_mvpIndex.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_mvpIndex.sResource, |
| CodechalDbgAttr::attrOutput, |
| "MvData", |
| m_mvpIndex.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| ) |
| |
| m_lastTaskInPhase = true; |
| eStatus = Encode8x8BPakKernel(curbe); |
| |
| return eStatus; |
| } |
| |
| #else |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode2xScalingKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL); |
| |
| //Setup CURBE |
| MEDIA_OBJECT_DOWNSCALING_2X_STATIC_DATA_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.PicWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.PicHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| DownScalingKernelParams scalingParams; |
| MOS_ZeroMemory(&scalingParams, sizeof(scalingParams)); |
| |
| scalingParams.m_cmSurfDS_TopIn = &m_rawSurfaceToEnc->OsResource; |
| scalingParams.m_cmSurfDS_TopOut = &m_scaled2xSurface.OsResource; |
| scalingParams.m_cmSurfTopVProc = nullptr; |
| |
| if (m_cmKernelMap.count("2xScaling") == 0) |
| { |
| m_cmKernelMap["2xScaling"] = new CMRTKernelDownScalingUMD(); |
| m_cmKernelMap["2xScaling"]->Init((void *)m_osInterface->pOsContext); |
| } |
| |
| m_cmKernelMap["2xScaling"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["2xScaling"]->AllocateSurfaces(&scalingParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["2xScaling"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32x32PuModeDecisionKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_PU_MD); |
| |
| //Setup CURBE |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| |
| CalcLambda(CODECHAL_ENCODE_HEVC_I_SLICE, INTRA_TRANSFORM_HAAR); |
| int32_t sliceQp = CalSliceQp(); |
| |
| double lambdaScalingFactor = 1.0; |
| double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp]; |
| double squaredQpLambda = qpLambda * qpLambda; |
| m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10)); |
| |
| CODECHAL_FEI_HEVC_I_32x32_PU_MODE_DECISION_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 /*64x64*/ : 1 /*32x32*/; |
| curbe->DW1.PuType = 0; // 32x32 PU |
| curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW1.SliceQp = sliceQp; |
| curbe->DW1.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled; |
| curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| |
| curbe->DW2.Lambda = m_fixedPointLambda; |
| |
| curbe->DW3.ModeCost32x32 = 0; |
| |
| curbe->DW4.EarlyExit = (uint32_t)-1; |
| if (curbe->DW1.EnableStatsDataDump) |
| { |
| double lambdaMd; |
| float hadBias = 2.0f; |
| |
| lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp]; |
| lambdaMd = lambdaMd * hadBias; |
| curbe->DW5.NewLambdaForHaarTransform = (uint32_t)(lambdaMd*(1<<10)); |
| } |
| |
| IFrameKernelParams I32x32Params; |
| MOS_ZeroMemory(&I32x32Params, sizeof(I32x32Params)); |
| |
| I32x32Params.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource; |
| I32x32Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource; |
| I32x32Params.m_cmSurfCurrY2 = &m_scaled2xSurface.OsResource; |
| I32x32Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| I32x32Params.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel); |
| I32x32Params.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| I32x32Params.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource; |
| |
| if (m_cmKernelMap.count("I_32X32") == 0) |
| { |
| m_cmKernelMap["I_32X32"] = new CMRTKernelI32x32UMD(); |
| m_cmKernelMap["I_32X32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr); |
| } |
| |
| m_cmKernelMap["I_32X32"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["I_32X32"]->AllocateSurfaces(&I32x32Params); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["I_32X32"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16SadPuComputationKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_SAD); |
| |
| // Setup CURBE |
| CODECHAL_ENC_HEVC_I_16x16_SAD_CURBE_G9 cmd, *curbe = &cmd; |
| |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| |
| curbe->DW2.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW2.SimFlagForInter = false; |
| if (m_hevcPicParams->CodingType != I_TYPE) |
| { |
| curbe->DW2.FASTSurveillanceFlag = m_hevcSeqParams->bVideoSurveillance; |
| } |
| |
| IFrameKernelParams I16x16SadParams; |
| MOS_ZeroMemory(&I16x16SadParams, sizeof(I16x16SadParams)); |
| |
| I16x16SadParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource; |
| I16x16SadParams.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource; |
| I16x16SadParams.m_cmSurfSAD16x16 = &m_sad16x16Pu.sResource; |
| I16x16SadParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| I16x16SadParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource; |
| |
| //in case I_32x32 isn't initialized when using FastIntraMode for per-frame control (I: enable; P/B: disable) |
| if (m_cmKernelMap.count("I_32X32") == 0) |
| { |
| m_cmKernelMap["I_32X32"] = new CMRTKernelI32x32UMD(); |
| m_cmKernelMap["I_32X32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr); |
| } |
| |
| if (m_cmKernelMap.count("I_16X16_SAD") == 0) |
| { |
| m_cmKernelMap["I_16X16_SAD"] = new CMRTKernelI16x16SadUMD(); |
| m_cmKernelMap["I_16X16_SAD"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["I_16X16_SAD"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_SAD; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["I_16X16_SAD"]->AllocateSurfaces(&I16x16SadParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["I_16X16_SAD"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode16x16PuModeDecisionKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_16X16_PU_MD); |
| |
| // Setup CURBE |
| int32_t sliceQp = CalSliceQp(); |
| uint8_t sliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| |
| double lambdaScaleFactor = 0.46 + sliceQp - 22; |
| if (lambdaScaleFactor < 0) |
| { |
| lambdaScaleFactor = 0.46; |
| } |
| |
| if (lambdaScaleFactor > 15) |
| { |
| lambdaScaleFactor = 15; |
| } |
| |
| double squredLambda = lambdaScaleFactor * pow(2.0, ((double)sliceQp-12.0)/6); |
| m_fixedPointLambdaForLuma = (uint32_t)(squredLambda * (1<<10)); |
| |
| double lambdaScalingFactor = 1.0; |
| double qpLambda = m_qpLambdaMd[sliceType][sliceQp]; |
| double squaredQpLambda = qpLambda * qpLambda; |
| m_fixedPointLambdaForChroma = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10)); |
| |
| LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_HAAR); |
| |
| CODECHAL_FEI_HEVC_I_16x16_PU_MODEDECISION_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.Log2MaxCUSize = log2MaxCUSize; |
| curbe->DW1.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW1.SliceQp = sliceQp; |
| |
| curbe->DW2.FixedPoint_Lambda_PredMode = m_fixedPointLambdaForChroma; |
| |
| curbe->DW3.LambdaScalingFactor = 1; |
| curbe->DW3.SliceType = sliceType; |
| curbe->DW3.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| curbe->DW3.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW3.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW3.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW3.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW3.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh; |
| //Given only Column Rolling I is supported, if in future, Row Rolling I support to be added, then, need to make change here as per Kernel |
| curbe->DW3.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW3.HalfUpdateMixedLCU = 0; |
| curbe->DW3.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| |
| curbe->DW4.PenaltyForIntra8x8NonDCPredMode = 0; |
| curbe->DW4.IntraComputeType = 1; |
| curbe->DW4.AVCIntra8x8Mask = 0; |
| curbe->DW4.IntraSadAdjust = 2; |
| |
| double lambdaMd = sqrt(0.57*pow(2.0, ((double)sliceQp-12.0)/3)); |
| squredLambda = lambdaMd * lambdaMd; |
| uint32_t newLambda = (uint32_t)(squredLambda*(1<<10)); |
| curbe->DW5.FixedPoint_Lambda_CU_Mode_for_Cost_Calculation = newLambda; |
| |
| curbe->DW6.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| |
| curbe->DW7.ModeCostIntraNonPred = m_modeCost[0]; |
| curbe->DW7.ModeCostIntra16x16 = m_modeCost[1]; |
| curbe->DW7.ModeCostIntra8x8 = m_modeCost[2]; |
| curbe->DW7.ModeCostIntra4x4 = m_modeCost[3]; |
| |
| curbe->DW8.FixedPoint_Lambda_CU_Mode_for_Luma = m_fixedPointLambdaForLuma; |
| |
| if (m_hevcPicParams->bEnableRollingIntraRefresh) |
| { |
| curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| } |
| |
| curbe->DW10.SimplifiedFlagForInter = 0; |
| if (m_encodeParams.bReportStatisticsEnabled) |
| { |
| curbe->DW10.HaarTransformMode = true; |
| } |
| else |
| { |
| curbe->DW10.HaarTransformMode = (m_hevcPicParams->CodingType == I_TYPE) ? false : true; |
| } |
| |
| IFrameKernelParams I16x16ModeParams; |
| MOS_ZeroMemory(&I16x16ModeParams, sizeof(I16x16ModeParams)); |
| |
| I16x16ModeParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource; |
| I16x16ModeParams.m_cmSurfSAD16x16 = &m_sad16x16Pu.sResource; |
| I16x16ModeParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface; |
| I16x16ModeParams.m_bufSize = m_mbCodeSize - m_mvOffset; |
| I16x16ModeParams.m_bufOffset = m_mvOffset; |
| I16x16ModeParams.m_cmSurfPer32x32PUDataOut = &m_32x32PuOutputData.sResource; |
| I16x16ModeParams.m_cmSurfVMEMode = &m_vme8x8Mode.sResource; |
| I16x16ModeParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| I16x16ModeParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource; |
| I16x16ModeParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel); |
| I16x16ModeParams.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| I16x16ModeParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource; |
| |
| if (m_cmKernelMap.count("I_16X16_MODE") == 0) |
| { |
| m_cmKernelMap["I_16X16_MODE"] = new CMRTKernelI16x16ModeUMD(); |
| m_cmKernelMap["I_16X16_MODE"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["I_16X16_MODE"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["I_16X16_MODE"]->AllocateSurfaces(&I16x16ModeParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["I_16X16_MODE"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_PU); |
| |
| // Setup CURBE |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| CODECHAL_FEI_HEVC_I_8x8_PU_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW1.PuType = 2; // 8x8 |
| curbe->DW1.DcFilterFlag = true; |
| curbe->DW1.AngleRefineFlag = true; |
| curbe->DW1.LCUType = (log2MaxCUSize==6)? 0 : 1; |
| curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW1.QPValue = CalSliceQp(); |
| if (m_hevcPicParams->bEnableRollingIntraRefresh) |
| { |
| curbe->DW1.EnableRollingIntra = true; |
| curbe->DW1.IntraRefreshEn = true; |
| curbe->DW1.HalfUpdateMixedLCU = 0; |
| |
| curbe->DW5.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW5.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW5.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| int32_t qp = CalSliceQp(); |
| curbe->DW1.QPValue = (uint32_t)qp; |
| } |
| |
| curbe->DW2.LumaLambda = m_fixedPointLambdaForLuma; |
| |
| curbe->DW3.ChromaLambda = m_fixedPointLambdaForChroma; |
| |
| if (m_encodeParams.bReportStatisticsEnabled) |
| { |
| curbe->DW4.HaarTransformFlag = true; |
| } |
| else |
| { |
| curbe->DW4.HaarTransformFlag = (m_hevcPicParams->CodingType == I_TYPE) ? false : true; |
| } |
| curbe->DW4.SimplifiedFlagForInter = false; |
| |
| IFrameKernelParams I8x8Params; |
| MOS_ZeroMemory(&I8x8Params, sizeof(I8x8Params)); |
| |
| I8x8Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource; |
| I8x8Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| I8x8Params.m_cmSurfVMEMode = &m_vme8x8Mode.sResource; |
| I8x8Params.m_cmSurfMode = &m_intraMode.sResource; |
| I8x8Params.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel); |
| I8x8Params.m_cmSurfSIF = &m_simplestIntraSurface.OsResource; |
| I8x8Params.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| I8x8Params.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource; |
| |
| if (m_cmKernelMap.count("I_8X8") == 0) |
| { |
| m_cmKernelMap["I_8X8"] = new CMRTKernelI8x8UMD(); |
| m_cmKernelMap["I_8X8"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["I_8X8"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["I_8X8"]->AllocateSurfaces(&I8x8Params); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["I_8X8"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PUFMODEKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_8X8_FMODE); |
| |
| // Setup CURBE |
| int32_t qp = CalSliceQp(); |
| uint32_t sliceQp = (uint32_t)qp; |
| uint32_t log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| |
| CODECHAL_FEI_HEVC_I_8x8_PU_FMODE_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.SliceType = PicCodingTypeToSliceType(m_hevcPicParams->CodingType); |
| curbe->DW1.PuType = 2; |
| curbe->DW1.PakReordingFlag = (m_hevcPicParams->CodingType == I_TYPE) ? true : false; |
| curbe->DW1.LCUType = (log2MaxCUSize == 6)? 0 : 1; |
| curbe->DW1.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcSeqParams->TargetUsage == 0x04) ? ((m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1) : 0; |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW1.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW1.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| curbe->DW1.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW1.IntraRefreshEn = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW1.HalfUpdateMixedLCU = 0; |
| curbe->DW1.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW2.LambdaForLuma = m_fixedPointLambdaForLuma; |
| if (m_hevcPicParams->CodingType != I_TYPE || |
| m_encodeParams.bReportStatisticsEnabled) |
| { |
| float hadBias = 2.0f; |
| |
| double lambdaMd = m_qpLambdaMd[curbe->DW1.SliceType][sliceQp]; |
| lambdaMd = lambdaMd * hadBias; |
| curbe->DW3.LambdaForDistCalculation = (uint32_t)(lambdaMd*(1<<10)); |
| } |
| curbe->DW4.ModeCostFor8x8PU_TU8 = 0; |
| curbe->DW5.ModeCostFor8x8PU_TU4 = 0; |
| curbe->DW6.SATD16x16PuThreshold = MOS_MAX(200 * ((int32_t)sliceQp - 12), 0); |
| curbe->DW6.BiasFactorToward8x8 = (m_hevcPicParams->bScreenContent) ? 1024 : 1126 + 102; |
| curbe->DW7.Qp = sliceQp; |
| curbe->DW7.QpForInter = 0; |
| curbe->DW8.SimplifiedFlagForInter = false; |
| curbe->DW8.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled; |
| // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+ |
| curbe->DW8.KBLControlFlag = UsePlatformControlFlag(); |
| curbe->DW9.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW9.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW9.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| IFrameKernelParams I8x8ModeParams; |
| MOS_ZeroMemory(&I8x8ModeParams, sizeof(I8x8ModeParams)); |
| |
| I8x8ModeParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface; |
| I8x8ModeParams.m_bufSize = m_mbCodeSize - m_mvOffset; |
| I8x8ModeParams.m_bufOffset = m_mvOffset; |
| I8x8ModeParams.m_cmSurfVMEMode = &m_vme8x8Mode.sResource; |
| I8x8ModeParams.m_cmSurfMode = &m_intraMode.sResource; |
| I8x8ModeParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| I8x8ModeParams.m_cmSurfIntraDist = &m_intraDist.sResource; |
| I8x8ModeParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource; |
| I8x8ModeParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel); |
| I8x8ModeParams.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| I8x8ModeParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource; |
| |
| if (m_cmKernelMap.count("I_8X8_MODE") == 0) |
| { |
| m_cmKernelMap["I_8X8_MODE"] = new CMRTKernelI8x8ModeUMD(); |
| m_cmKernelMap["I_8X8_MODE"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["I_32X32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["I_8X8_MODE"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_8x8_PU_FMODE; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["I_8X8_MODE"]->AllocateSurfaces(&I8x8ModeParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["I_8X8_MODE"]->CreateAndDispatchKernel(m_cmEvent, false, ((!m_singleTaskPhaseSupported)|| m_lastTaskInPhase)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode32X32BIntraCheckKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_32X32_B_IC); |
| |
| // Setup CURBE |
| if (m_pictureCodingType == P_TYPE) |
| { |
| CalcLambda(CODECHAL_ENCODE_HEVC_P_SLICE, INTRA_TRANSFORM_HAAR); |
| } |
| else |
| { |
| CalcLambda(CODECHAL_ENCODE_HEVC_B_SLICE, INTRA_TRANSFORM_HAAR); |
| } |
| int32_t sliceQp = CalSliceQp(); |
| |
| double lambdaScalingFactor = 1.0; |
| double qpLambda = m_qpLambdaMd[CODECHAL_ENCODE_HEVC_I_SLICE][sliceQp]; |
| double squaredQpLambda = qpLambda * qpLambda; |
| m_fixedPointLambda = (uint32_t)(lambdaScalingFactor * squaredQpLambda * (1<<10)); |
| |
| CODECHAL_FEI_HEVC_B_32x32_PU_INTRA_CHECK_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.EnableDebugDump = false; |
| curbe->DW1.EnableIntraEarlyExit = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : 1; |
| curbe->DW1.Flags = 0; |
| curbe->DW1.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW1.SliceType = m_hevcSliceParams->slice_type; |
| curbe->DW1.HMEEnable = 0; |
| curbe->DW1.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| |
| curbe->DW2.QpMultiplier = 100; |
| curbe->DW2.QpValue = 0; // MBZ |
| |
| PBFrameKernelParams PB32x32Params; |
| MOS_ZeroMemory(&PB32x32Params, sizeof(PB32x32Params)); |
| |
| PB32x32Params.m_cmSurfPer32x32ICOut = &m_32x32PuOutputData.sResource; |
| PB32x32Params.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource; |
| PB32x32Params.m_cmSurfCurrY2 = &m_scaled2xSurface.OsResource; |
| PB32x32Params.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| PB32x32Params.m_cmSurfSIF = &m_simplestIntraSurface.OsResource; |
| PB32x32Params.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| |
| if (m_cmKernelMap.count("PB_32x32") == 0) |
| { |
| m_cmKernelMap["PB_32x32"] = new CMRTKernelPB32x32UMD(); |
| m_cmKernelMap["PB_32x32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr); |
| } |
| |
| m_cmKernelMap["PB_32x32"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_32x32_B_INTRA_CHECK; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["PB_32x32"]->AllocateSurfaces(&PB32x32Params); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["PB_32x32"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8BPakKernel( |
| PCODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 pEncBCurbe) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| CODECHAL_ENCODE_CHK_NULL_RETURN(pEncBCurbe); |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL); |
| |
| //Setup CURBE |
| CODECHAL_FEI_HEVC_B_PAK_CURBE_G9 cmd, *curbe = &cmd; |
| MOS_ZeroMemory(curbe, sizeof(*curbe)); |
| curbe->DW0.FrameWidth = MOS_ALIGN_CEIL(m_frameWidth, CODECHAL_MACROBLOCK_WIDTH); |
| curbe->DW0.FrameHeight = MOS_ALIGN_CEIL(m_frameHeight, CODECHAL_MACROBLOCK_HEIGHT); |
| |
| curbe->DW1.MaxVmvR = pEncBCurbe->DW44.MaxVmvR; |
| curbe->DW1.Qp = pEncBCurbe->DW13.QpPrimeY; |
| curbe->DW2.BrcEnable = pEncBCurbe->DW36.BRCEnable; |
| curbe->DW2.LcuBrcEnable = pEncBCurbe->DW36.LCUBRCEnable; |
| curbe->DW2.ScreenContent = pEncBCurbe->DW47.ScreenContentFlag; |
| curbe->DW2.SimplestIntraEnable = pEncBCurbe->DW47.SkipIntraKrnFlag; |
| curbe->DW2.SliceType = pEncBCurbe->DW4.SliceType; |
| curbe->DW2.EnableWA = 0; |
| curbe->DW2.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW2.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| // KBLControlFlag determines the PAK OBJ format as it varies from Gen9 to Gen9.5+ |
| curbe->DW2.KBLControlFlag = UsePlatformControlFlag(); |
| curbe->DW2.EnableRollingIntra = m_hevcPicParams->bEnableRollingIntraRefresh; |
| curbe->DW2.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW3.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW3.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW3.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| PBFrameKernelParams PB8x8PakParams; |
| MOS_ZeroMemory(&PB8x8PakParams, sizeof(PB8x8PakParams)); |
| |
| PB8x8PakParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface; |
| PB8x8PakParams.m_bufSize = m_mbCodeSize - m_mvOffset; |
| PB8x8PakParams.m_bufOffset = m_mvOffset; |
| PB8x8PakParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| PB8x8PakParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel); |
| PB8x8PakParams.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| PB8x8PakParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource; |
| PB8x8PakParams.m_cmSurfMVIndex = &m_mvIndex.sResource; |
| PB8x8PakParams.m_cmSurfMVPred = &m_mvpIndex.sResource; |
| |
| if (m_cmKernelMap.count("PB_8x8_PAK") == 0) |
| { |
| m_cmKernelMap["PB_8x8_PAK"] = new CMRTKernelPB8x8PakUMD(); |
| m_cmKernelMap["PB_8x8_PAK"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["PB_8x8_PAK"]->SetupCurbe(curbe); |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_PAK; |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| |
| m_cmKernelMap["PB_8x8_PAK"]->AllocateSurfaces(&PB8x8PakParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["PB_8x8_PAK"]->CreateAndDispatchKernel(m_cmEvent, false, ((!m_singleTaskPhaseSupported)|| m_lastTaskInPhase)); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Encode8x8PBMbEncKernel() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| PerfTagSetting perfTag; |
| CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL); |
| |
| int32_t sliceQp = CalSliceQp(); |
| uint8_t sliceType = PicCodingTypeToSliceType(m_pictureCodingType); |
| |
| if (m_feiPicParams->FastIntraMode) |
| { |
| // When TU=7, lambda is not computed in the 32x32 MD stage for it is skipped. |
| CalcLambda(sliceType, INTRA_TRANSFORM_HAAR); |
| } |
| LoadCosts(sliceType, (uint8_t)sliceQp, INTRA_TRANSFORM_REGULAR); |
| |
| uint8_t mbCodeIdxForTempMVP = 0xFF; |
| if(m_pictureCodingType != I_TYPE) |
| { |
| if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC) |
| { |
| uint8_t FrameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx; |
| |
| mbCodeIdxForTempMVP = m_refList[FrameIdx]->ucScalingIdx; |
| } |
| |
| if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag) |
| { |
| // Temporal reference MV index is invalid and so disable the temporal MVP |
| CODECHAL_ENCODE_ASSERT(false); |
| m_hevcSliceParams->slice_temporal_mvp_enable_flag = false; |
| } |
| } |
| else |
| { |
| mbCodeIdxForTempMVP = 0; |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion()); |
| |
| //Setup CURBE |
| uint8_t forwardTransformThd[7] = { 0 }; |
| CalcForwardCoeffThd(forwardTransformThd, sliceQp); |
| |
| uint32_t curbeSize = 0; |
| void *defaultCurbe = (void *)GetDefaultCurbeEncBKernel(curbeSize); |
| CODECHAL_ENCODE_ASSERT(defaultCurbe); |
| |
| CODECHAL_FEI_HEVC_B_MB_ENC_CURBE_G9 cmd, *curbe = &cmd; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(curbe, sizeof(cmd), defaultCurbe, curbeSize)); |
| |
| bool transform_8x8_mode_flag = true; |
| uint32_t SearchPath = (m_feiPicParams->SearchWindow == 5) ? 2 : 1; // 2 means full search, 1 means diamand search |
| uint32_t LenSP = m_feiPicParams->LenSP; |
| uint32_t RefWidth = (m_feiPicParams->RefWidth < 20) ? 20 : m_feiPicParams->RefWidth; |
| uint32_t RefHeight = (m_feiPicParams->RefHeight < 20) ? 20 : m_feiPicParams->RefHeight; |
| |
| switch (m_feiPicParams->SearchWindow) |
| { |
| case 0: |
| // not use predefined search window |
| if ((m_feiPicParams->SearchPath != 0) && (m_feiPicParams->SearchPath != 1) && (m_feiPicParams->SearchPath != 2)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input SearchPath for SearchWindow=0 case!!!."); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| SearchPath = m_feiPicParams->SearchPath; |
| if(((RefWidth * RefHeight) > 2048) || (RefWidth > 64) || (RefHeight > 64)) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC input RefWidth/RefHeight size for SearchWindow=0 case!!!."); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| break; |
| case 1: |
| // Tiny SUs 24x24 window |
| RefWidth = 24; |
| RefHeight = 24; |
| LenSP = 4; |
| break; |
| case 2: |
| // Small SUs 28x28 window |
| RefWidth = 28; |
| RefHeight = 28; |
| LenSP = 9; |
| break; |
| case 3: |
| // Diamond SUs 48x40 window |
| RefWidth = 48; |
| RefHeight = 40; |
| LenSP = 16; |
| break; |
| case 4: |
| // Large Diamond SUs 48x40 window |
| RefWidth = 48; |
| RefHeight = 40; |
| LenSP = 32; |
| break; |
| case 5: |
| // Exhaustive SUs 48x40 window |
| RefWidth = 48; |
| RefHeight = 40; |
| LenSP = 48; |
| if (m_hevcSeqParams->TargetUsage != 7) |
| { |
| if (m_pictureCodingType == B_TYPE) |
| { |
| LenSP = 48; |
| } else { |
| LenSP = 57; |
| } |
| } else { |
| LenSP = 25; |
| } |
| break; |
| default: |
| CODECHAL_ENCODE_ASSERTMESSAGE("Invalid picture FEI MB ENC SearchWindow value for HEVC FEI on SKL!!!."); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if((m_pictureCodingType == B_TYPE) && (curbe->DW3.BMEDisableFBR == 0)) |
| { |
| if(RefWidth > 32) |
| { |
| RefWidth = 32; |
| } |
| if(RefHeight > 32) |
| { |
| RefHeight = 32; |
| } |
| } |
| |
| curbe->DW0.AdaptiveEn = m_feiPicParams->AdaptiveSearch; |
| curbe->DW0.T8x8FlagForInterEn = transform_8x8_mode_flag; |
| curbe->DW2.PicWidth = m_picWidthInMb; |
| curbe->DW2.LenSP = LenSP; |
| curbe->DW3.SrcAccess = curbe->DW3.RefAccess = 0; |
| if (m_feiPicParams->FastIntraMode) |
| { |
| curbe->DW3.FTEnable = (m_ftqBasedSkip[0x07] >> 1) & 0x01; |
| } |
| else |
| { |
| curbe->DW3.FTEnable = (m_ftqBasedSkip[0x04] >> 1) & 0x01; |
| } |
| curbe->DW3.SubPelMode = m_feiPicParams->SubPelMode; |
| |
| curbe->DW4.PicHeightMinus1 = m_picHeightInMb - 1; |
| curbe->DW4.EnableStatsDataDump = m_encodeParams.bReportStatisticsEnabled; |
| curbe->DW4.HMEEnable = 0; |
| curbe->DW4.SliceType = sliceType; |
| curbe->DW4.EnableQualityImprovement = m_encodeParams.bQualityImprovementEnable; |
| curbe->DW4.UseActualRefQPValue = false; |
| |
| curbe->DW5.RefWidth = RefWidth; |
| curbe->DW5.RefHeight = RefHeight; |
| |
| curbe->DW7.IntraPartMask = 0x3; |
| |
| curbe->DW6.FrameWidth = m_picWidthInMb * CODECHAL_MACROBLOCK_WIDTH; |
| curbe->DW6.FrameHeight = m_picHeightInMb * CODECHAL_MACROBLOCK_HEIGHT; |
| |
| curbe->DW8.Mode0Cost = m_modeCost[0]; |
| curbe->DW8.Mode1Cost = m_modeCost[1]; |
| curbe->DW8.Mode2Cost = m_modeCost[2]; |
| curbe->DW8.Mode3Cost = m_modeCost[3]; |
| |
| curbe->DW9.Mode4Cost = m_modeCost[4]; |
| curbe->DW9.Mode5Cost = m_modeCost[5]; |
| curbe->DW9.Mode6Cost = m_modeCost[6]; |
| curbe->DW9.Mode7Cost = m_modeCost[7]; |
| |
| curbe->DW10.Mode8Cost= m_modeCost[8]; |
| curbe->DW10.Mode9Cost= m_modeCost[9]; |
| curbe->DW10.RefIDCost = m_modeCost[10]; |
| curbe->DW10.ChromaIntraModeCost = m_modeCost[11]; |
| |
| curbe->DW11.MV0Cost = m_mvCost[0]; |
| curbe->DW11.MV1Cost = m_mvCost[1]; |
| curbe->DW11.MV2Cost = m_mvCost[2]; |
| curbe->DW11.MV3Cost = m_mvCost[3]; |
| |
| curbe->DW12.MV4Cost = m_mvCost[4]; |
| curbe->DW12.MV5Cost = m_mvCost[5]; |
| curbe->DW12.MV6Cost = m_mvCost[6]; |
| curbe->DW12.MV7Cost = m_mvCost[7]; |
| |
| curbe->DW13.QpPrimeY = sliceQp; |
| uint8_t bitDepthChromaMinus8 = 0; // support 4:2:0 only |
| int32_t qpBdOffsetC = 6 * bitDepthChromaMinus8; |
| int32_t qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cb_qp_offset)); |
| int32_t QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30]; |
| curbe->DW13.QpPrimeCb= QPc + qpBdOffsetC; |
| qPi = (int32_t)CodecHal_Clip3((-qpBdOffsetC), 51, (sliceQp + m_hevcPicParams->pps_cr_qp_offset)); |
| QPc = (qPi < 30) ? qPi : QPcTable[qPi - 30]; |
| curbe->DW13.QpPrimeCr= QPc; |
| |
| curbe->DW14.SICFwdTransCoeffThreshold_0 = forwardTransformThd[0]; |
| curbe->DW14.SICFwdTransCoeffThreshold_1 = forwardTransformThd[1]; |
| curbe->DW14.SICFwdTransCoeffThreshold_2 = forwardTransformThd[2]; |
| |
| curbe->DW15.SICFwdTransCoeffThreshold_3 = forwardTransformThd[3]; |
| curbe->DW15.SICFwdTransCoeffThreshold_4 = forwardTransformThd[4]; |
| curbe->DW15.SICFwdTransCoeffThreshold_5 = forwardTransformThd[5]; |
| curbe->DW15.SICFwdTransCoeffThreshold_6 = forwardTransformThd[6]; |
| |
| if (SearchPath == 1) |
| { |
| // diamond search |
| if (m_pictureCodingType == P_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7PCurbeInit[16]), 14 * sizeof(uint32_t))); |
| } |
| else if (m_pictureCodingType == B_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(MOS_SecureMemcpy(&(curbe->DW16), 14 * sizeof(uint32_t), &(m_encBTu7BCurbeInit[16]), 14 * sizeof(uint32_t))); |
| } |
| } |
| else if((SearchPath != 0) && (SearchPath != 2)) |
| { |
| // default 0 and 2 are full sparil search |
| CODECHAL_ENCODE_ASSERT(false); |
| } |
| |
| curbe->DW32.SkipVal = m_skipValB[curbe->DW3.BlockBasedSkipEnable][transform_8x8_mode_flag][sliceQp]; |
| |
| if(m_pictureCodingType == I_TYPE) |
| { |
| *(float*)&(curbe->DW34.LambdaME) = 0.0; |
| } |
| else if (m_pictureCodingType == P_TYPE) |
| { |
| *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_P_SLICE][sliceQp]; |
| } |
| else |
| { |
| *(float*)&(curbe->DW34.LambdaME) = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp]; |
| } |
| |
| curbe->DW35.ModeCostSp = m_modeCostSp; |
| curbe->DW35.SimpIntraInterThreshold = m_simplestIntraInterThreshold; |
| |
| curbe->DW36.NumRefIdxL0MinusOne = m_hevcSliceParams->num_ref_idx_l0_active_minus1; |
| curbe->DW36.NumRefIdxL1MinusOne = m_hevcSliceParams->num_ref_idx_l1_active_minus1; |
| curbe->DW36.BRCEnable = m_encodeParams.bMbQpDataEnabled || m_brcEnabled; |
| curbe->DW36.LCUBRCEnable = m_encodeParams.bMbQpDataEnabled || m_lcuBrcEnabled; |
| curbe->DW36.PowerSaving = m_powerSavingEnabled; |
| curbe->DW36.ROIEnable = (m_hevcPicParams->NumROI > 0); |
| curbe->DW36.FASTSurveillanceFlag = (m_hevcPicParams->CodingType == I_TYPE) ? 0 : m_hevcSeqParams->bVideoSurveillance; |
| |
| if(m_pictureCodingType != I_TYPE) |
| { |
| curbe->DW37.ActualQpRefID0List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_0); |
| curbe->DW37.ActualQpRefID1List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_1); |
| curbe->DW37.ActualQpRefID2List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_2); |
| curbe->DW37.ActualQpRefID3List0 = GetQPValueFromRefList(LIST_0, CODECHAL_ENCODE_REF_ID_3); |
| curbe->DW41.TextureIntraCostThreshold = 500; |
| |
| if(m_pictureCodingType == B_TYPE) { |
| curbe->DW39.ActualQpRefID0List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_0); |
| curbe->DW39.ActualQpRefID1List1 = GetQPValueFromRefList(LIST_1, CODECHAL_ENCODE_REF_ID_1); |
| float lambda_me = (float)m_qpLambdaMe[CODECHAL_ENCODE_HEVC_B_SLICE][sliceQp]; |
| if (m_encodeParams.bQualityImprovementEnable) |
| { |
| curbe->DW40.TransformThreshold0 = (uint16_t) (lambda_me * 56.25 + 0.5); |
| curbe->DW40.TransformThreshold1 = (uint16_t) (lambda_me * 21 + 0.5); |
| curbe->DW41.TransformThreshold2 = (uint16_t) (lambda_me * 9 + 0.5); |
| } |
| } |
| } |
| |
| curbe->DW42.NumMVPredictorsL0 = m_feiPicParams->NumMVPredictorsL0; |
| curbe->DW42.NumMVPredictorsL1 = m_feiPicParams->NumMVPredictorsL1; |
| curbe->DW42.PerLCUQP = m_encodeParams.bMbQpDataEnabled; |
| curbe->DW42.PerCTBInput = m_feiPicParams->bPerCTBInput; |
| curbe->DW42.CTBDistortionOutput = m_feiPicParams->bDistortionEnable; |
| curbe->DW42.MultiPredL0 = m_feiPicParams->MultiPredL0; |
| curbe->DW42.MultiPredL1 = m_feiPicParams->MultiPredL1; |
| curbe->DW42.MVPredictorBlockSize = m_feiPicParams->MVPredictorInput; |
| |
| curbe->DW44.MaxVmvR = 511 * 4; |
| curbe->DW44.MaxNumMergeCandidates = m_hevcSliceParams->MaxNumMergeCand; |
| |
| if(m_pictureCodingType != I_TYPE) |
| { |
| curbe->DW44.MaxNumRefList0 = curbe->DW36.NumRefIdxL0MinusOne + 1; |
| |
| curbe->DW45.TemporalMvpEnableFlag = m_hevcSliceParams->slice_temporal_mvp_enable_flag; |
| curbe->DW45.HMECombineLenPslice = 8; |
| if(m_pictureCodingType == B_TYPE) |
| { |
| curbe->DW44.MaxNumRefList1 = curbe->DW36.NumRefIdxL1MinusOne + 1; |
| curbe->DW45.HMECombineLenBslice = 8; |
| } |
| } |
| |
| curbe->DW45.Log2ParallelMergeLevel = m_hevcPicParams->log2_parallel_merge_level_minus2 + 2; |
| |
| curbe->DW46.Log2MaxTUSize = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2; |
| curbe->DW46.Log2MinTUSize = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2; |
| curbe->DW46.Log2MaxCUSize = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3; |
| curbe->DW46.Log2MinCUSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; |
| |
| curbe->DW47.NumRegionsInSlice = m_numRegionsInSlice; |
| curbe->DW47.TypeOfWalkingPattern = m_enable26WalkingPattern; |
| curbe->DW47.ChromaFlatnessCheckFlag= (m_feiPicParams->FastIntraMode) ? 0 : 1; |
| curbe->DW47.EnableIntraEarlyExit = (m_feiPicParams->FastIntraMode) ? 0 : 1; |
| curbe->DW47.SkipIntraKrnFlag = (m_feiPicParams->FastIntraMode) ? 1 : 0; |
| curbe->DW47.CollocatedFromL0Flag = m_hevcSliceParams->collocated_from_l0_flag; |
| curbe->DW47.IsLowDelay = m_lowDelay; |
| curbe->DW47.ScreenContentFlag = m_hevcPicParams->bScreenContent; |
| curbe->DW47.MultiSliceFlag = (m_numSlices > 1); |
| curbe->DW47.ArbitarySliceFlag = m_arbitraryNumMbsInSlice; |
| curbe->DW47.NumRegionMinus1 = m_walkingPatternParam.dwNumRegion - 1; |
| |
| if(m_pictureCodingType != I_TYPE) |
| { |
| curbe->DW48.CurrentTdL0_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][0]); |
| curbe->DW48.CurrentTdL0_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][1]); |
| curbe->DW49.CurrentTdL0_2 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][2]); |
| curbe->DW49.CurrentTdL0_3 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[0][3]); |
| if(m_pictureCodingType == B_TYPE) { |
| curbe->DW50.CurrentTdL1_0 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][0]); |
| curbe->DW50.CurrentTdL1_1 = ComputeTemporalDifference(m_hevcSliceParams->RefPicList[1][1]); |
| } |
| } |
| |
| curbe->DW52.NumofUnitInRegion = m_walkingPatternParam.dwNumUnitsInRegion; |
| curbe->DW52.MaxHeightInRegion = m_walkingPatternParam.dwMaxHeightInRegion; |
| |
| // Intra refresh is enabled. Program related CURBE fields |
| if (m_hevcPicParams->bEnableRollingIntraRefresh) |
| { |
| curbe->DW35.IntraRefreshEn = true; |
| curbe->DW35.FirstIntraRefresh = m_firstIntraRefresh; |
| curbe->DW35.HalfUpdateMixedLCU = 0; |
| curbe->DW35.EnableRollingIntra = true; |
| |
| curbe->DW38.NumFrameInGOB = m_frameNumInGob; |
| curbe->DW38.NumIntraRefreshOffFrames = m_frameNumWithoutIntraRefresh; |
| |
| curbe->DW51.IntraRefreshQPDelta = m_hevcPicParams->QpDeltaForInsertedIntra; |
| curbe->DW51.IntraRefreshMBNum = m_hevcPicParams->IntraInsertionLocation; |
| curbe->DW51.IntraRefreshUnitInMB = m_hevcPicParams->IntraInsertionSize; |
| |
| curbe->DW53.IntraRefreshRefHeight = 40; |
| curbe->DW53.IntraRefreshRefWidth = 48; |
| |
| m_firstIntraRefresh = false; |
| m_frameNumWithoutIntraRefresh = 0; |
| } |
| else if (m_pictureCodingType != I_TYPE) // don't increment num frames w/o refresh in case of TU7 I frames |
| { |
| m_frameNumWithoutIntraRefresh++; |
| } |
| |
| PBFrameKernelParams PB8x8MbEncParams; |
| MOS_ZeroMemory(&PB8x8MbEncParams, sizeof(PB8x8MbEncParams)); |
| |
| PB8x8MbEncParams.m_width = curbe->DW6.FrameWidth; |
| PB8x8MbEncParams.m_height = curbe->DW6.FrameHeight; |
| |
| for(uint32_t surfaceIdx = 0; surfaceIdx < 8; surfaceIdx++) |
| { |
| CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surfaceIdx]; |
| if (!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| PB8x8MbEncParams.m_cmSurfRef0[PB8x8MbEncParams.m_ucRefNum0] = &m_refList[idx]->sRefBuffer.OsResource; |
| PB8x8MbEncParams.m_ucRefNum0++; |
| } |
| |
| refPic = m_hevcSliceParams->RefPicList[LIST_1][surfaceIdx]; |
| if (!CodecHal_PictureIsInvalid(refPic) && |
| !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx])) |
| { |
| uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx; |
| PB8x8MbEncParams.m_cmSurfRef1[PB8x8MbEncParams.m_ucRefNum1] = &m_refList[idx]->sRefBuffer.OsResource; |
| PB8x8MbEncParams.m_ucRefNum1++; |
| } |
| } |
| |
| PB8x8MbEncParams.m_cmSurfCurrY = &m_rawSurfaceToEnc->OsResource; |
| PB8x8MbEncParams.m_cmSurfPOCDbuf = &m_resMbCodeSurface; |
| PB8x8MbEncParams.m_bufSize = m_mbCodeSize - m_mvOffset; |
| PB8x8MbEncParams.m_bufOffset = m_mvOffset; |
| if(mbCodeIdxForTempMVP == 0xFF) |
| { |
| PB8x8MbEncParams.m_cmSurfColRefData = nullptr; |
| } |
| else |
| { |
| PB8x8MbEncParams.m_cmSurfColRefData = m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP); |
| } |
| PB8x8MbEncParams.m_cmSurfIntraDist = &m_intraDist.sResource; |
| PB8x8MbEncParams.m_cmSurfMinDist = &m_minDistortion.OsResource; |
| PB8x8MbEncParams.m_cmSurfSliceMap = &m_sliceMapSurface.OsResource; |
| PB8x8MbEncParams.m_cmSurfVMEIN = &m_vmeSavedUniSic.sResource; |
| PB8x8MbEncParams.m_cmSurfSIF = &m_simplestIntraSurface.OsResource; |
| PB8x8MbEncParams.m_cmSurfCombinedQP = (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel); |
| PB8x8MbEncParams.m_cmLCUQPSurf = &m_lcuQP.OsResource; |
| PB8x8MbEncParams.m_cmBRCConstSurf = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx].OsResource; |
| PB8x8MbEncParams.m_cmWaveFrontMap = &m_concurrentThreadSurface[m_concurrentThreadIndex].OsResource; |
| if (++m_concurrentThreadIndex >= NUM_CONCURRENT_THREAD) |
| { |
| m_concurrentThreadIndex = 0; |
| } |
| PB8x8MbEncParams.m_cmSurfMVIndex = &m_mvIndex.sResource; |
| PB8x8MbEncParams.m_cmSurfMVPred = &m_mvpIndex.sResource; |
| if (m_feiPicParams->MVPredictorInput) |
| { |
| PB8x8MbEncParams.m_cmSurfMVPredictor = &m_feiPicParams->resMVPredictor; |
| } |
| else |
| { |
| PB8x8MbEncParams.m_cmSurfMVPredictor = nullptr; |
| } |
| |
| if (m_feiPicParams->bPerCTBInput) |
| { |
| PB8x8MbEncParams.m_cmSurfPerCTBInput = &m_feiPicParams->resCTBCtrl; |
| } |
| else |
| { |
| PB8x8MbEncParams.m_cmSurfPerCTBInput = nullptr; |
| } |
| |
| //to avoid multi contexts in case per-frame control of FastIntraMode, always use 2xScaling kernel to initialize the context. |
| if (m_cmKernelMap.count("2xScaling") == 0) |
| { |
| m_cmKernelMap["2xScaling"] = new CMRTKernelDownScalingUMD(); |
| m_cmKernelMap["2xScaling"]->Init((void *)m_osInterface->pOsContext); |
| } |
| |
| //in case PB_32x32 isn't initialized when using FastIntraMode for per-frame control (I: disable; P/B: enable) |
| if (m_cmKernelMap.count("PB_32x32") == 0) |
| { |
| m_cmKernelMap["PB_32x32"] = new CMRTKernelPB32x32UMD(); |
| m_cmKernelMap["PB_32x32"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, nullptr); |
| } |
| |
| if (m_pictureCodingType == I_TYPE && m_feiPicParams->FastIntraMode) |
| { |
| if (m_cmKernelMap.count("I_8x8_MBENC") == 0) |
| { |
| m_cmKernelMap["I_8x8_MBENC"] = new CMRTKernelB8x8MbEncUMD(); |
| m_cmKernelMap["I_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["I_8x8_MBENC"]->SetupCurbe(curbe); |
| m_cmKernelMap["I_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["I_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| } |
| else if (m_pictureCodingType == B_TYPE) |
| { |
| if (m_cmKernelMap.count("B_8x8_MBENC") == 0) |
| { |
| m_cmKernelMap["B_8x8_MBENC"] = new CMRTKernelB8x8MbEncUMD(); |
| m_cmKernelMap["B_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram); |
| } |
| |
| m_cmKernelMap["B_8x8_MBENC"]->SetupCurbe(curbe); |
| m_cmKernelMap["B_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["B_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| } |
| else if (m_pictureCodingType == P_TYPE) |
| { |
| if (m_cmKernelMap.count("P_8x8_MBENC") == 0) |
| { |
| m_cmKernelMap["P_8x8_MBENC"] = new CMRTKernelP8x8MbEncUMD(); |
| m_cmKernelMap["P_8x8_MBENC"]->Init(nullptr, m_cmKernelMap["2xScaling"]->m_cmDev, m_cmKernelMap["2xScaling"]->m_cmQueue, m_cmKernelMap["2xScaling"]->m_cmTask, m_cmKernelMap["PB_32x32"]->m_cmProgram); |
| } |
| m_cmKernelMap["P_8x8_MBENC"]->SetupCurbe(curbe); |
| m_cmKernelMap["P_8x8_MBENC"]->AllocateSurfaces(&PB8x8MbEncParams); |
| |
| //No need to wait for task finished |
| m_cmEvent = CM_NO_EVENT; |
| m_cmKernelMap["P_8x8_MBENC"]->CreateAndDispatchKernel(m_cmEvent, false, (!m_singleTaskPhaseSupported)); |
| } |
| |
| CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_B_MBENC; |
| if (m_pictureCodingType == P_TYPE) |
| { |
| //P frame curbe only use the DW0~DW75 |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe) - sizeof(uint32_t))); |
| ) |
| } |
| else |
| { |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpMDFCurbe( |
| encFunctionType, |
| (uint8_t *)curbe, sizeof(*curbe))); |
| ) |
| } |
| |
| m_lastTaskInPhase = true; |
| eStatus = Encode8x8BPakKernel(curbe); |
| return eStatus; |
| } |
| |
| #endif |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::EncodeKernelFunctions() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_feiPicParams = (CodecEncodeHevcFeiPicParams *)m_encodeParams.pFeiPicParams; |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| m_rawSurfaceToEnc, |
| CodechalDbgAttr::attrEncodeRawInputSurface, |
| "SrcSurf")); |
| ) |
| |
| if (m_pakOnlyTest) |
| { |
| // Skip all ENC kernel operations for now it is in the PAK only test mode. |
| // PAK and CU records will be passed via the app |
| return eStatus; |
| } |
| |
| if (m_brcEnabled || m_hmeEnabled) |
| { |
| eStatus = MOS_STATUS_UNKNOWN; |
| CODECHAL_ENCODE_ASSERTMESSAGE("HEVC FEI does not support BRC and HMEenabled."); |
| return eStatus; |
| } |
| |
| if(m_osInterface->bSimIsActive) |
| { |
| MOS_LOCK_PARAMS lockFlags; |
| MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); |
| lockFlags.WriteOnly = 1; |
| |
| uint8_t* data = (uint8_t* )m_osInterface->pfnLockResource(m_osInterface, &m_resMbCodeSurface, &lockFlags); |
| if (data) |
| { |
| MOS_ZeroMemory(data, m_mbCodeSize); |
| m_osInterface->pfnUnlockResource(m_osInterface, &m_resMbCodeSurface); |
| } |
| } |
| |
| // Generate slice map for kernel |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateSliceMap()); |
| |
| //Reset to use a different performance tag ID for I kernels. Each kernel has a different buffer ID |
| m_osInterface->pfnResetPerfBufferID(m_osInterface); |
| |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = false; |
| |
| // ROI uses the BRC LCU update kernel, even in CQP. So we will call it |
| // first if in CQP. It has no other kernel execution dependencies, even |
| // that brc is not initialized is not a dependency |
| if (m_hevcPicParams->NumROI && !m_brcEnabled) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcUpdateLCUBasedKernel(nullptr)); |
| } |
| |
| // config LCU QP input |
| if (m_encodeParams.bMbQpDataEnabled) |
| { |
| // Setup Lamda/Cost table for LCU QP mode |
| auto psBrcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(psBrcConstantData)); |
| |
| if (m_encodeParams.psMbQpDataSurface) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Convert1byteTo2bytesQPperLCU(m_encodeParams.psMbQpDataSurface, &m_lcuQP)); |
| m_surfaceParams[SURFACE_LCU_QP].psSurface = &m_lcuQP; |
| } |
| } |
| |
| CODECHAL_DEBUG_TOOL( |
| if (m_feiPicParams->bPerBlockQP) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface( |
| &m_lcuQP, |
| CodechalDbgAttr::attrInput, |
| "HEVC_B_MBENC_MB_QP", |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| } |
| |
| if (m_feiPicParams->MVPredictorInput) { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_feiPicParams->resMVPredictor, |
| "HEVC_B_MBENC_ConstantData_In", |
| CodechalDbgAttr::attrInput, |
| m_feiPicParams->resMVPredictor.iSize, |
| 0, |
| CODECHAL_MEDIA_STATE_HEVC_B_MBENC)); |
| }) |
| |
| if(m_feiPicParams->FastIntraMode) |
| { |
| if (m_hevcPicParams->CodingType == I_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel()); |
| } |
| } |
| else |
| { |
| //Step 1: perform 2:1 down-scaling |
| if (m_hevcSeqParams->bit_depth_luma_minus8 == 0) // use this for 8 bit only case. |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode2xScalingKernel()); |
| } |
| |
| //Step 2: 32x32 PU Mode Decision or 32x32 PU Intra check kernel |
| if (m_hevcPicParams->CodingType == I_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32x32PuModeDecisionKernel()); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode32X32BIntraCheckKernel()); |
| } |
| |
| //Step 3: 16x16 SAD Computation |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16SadPuComputationKernel()); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_sad16x16Pu.sResource, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_16x16_PU_SAD_Out", |
| m_sad16x16Pu.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_16x16_PU_SAD)); |
| ) |
| |
| //Step 4: 16x16 PU Mode Decision |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode16x16PuModeDecisionKernel()); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_vme8x8Mode.sResource, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_16x16_PU_MD_Out", |
| m_vme8x8Mode.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_16x16_PU_MODE_DECISION)); |
| ) |
| |
| //Step 5: 8x8 PU |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUKernel()); |
| |
| //Step 6: 8x8 PU FMODE |
| m_lastTaskInPhase = true; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PUFMODEKernel()); |
| |
| CODECHAL_DEBUG_TOOL( |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface( |
| &m_scaled2xSurface, |
| CodechalDbgAttr::attrReferenceSurfaces, |
| "2xScaledSurf")) |
| |
| if (m_pictureCodingType == I_TYPE) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_32x32PuOutputData.sResource, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_32x32_PU_MD_Out", |
| m_32x32PuOutputData.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION)); |
| } |
| else |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_32x32PuOutputData.sResource, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_32x32_B_INTRA_CHECK_Out", |
| m_32x32PuOutputData.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_32x32_PU_MODE_DECISION)); |
| |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_intraMode.sResource, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_8x8_PU_MD_Out", |
| m_intraMode.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_8x8_PU)); |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer( |
| &m_intraDist.sResource, |
| CodechalDbgAttr::attrOutput, |
| "HEVC_8x8_PU_FMOD_Out", |
| m_intraDist.dwSize, |
| 0, |
| CODECHAL_MEDIA_STATE_8x8_PU_FMODE)); |
| ) |
| } |
| |
| // Sync-wait can be executed after I-kernel is submitted before there is no dependency for I to wait for PAK to be ready |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak()); |
| |
| //Step 7: B MB ENC kernel for B picture only |
| if (m_hevcPicParams->CodingType != I_TYPE) |
| { |
| m_firstTaskInPhase = true; |
| m_lastTaskInPhase = false; |
| |
| if (m_feiPicParams->MVPredictorInput) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| &m_surfaceParams[SURFACE_FEI_EXTERNAL_MVP], |
| &m_feiPicParams->resMVPredictor, |
| m_feiPicParams->resMVPredictor.iSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| } |
| |
| if ((m_hevcSeqParams->bit_depth_luma_minus8)) |
| { |
| bool formatConversionDone[NUM_FORMAT_CONV_FRAMES] = { false }; |
| formatConversionDone[0] = true; // always true since its for the input surface. |
| |
| for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++) |
| { |
| if (!m_picIdx[i].bValid || !m_currUsedRefPic[i]) |
| { |
| continue; |
| } |
| |
| uint8_t picIdx = m_picIdx[i].ucPicIdx; |
| CODECHAL_ENCODE_ASSERT(picIdx < 127); |
| |
| uint8_t frameStoreId = (uint8_t)m_refIdxMapping[i]; |
| |
| if (frameStoreId >= CODECHAL_MAX_CUR_NUM_REF_FRAME_HEVC) |
| { |
| CODECHAL_ENCODE_ASSERT(0); |
| eStatus = MOS_STATUS_INVALID_PARAMETER; |
| return eStatus; |
| } |
| |
| if (formatConversionDone[frameStoreId + 1] != true) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeDSCombinedKernel(dsDisabled, (frameStoreId + 1), picIdx)); |
| formatConversionDone[frameStoreId + 1] = true; |
| m_refList[picIdx]->sRefBuffer = m_formatConvertedSurface[frameStoreId + 1]; |
| } |
| } |
| } |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(Encode8x8PBMbEncKernel()); |
| } |
| #ifdef HEVC_FEI_ENABLE_CMRT |
| |
| for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++) |
| { |
| it->second->DestroySurfResources(); |
| } |
| |
| #endif |
| |
| // Notify PAK engine once ENC is done |
| if (!m_pakOnlyTest && !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse)) |
| { |
| MOS_SYNC_PARAMS syncParams = g_cInitSyncParams; |
| syncParams.GpuContext = m_renderContext; |
| syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams)); |
| } |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::Initialize(CodechalSetting * settings) |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| // common initilization |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings)); |
| |
| m_cscDsState->EnableMmc(); |
| |
| m_brcBuffers.dwBrcConstantSurfaceWidth = BRC_CONSTANT_SURFACE_WIDTH; |
| m_brcBuffers.dwBrcConstantSurfaceHeight = BRC_CONSTANT_SURFACE_HEIGHT; |
| |
| // LCU size is 32x32 in Gen9 |
| m_widthAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameWidth, 32); |
| m_heightAlignedMaxLcu = MOS_ALIGN_CEIL(m_frameHeight, 32); |
| |
| m_brcEnabled = false; |
| m_hmeEnabled = false; |
| m_hmeSupported = false; |
| m_16xMeUserfeatureControl = false; |
| m_16xMeSupported = false; |
| m_32xMeUserfeatureControl = false; |
| m_32xMeSupported = false; |
| |
| // regkey setup |
| MOS_USER_FEATURE_VALUE_DATA userFeatureData; |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID, |
| &userFeatureData, |
| m_osInterface->pOsContext); |
| m_singleTaskPhaseSupported = (userFeatureData.i32Data) ? true : false; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID, |
| &userFeatureData, |
| m_osInterface->pOsContext); |
| m_enable26WalkingPattern = (userFeatureData.i32Data) ? false : true; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| eStatus = MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID, |
| &userFeatureData, |
| m_osInterface->pOsContext); |
| |
| if (eStatus == MOS_STATUS_SUCCESS) |
| { |
| // Region number must be greater than 1 |
| m_numRegionsInSlice = (userFeatureData.i32Data < 1) ? 1 : userFeatureData.i32Data; |
| } |
| else |
| { |
| // Reset the status to success if regkey is not set |
| eStatus = MOS_STATUS_SUCCESS; |
| } |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_8x8_INTRA_KERNEL_SPLIT, |
| &userFeatureData, |
| m_osInterface->pOsContext); |
| m_numMb8x8IntraKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_NUM_B_KERNEL_SPLIT, |
| &userFeatureData, |
| m_osInterface->pOsContext); |
| m_numMbBKernelSplit = (userFeatureData.i32Data < 0) ? 0 : userFeatureData.i32Data; |
| |
| MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData)); |
| MOS_UserFeature_ReadValue_ID( |
| nullptr, |
| __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_POWER_SAVING, |
| &userFeatureData, |
| m_osInterface->pOsContext); |
| m_powerSavingEnabled = (userFeatureData.i32Data) ? true : false; |
| |
| if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit)) |
| { |
| /* Make the width aligned to a multiple of 32 and then get the no of macroblocks.*/ |
| /* This is done to facilitate the use of format conversion kernel for downscaling to 4x and 2x along with formatconversion of 10 bit data to 8 bit data. |
| Refer format conversion kernel for further details . |
| We will use only 4x downscale for HME, Super and ultra HME use the traditional scaling kernels. |
| */ |
| uint32_t downscaledSurfaceWidth4x = MOS_ALIGN_CEIL((m_downscaledWidthInMb4x* CODECHAL_MACROBLOCK_WIDTH), (CODECHAL_MACROBLOCK_WIDTH * 2)); |
| m_downscaledWidthInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(downscaledSurfaceWidth4x); |
| |
| } |
| |
| return eStatus; |
| } |
| |
| uint32_t CodechalFeiHevcStateG9Skl::GetMaxBtCount() |
| { |
| auto wBtIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment(); |
| |
| // 6 I kernels |
| uint32_t uiBtCountPhase1 = MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_2xSCALING].KernelParams.iBTCount, wBtIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16SAD].KernelParams.iBTCount, wBtIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_16x16MD].KernelParams.iBTCount, wBtIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8PU].KernelParams.iBTCount, wBtIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_8x8FMODE].KernelParams.iBTCount, wBtIdxAlignment); |
| |
| uiBtCountPhase1 += MOS_MAX( |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32MD].KernelParams.iBTCount, wBtIdxAlignment), |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_MBENC_32x32INTRACHECK].KernelParams.iBTCount, wBtIdxAlignment)); |
| |
| if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit)) |
| { |
| uiBtCountPhase1 += MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_DS_COMBINED].KernelParams.iBTCount, wBtIdxAlignment); |
| } |
| |
| // two B kernels |
| uint32_t uiBtCountPhase2 = MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_BENC].KernelParams.iBTCount, wBtIdxAlignment) + |
| MOS_ALIGN_CEIL(m_mbEncKernelStates[CODECHAL_HEVC_FEI_MBENC_BPAK].KernelParams.iBTCount, wBtIdxAlignment); |
| |
| uint32_t uiMaxBtCount = MOS_MAX(uiBtCountPhase1, uiBtCountPhase2); |
| |
| return uiMaxBtCount; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::AllocateEncResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_sliceMap = (PCODECHAL_ENCODE_HEVC_SLICE_MAP)MOS_AllocAndZeroMemory( |
| m_widthAlignedMaxLcu * m_heightAlignedMaxLcu * sizeof(m_sliceMap[0])); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_sliceMap); |
| |
| uint32_t Downscaling2xWidth = m_widthAlignedMaxLcu >> 1; |
| uint32_t Downscaling2xHeight = m_heightAlignedMaxLcu >> 1; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface( |
| &m_scaled2xSurface, |
| Downscaling2xWidth, |
| Downscaling2xHeight, |
| "2x Downscaling")); |
| |
| uint32_t uiWidth = m_widthAlignedMaxLcu >> 3; |
| uint32_t uiHeight = m_heightAlignedMaxLcu >> 5; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_sliceMapSurface, |
| uiWidth, |
| uiHeight, |
| "Slice Map")); |
| |
| uint32_t uiSize = 32 * (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_32x32PuOutputData, |
| uiSize, |
| "32x32 PU Output Data")); |
| |
| uiSize = 8 * 4 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_sad16x16Pu, |
| uiSize, |
| "SAD 16x16 PU")); |
| |
| // need 64 bytes for statistics report . |
| uiSize = 64 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_vme8x8Mode, |
| uiSize, |
| "VME 8x8 mode")); |
| |
| uiSize = 32 * (m_widthAlignedMaxLcu >> 3) * (m_heightAlignedMaxLcu >> 3); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_intraMode, |
| uiSize, |
| "Intra mode")); |
| |
| uiSize = 16 * (m_widthAlignedMaxLcu >> 4) * (m_heightAlignedMaxLcu >> 4); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_intraDist, |
| uiSize, |
| "Intra dist")); |
| |
| // Change the surface size |
| uiWidth = m_widthAlignedMaxLcu >> 1; |
| uiHeight = m_heightAlignedMaxLcu >> 4; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_minDistortion, |
| uiWidth, |
| uiHeight, |
| "Min distortion surface")); |
| |
| // Allocate FEI 2D 2bytes LCU QP surface |
| uiWidth = MOS_ALIGN_CEIL((m_widthAlignedMaxLcu >> 4), 64); |
| uiHeight = MOS_ALIGN_CEIL((m_heightAlignedMaxLcu >> 5), 4); |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_lcuQP, |
| uiWidth, |
| uiHeight, |
| "LCU_QP surface")); |
| |
| uiWidth = sizeof(CODECHAL_ENCODE_HEVC_WALKING_CONTROL_REGION); |
| uiHeight = HEVC_CONCURRENT_SURFACE_HEIGHT; |
| for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++) |
| { |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_concurrentThreadSurface[i], |
| uiWidth, |
| uiHeight, |
| "Concurrent Thread")); |
| } |
| |
| //uiSize = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 4); |
| uiSize = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 4) + GPUMMU_WA_PADDING; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_mvIndex, |
| uiSize, |
| "MV index surface")); |
| |
| //uiSize = (dwWidthAlignedMaxLCU * dwHeightAlignedMaxLCU / 2); |
| uiSize = (m_widthAlignedMaxLcu * m_heightAlignedMaxLcu / 2) + GPUMMU_WA_PADDING; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_mvpIndex, |
| uiSize, |
| "MVP index surface")); |
| |
| uiSize = m_widthAlignedMaxLcu * m_heightAlignedMaxLcu; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_vmeSavedUniSic, |
| uiSize, |
| "VME Saved UniSic surface")); |
| |
| uiWidth = m_widthAlignedMaxLcu >> 3; |
| uiHeight = m_heightAlignedMaxLcu >> 5; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_simplestIntraSurface, |
| uiWidth, |
| uiHeight, |
| "Simplest Intra surface")); |
| |
| m_allocator->AllocateResource(m_standard, 1024, 1, brcInputForEncKernel, "brcInputForEncKernel", true); |
| |
| if (MEDIA_IS_SKU(m_skuTable, FtrEncodeHEVC10bit)) |
| { |
| // adding 10 bit support for KBL : output surface for format conversion from 10bit to 8 bit |
| for (uint32_t i = 0; i < NUM_FORMAT_CONV_FRAMES; i++) |
| { |
| if (Mos_ResourceIsNull(&m_formatConvertedSurface[i].OsResource)) |
| { |
| uiWidth = m_widthAlignedMaxLcu; |
| uiHeight = m_heightAlignedMaxLcu; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateSurface( |
| &m_formatConvertedSurface[i], |
| uiWidth, |
| uiHeight, |
| "Format Converted Surface")); |
| } |
| } |
| |
| if (Mos_ResourceIsNull(&m_resMbStatisticsSurface.sResource)) |
| { |
| uiSize = 52 * m_picWidthInMb * m_picHeightInMb; // 13 DWs or 52 bytes for statistics per MB |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer( |
| &m_resMbStatisticsSurface, |
| uiSize, |
| "MB stats surface")); |
| } |
| } |
| |
| // ROI |
| // ROI buffer size uses MB units for HEVC, not LCU |
| uiWidth = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64); |
| uiHeight = MOS_ALIGN_CEIL(m_picHeightInMb, 8); |
| |
| MOS_ZeroMemory(&m_roiSurface, sizeof(m_roiSurface)); |
| m_roiSurface.TileType = MOS_TILE_LINEAR; |
| m_roiSurface.bArraySpacing = true; |
| m_roiSurface.Format = Format_Buffer_2D; |
| m_roiSurface.dwWidth = uiWidth; |
| m_roiSurface.dwPitch = uiWidth; |
| m_roiSurface.dwHeight = uiHeight; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateBuffer2D( |
| &m_roiSurface, |
| uiWidth, |
| uiHeight, |
| "ROI Buffer")); |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::FreeEncResources() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| MOS_Delete(m_meKernelState); |
| m_meKernelState = nullptr; |
| MOS_FreeMemory(m_meKernelBindingTable); |
| m_meKernelBindingTable = nullptr; |
| |
| MOS_DeleteArray(m_mbEncKernelStates); |
| m_mbEncKernelStates = nullptr; |
| MOS_FreeMemory(m_mbEncKernelBindingTable); |
| m_mbEncKernelBindingTable = nullptr; |
| |
| MOS_DeleteArray(m_brcKernelStates); |
| m_brcKernelStates = nullptr; |
| MOS_FreeMemory(m_brcKernelBindingTable); |
| m_brcKernelBindingTable = nullptr; |
| |
| MOS_FreeMemory(m_surfaceParams); m_surfaceParams = nullptr; |
| |
| for (auto i = 0; i < NUM_FORMAT_CONV_FRAMES; i++) |
| { |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_formatConvertedSurface[i].OsResource); |
| } |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_scaled2xSurface.OsResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_resMbStatisticsSurface.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_sliceMapSurface.OsResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_32x32PuOutputData.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_sad16x16Pu.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_vme8x8Mode.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_intraMode.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_intraDist.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_mvIndex.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_mvpIndex.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_vmeSavedUniSic.sResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_minDistortion.OsResource); |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_lcuQP.OsResource); |
| |
| for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++) |
| { |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_concurrentThreadSurface[i].OsResource); |
| } |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_simplestIntraSurface.OsResource); |
| |
| MOS_FreeMemory(m_sliceMap); |
| m_sliceMap = nullptr; |
| |
| m_osInterface->pfnFreeResource( |
| m_osInterface, |
| &m_roiSurface.OsResource); |
| |
| #ifdef HEVC_FEI_ENABLE_CMRT |
| |
| for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++) |
| { |
| it->second->DestroyKernelResources(); |
| } |
| if (m_cmKernelMap.count("2xScaling")) |
| { |
| m_cmKernelMap["2xScaling"]->DestroyProgramResources(); |
| } |
| if (m_cmKernelMap.count("I_32x32")) |
| { |
| m_cmKernelMap["I_32x32"]->DestroyProgramResources(); |
| } |
| if (m_cmKernelMap.count("PB_32x32")) |
| { |
| m_cmKernelMap["PB_32x32"]->DestroyProgramResources(); |
| } |
| if (m_cmKernelMap.count("2xScaling")) |
| { |
| m_cmKernelMap["2xScaling"]->Destroy(); |
| } |
| |
| for (CmKernelMapType::iterator it = m_cmKernelMap.begin(); it != m_cmKernelMap.end(); it++) |
| { |
| delete it->second; |
| } |
| |
| m_cmKernelMap.clear(); |
| |
| CmKernelMapType deallocator; |
| m_cmKernelMap.swap(deallocator); |
| |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::InitSurfaceInfoTable() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| m_surfaceParams = (PCODECHAL_SURFACE_CODEC_PARAMS)MOS_AllocAndZeroMemory( |
| sizeof(*m_surfaceParams) * SURFACE_NUM_TOTAL); |
| CODECHAL_ENCODE_CHK_NULL_RETURN(m_surfaceParams); |
| |
| PCODECHAL_SURFACE_CODEC_PARAMS param = &m_surfaceParams[SURFACE_RAW_Y]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| m_rawSurfaceToEnc, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_RAW_10bit_Y]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| m_rawSurfaceToEnc, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| // MB stats surface -- currently not used |
| param = &m_surfaceParams[SURFACE_RAW_MBSTAT]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_resMbStatisticsSurface.sResource, |
| m_resMbStatisticsSurface.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| true)); |
| param->bRawSurface = true; |
| |
| param = &m_surfaceParams[SURFACE_RAW_FC_8bit_Y_UV]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_formatConvertedSurface[0], |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| true)); //this should be writable as it is output of formatconversion |
| param->bUseUVPlane = true; |
| |
| param = &m_surfaceParams[SURFACE_RAW_Y_UV]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| m_rawSurfaceToEnc, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| param->bUseUVPlane = true; |
| |
| param = &m_surfaceParams[SURFACE_RAW_10bit_Y_UV]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| m_rawSurfaceToEnc, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false));//this should be writable as it is output of formatconversion |
| param->bUseUVPlane = true; |
| |
| param = &m_surfaceParams[SURFACE_Y_2X]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_scaled2xSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_32x32_PU_OUTPUT]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_32x32PuOutputData.sResource, |
| m_32x32PuOutputData.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_SLICE_MAP]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_sliceMapSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_Y_2X_VME]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| param, |
| &m_scaled2xSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| 0)); |
| |
| param = &m_surfaceParams[SURFACE_BRC_INPUT]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel), |
| m_allocator->GetResourceSize(m_standard, brcInputForEncKernel), |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_LCU_QP]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_lcuQP, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_ROI]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_roiSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_BRC_DATA]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx], |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_SIMPLIFIED_INTRA]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_simplestIntraSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| // skip SURFACE_HME_MVP and SURFACE_HME_DIST from HME since FEI alsways disables HME |
| |
| param = &m_surfaceParams[SURFACE_16x16PU_SAD]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_sad16x16Pu.sResource, |
| m_sad16x16Pu.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_RAW_VME]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| param, |
| m_rawSurfaceToEnc, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0)); |
| |
| param = &m_surfaceParams[SURFACE_VME_8x8]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_vme8x8Mode.sResource, |
| m_vme8x8Mode.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_CU_RECORD]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_resMbCodeSurface, |
| m_mbCodeSize - m_mvOffset, |
| m_mvOffset, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_INTRA_MODE]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_intraMode.sResource, |
| m_intraMode.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_HCP_PAK]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_resMbCodeSurface, |
| m_mvOffset, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_INTRA_DIST]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_intraDist.sResource, |
| m_intraDist.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_MIN_DIST]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_minDistortion, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_VME_UNI_SIC_DATA]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_vmeSavedUniSic.sResource, |
| m_vmeSavedUniSic.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_COL_MB_MV]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| nullptr, |
| m_sizeOfMvTemporalBuffer, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| m_concurrentThreadIndex = 0; |
| for (uint32_t i = 0; i < NUM_CONCURRENT_THREAD; i++) |
| { |
| param = &m_surfaceParams[SURFACE_CONCURRENT_THREAD + i]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_concurrentThreadSurface[i], |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| } |
| |
| param = &m_surfaceParams[SURFACE_MB_MV_INDEX]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_mvIndex.sResource, |
| m_mvIndex.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_MVP_INDEX]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_mvpIndex.sResource, |
| m_mvpIndex.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_REF_FRAME_VME]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| param, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value, |
| 0)); |
| |
| param = &m_surfaceParams[SURFACE_Y_4X]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| nullptr, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_Y_4X_VME]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME( |
| param, |
| nullptr, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value, |
| 0)); |
| |
| param = &m_surfaceParams[SURFACE_BRC_HISTORY]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_brcBuffers.resBrcHistoryBuffer, |
| m_brcHistoryBufferSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_BRC_ME_DIST]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_brcBuffers.sMeBrcDistortionBuffer, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_BRC_PAST_PAK_INFO]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_brcBuffers.resBrcPakStatisticBuffer[0], |
| m_hevcBrcPakStatisticsSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_BRC_HCP_PIC_STATE]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_brcBuffers.resBrcImageStatesWriteBuffer[0], |
| m_brcBuffers.dwBrcHcpPicStateSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| #if 0 |
| param = &m_surfaceParams[SURFACE_PU_STATS]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_encStatsBuffers.m_puStatsSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_8X8_PU_HAAR_DIST]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_encStatsBuffers.m_8x8PuHaarDist, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_8X8_PU_FRAME_STATS]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_encStatsBuffers.m_8x8PuFrameStats.sResource, |
| m_encStatsBuffers.m_8x8PuFrameStats.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_MB_ENC_STATS]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D( |
| param, |
| &m_encStatsBuffers.m_mbEncStatsSurface, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value, |
| 0, |
| m_verticalLineStride, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_MB_ENC_FRAME_STATS]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_encStatsBuffers.m_mbEncFrameStats.sResource, |
| m_encStatsBuffers.m_mbEncFrameStats.dwSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| true)); |
| |
| param = &m_surfaceParams[SURFACE_FEI_EXTERNAL_MVP]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_feiPicParams->resMVPredictor, |
| m_feiPicParams->resMVPredictor.iSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_FEI_PER_LCU_QP]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_feiPicParams->resCTBQp, |
| m_feiPicParams->resCTBQp.iSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_FEI_PER_CTB_CTRL]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_feiPicParams->resCTBCtrl, |
| m_feiPicParams->resCTBCtrl.iSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| |
| param = &m_surfaceParams[SURFACE_FEI_CTB_DISTORTION]; |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D( |
| param, |
| &m_feiPicParams->resDistortion, |
| m_feiPicParams->resDistortion.iSize, |
| 0, |
| m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value, |
| 0, |
| false)); |
| #endif |
| |
| return eStatus; |
| } |
| |
| MOS_STATUS CodechalFeiHevcStateG9Skl::SetSequenceStructs() |
| { |
| MOS_STATUS eStatus = MOS_STATUS_SUCCESS; |
| |
| CODECHAL_ENCODE_FUNCTION_ENTER; |
| |
| m_feiPicParams = (CodecEncodeHevcFeiPicParams *)m_encodeParams.pFeiPicParams; |
| |
| CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSequenceStructs()); |
| |
| m_enable26WalkingPattern = m_feiPicParams->bForceLCUSplit; |
| m_numRegionsInSlice = m_feiPicParams->NumConcurrentEncFramePartition; |
| m_encodeParams.bReportStatisticsEnabled = 0; |
| m_encodeParams.bQualityImprovementEnable = 0; |
| |
| if (m_feiPicParams->FastIntraMode) |
| { |
| m_hevcSeqParams->TargetUsage = 0x07; |
| } |
| |
| return eStatus; |
| } |
| |
| CodechalFeiHevcStateG9Skl::CodechalFeiHevcStateG9Skl(CodechalHwInterface* hwInterface, |
| CodechalDebugInterface* debugInterface, |
| PCODECHAL_STANDARD_INFO standardInfo) |
| :CodechalEncHevcStateG9(hwInterface, debugInterface, standardInfo) |
| { |
| m_kernelBase = (uint8_t *)IGCODECKRN_G9; |
| m_kuid = IDR_CODEC_HEVC_FEI_COMBINED_KENREL_INTEL; |
| pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize; |
| m_noMeKernelForPFrame = false; |
| m_feiEnable = true; |
| |
| MOS_STATUS eStatus = InitMhw(); |
| if (eStatus != MOS_STATUS_SUCCESS) |
| { |
| CODECHAL_ENCODE_ASSERTMESSAGE("HEVC FEI encoder MHW initialization failed."); |
| } |
| } |
| |