// blob: cf7265bbd254cba69f6312c85e9432c4323fb55e [file] [log] [blame]
/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file codechal_encode_hevc_g10.cpp
//! \brief HEVC dual-pipe encoder for GEN10.
//!
#include "codechal_encode_hevc_g10.h"
#ifndef _FULL_OPEN_SOURCE
#include "igcodeckrn_g10.h"
#endif
#include "codeckrnheader.h"
//! Hierarchical motion estimation (HME) stage selector.
//! The names distinguish a 4x pass run without a 16x pass, a 4x pass run
//! after a 16x pass, and the 16x pass itself.
enum
{
    HME_STAGE_4x_NO_16x    = 0,
    HME_STAGE_4x_AFTER_16x = 1,
    HME_STAGE_16x          = 2
};
//! Index of each MBENC kernel variant; MBENC_NUM_KRN is the number of variants.
enum CODECHAL_ENC_HEVC_MBENC_KRNIDX_G10
{
    MBENC_I_KRNIDX       = 0,  //!< I kernel
    MBENC_B_LCU32_KRNIDX = 1,  //!< B kernel, LCU32
    MBENC_B_LCU64_KRNIDX = 2,  //!< B kernel, LCU64
    MBENC_NUM_KRN        = 3   //!< count of the entries above
};
//! Slot indices into the m_modeCost look-up table.
//! Some inter partition shapes intentionally share a slot, so several
//! enumerators carry the same value (4 and 6 below).
enum
{
    LUTMODE_INTRA_NONPRED = 0,   //!< extra penalty for non-predicted modes
    LUTMODE_INTRA_32x32   = 1,
    LUTMODE_INTRA_16x16   = 2,
    LUTMODE_INTRA_8x8     = 3,
    LUTMODE_INTER_32x16   = 4,
    LUTMODE_INTER_16x32   = 4,   //!< shares the slot of LUTMODE_INTER_32x16
    LUTMODE_INTER_AMP     = 4,   //!< All asymmetrical shapes
    LUTMODE_INTER_16x16   = 5,
    LUTMODE_INTER_16x8    = 6,
    LUTMODE_INTER_8x16    = 6,   //!< shares the slot of LUTMODE_INTER_16x8
    LUTMODE_INTER_8x8     = 7,
    LUTMODE_INTER_32x32   = 8,
    LUTMODE_INTER_BIDIR   = 9,
    LUTMODE_REF_ID        = 10,
    LUTMODE_INTRA_CHROMA  = 11
};
//! Binding table offset for all kernels
//!
//! One anonymous enum holds the binding-table indices of every kernel.
//! Each kernel's section restarts at 0 via its own *_BEGIN enumerator, the
//! first real entry aliases *_BEGIN, and the trailing *_END enumerator
//! therefore equals the number of entries in that section.
//! Entries rely on implicit numbering, so inserting or reordering an
//! enumerator shifts every following index in the same section.
enum
{
// DownScaling And Conversion Kernel (SCALING_CONVERSION_END == 11)
SCALING_CONVERSION_BEGIN = 0,
SCALING_CONVERSION_10BIT_Y = SCALING_CONVERSION_BEGIN,
SCALING_CONVERSION_10BIT_UV,
SCALING_CONVERSION_8BIT_Y,
SCALING_CONVERSION_8BIT_UV,
SCALING_CONVERSION_4xDS,
SCALING_CONVERSION_MB_STATS,
SCALING_CONVERSION_2xDS,
SCALING_CONVERSION_MB_SPLIT_SURFACE,
SCALING_CONVERSION_LCU32_JOB_QUEUE_SCRATCH_SURFACE,
SCALING_CONVERSION_LCU64_JOB_QUEUE_SCRATCH_SURFACE,
SCALING_CONVERSION_LCU64_64x64_DISTORTION_SURFACE,
SCALING_CONVERSION_END,
// Hme Kernel (HME_END == 15)
HME_BEGIN = 0,
HME_OUTPUT_MV_DATA = HME_BEGIN,
HME_16xINPUT_MV_DATA,
HME_4xOUTPUT_DISTORTION,
HME_VME_PRED_CURR_PIC_IDX0,
HME_VME_PRED_FWD_PIC_IDX0,
HME_VME_PRED_BWD_PIC_IDX0,
HME_VME_PRED_FWD_PIC_IDX1,
HME_VME_PRED_BWD_PIC_IDX1,
HME_VME_PRED_FWD_PIC_IDX2,
HME_VME_PRED_BWD_PIC_IDX2,
HME_VME_PRED_FWD_PIC_IDX3,
HME_VME_PRED_BWD_PIC_IDX3,
HME_4xDS_INPUT,
HME_BRC_DISTORTION,
HME_MV_AND_DISTORTION_SUM,
HME_END,
// BRC Init/Reset (BRC_INIT_RESET_END == 2)
BRC_INIT_RESET_BEGIN = 0,
BRC_INIT_RESET_HISTORY = BRC_INIT_RESET_BEGIN,
BRC_INIT_RESET_DISTORTION,
BRC_INIT_RESET_END,
// BRC Update, frame based (BRC_UPDATE_END == 9)
BRC_UPDATE_BEGIN = 0,
BRC_UPDATE_HISTORY = BRC_UPDATE_BEGIN,
BRC_UPDATE_PREV_PAK,
BRC_UPDATE_PIC_STATE_R,
BRC_UPDATE_PIC_STATE_W,
BRC_UPDATE_ENC_OUTPUT,
BRC_UPDATE_DISTORTION,
BRC_UPDATE_BRCDATA,
BRC_UPDATE_MB_STATS,
BRC_UPDATE_MV_AND_DISTORTION_SUM,
BRC_UPDATE_END,
// BRC Update, LCU based (BRC_LCU_UPDATE_END == 7)
BRC_LCU_UPDATE_BEGIN = 0,
BRC_LCU_UPDATE_HISTORY = BRC_LCU_UPDATE_BEGIN,
BRC_LCU_UPDATE_DISTORTION,
BRC_LCU_UPDATE_MB_STATS,
BRC_LCU_UPDATE_MB_QP,
BRC_LCU_UPDATE_MB_SPLIT_SURFACE,
BRC_LCU_UPDATE_INTRA_DISTORTION,
BRC_LCU_UPDATE_CU_SPLIT_SURFACE,
BRC_LCU_UPDATE_END,
// MBEnc I-kernel (MBENC_I_FRAME_END == 22)
MBENC_I_FRAME_BEGIN = 0,
MBENC_I_FRAME_VME_PRED_CURR_PIC_IDX0 = MBENC_I_FRAME_BEGIN,
MBENC_I_FRAME_VME_PRED_FWD_PIC_IDX0,
MBENC_I_FRAME_VME_PRED_BWD_PIC_IDX0,
MBENC_I_FRAME_VME_PRED_FWD_PIC_IDX1,
MBENC_I_FRAME_VME_PRED_BWD_PIC_IDX1,
MBENC_I_FRAME_VME_PRED_FWD_PIC_IDX2,
MBENC_I_FRAME_VME_PRED_BWD_PIC_IDX2,
MBENC_I_FRAME_VME_PRED_FWD_PIC_IDX3,
MBENC_I_FRAME_VME_PRED_BWD_PIC_IDX3,
MBENC_I_FRAME_CURR_Y,
MBENC_I_FRAME_CURR_UV,
MBENC_I_FRAME_INTERMEDIATE_CU_RECORD,
MBENC_I_FRAME_PAK_OBJ,
MBENC_I_FRAME_PAK_CU_RECORD,
MBENC_I_FRAME_SCRATCH_SURFACE,
MBENC_I_FRAME_CU_QP_DATA,
MBENC_I_FRAME_CONST_DATA_LUT,
MBENC_I_FRAME_LCU_LEVEL_DATA_INPUT,
MBENC_I_FRAME_CONCURRENT_TG_DATA,
MBENC_I_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE,
MBENC_I_FRAME_CU_SPLIT_SURFACE,
MBENC_I_FRAME_DEBUG_DUMP,
MBENC_I_FRAME_END,
// MBEnc B-kernel -- Both for LCU32 (MBENC_B_FRAME_LCU32_END == 26)
MBENC_B_FRAME_LCU32_BEGIN = 0,
MBENC_B_FRAME_LCU32_CURR_Y = MBENC_B_FRAME_LCU32_BEGIN,
MBENC_B_FRAME_LCU32_CURR_UV,
MBENC_B_FRAME_LCU32_ENC_CU_RECORD,
MBENC_B_FRAME_LCU32_PAK_OBJ,
MBENC_B_FRAME_LCU32_PAK_CU_RECORD,
MBENC_B_FRAME_LCU32_VME_PRED_CURR_PIC_IDX0,
MBENC_B_FRAME_LCU32_VME_PRED_FWD_PIC_IDX0,
MBENC_B_FRAME_LCU32_VME_PRED_BWD_PIC_IDX0,
MBENC_B_FRAME_LCU32_VME_PRED_FWD_PIC_IDX1,
MBENC_B_FRAME_LCU32_VME_PRED_BWD_PIC_IDX1,
MBENC_B_FRAME_LCU32_VME_PRED_FWD_PIC_IDX2,
MBENC_B_FRAME_LCU32_VME_PRED_BWD_PIC_IDX2,
MBENC_B_FRAME_LCU32_VME_PRED_FWD_PIC_IDX3,
MBENC_B_FRAME_LCU32_VME_PRED_BWD_PIC_IDX3,
MBENC_B_FRAME_LCU32_CU16x16_QP_DATA,
MBENC_B_FRAME_LCU32_ENC_CONST_TABLE,
MBENC_B_FRAME_LCU32_COLOCATED_CU_MV_DATA,
MBENC_B_FRAME_LCU32_HME_MOTION_PREDICTOR_DATA,
MBENC_B_FRAME_LCU32_LCU_LEVEL_DATA_INPUT,
MBENC_B_FRAME_LCU32_LCU_ENC_SCRATCH_SURFACE,
MBENC_B_FRAME_LCU32_CONCURRENT_TG_DATA,
MBENC_B_FRAME_LCU32_BRC_COMBINED_ENC_PARAMETER_SURFACE,
MBENC_B_FRAME_LCU32_JOB_QUEUE_SCRATCH_SURFACE,
MBENC_B_FRAME_LCU32_CU_SPLIT_DATA_SURFACE,
MBENC_B_FRAME_LCU32_RESIDUAL_DATA_SCRATCH_SURFACE,
MBENC_B_FRAME_LCU32_DEBUG_SURFACE,
MBENC_B_FRAME_LCU32_END,
// MBEnc B-kernel -- Both for LCU64 (MBENC_B_FRAME_LCU64_END == 45)
MBENC_B_FRAME_LCU64_BEGIN = 0,
MBENC_B_FRAME_LCU64_CURR_Y = MBENC_B_FRAME_LCU64_BEGIN,
MBENC_B_FRAME_LCU64_CURR_UV,
MBENC_B_FRAME_LCU64_CU32_ENC_CU_RECORD,
MBENC_B_FRAME_LCU64_SECOND_CU32_ENC_CU_RECORD,
MBENC_B_FRAME_LCU64_PAK_OBJ,
MBENC_B_FRAME_LCU64_PAK_CU_RECORD,
MBENC_B_FRAME_LCU64_VME_PRED_CURR_PIC_IDX0,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_IDX0,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_IDX0,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_IDX1,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_IDX1,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_IDX2,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_IDX2,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_IDX3,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_IDX3,
MBENC_B_FRAME_LCU64_CU16x16_QP_DATA,
MBENC_B_FRAME_LCU64_CU32_ENC_CONST_TABLE,
MBENC_B_FRAME_LCU64_COLOCATED_CU_MV_DATA,
MBENC_B_FRAME_LCU64_HME_MOTION_PREDICTOR_DATA,
MBENC_B_FRAME_LCU64_LCU_LEVEL_DATA_INPUT,
MBENC_B_FRAME_LCU64_CU32_LCU_ENC_SCRATCH_SURFACE,
MBENC_B_FRAME_LCU64_64X64_DISTORTION_SURFACE,
MBENC_B_FRAME_LCU64_CONCURRENT_TG_DATA,
MBENC_B_FRAME_LCU64_BRC_COMBINED_ENC_PARAMETER_SURFACE,
MBENC_B_FRAME_LCU64_CU32_JOB_QUEUE_1D_SURFACE,
MBENC_B_FRAME_LCU64_CU32_JOB_QUEUE_2D_SURFACE,
MBENC_B_FRAME_LCU64_CU32_RESIDUAL_DATA_SCRATCH_SURFACE,
MBENC_B_FRAME_LCU64_CU_SPLIT_DATA_SURFACE,
MBENC_B_FRAME_LCU64_CURR_Y_2xDS,
MBENC_B_FRAME_LCU64_INTERMEDIATE_CU_RECORD,
MBENC_B_FRAME_LCU64_CONST64_DATA_LUT,
MBENC_B_FRAME_LCU64_LCU_STORAGE_SURFACE,
MBENC_B_FRAME_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_2xDS_IDX0,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_2xDS_IDX0,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_2xDS_IDX1,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_2xDS_IDX1,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_2xDS_IDX2,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_2xDS_IDX2,
MBENC_B_FRAME_LCU64_VME_PRED_FWD_PIC_2xDS_IDX3,
MBENC_B_FRAME_LCU64_VME_PRED_BWD_PIC_2xDS_IDX3,
MBENC_B_FRAME_LCU64_JOB_QUEUE_1D_SURFACE,
MBENC_B_FRAME_LCU64_JOB_QUEUE_2D_SURFACE,
MBENC_B_FRAME_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE,
MBENC_B_FRAME_LCU64_DEBUG_SURFACE,
MBENC_B_FRAME_LCU64_END,
};
//! \cond SKIP_DOXYGEN
//! Header table of the HEVC Gen10 kernels: a kernel count followed by one
//! CODECHAL_KERNEL_HEADER entry per kernel.
//! NOTE(review): the member order presumably has to match the header layout
//! of the kernel binary -- confirm before reordering.
struct CODECHAL_ENC_HEVC_KERNEL_HEADER_G10
{
    int32_t nKernelCount;  //!< kernel count field preceding the entries

    union
    {
        // HEVC kernel entries
        struct
        {
            CODECHAL_KERNEL_HEADER Gen10_HEVC_Intra;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_Enc_B;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_DS_Convert;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_HME;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_Enc_LCU64_B;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_brc_init;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_brc_lcuqp;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_brc_reset;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_brc_update;
            CODECHAL_KERNEL_HEADER Gen10_HEVC_brc_blockcopy;  // not used so far
        };
    };
};
//! Pointer alias for the kernel header table.
using PCODECHAL_ENC_HEVC_KERNEL_HEADER_G10 = CODECHAL_ENC_HEVC_KERNEL_HEADER_G10*;
//! LCU level data record: eight 16-bit words (16 bytes) in total.
struct CODECHAL_ENC_HEVC_LCU_LEVEL_DATA_G10
{
    uint16_t SliceStartLcuIndex;  //!< word 0
    uint16_t SliceEndLcuIndex;    //!< word 1
    uint16_t SliceId;             //!< word 2
    uint16_t SliceLevelQP;        //!< word 3
    uint16_t Reserved[4];         //!< words 4..7, reserved
};
//! Pointer alias for the LCU level data record.
using PCODECHAL_ENC_HEVC_LCU_LEVEL_DATA_G10 = CODECHAL_ENC_HEVC_LCU_LEVEL_DATA_G10*;
//! Concurrent thread group (TG) data record: nine named 16-bit words followed
//! by 23 reserved words, 32 words (64 bytes) in total.
struct CODECHAL_ENC_HEVC_CONCURRENT_THREAD_GROUP_DATA_G10
{
    // Thread-group range and index
    uint16_t CurrTgStartLcuIndex;
    uint16_t CurrTgEndLcuIndex;
    uint16_t CurrTgIndex;
    uint16_t Reserved0;

    // Wavefront (Wf) fields
    uint16_t CurrWfLcuIndex_x;
    uint16_t CurrWfLcuIndex_y;
    uint16_t CurrWfLcuIndex1_y;
    uint16_t NextWfLcuIndex_x;
    uint16_t CurrWfYoffset;

    // Reserved tail filling the record up to 32 words
    uint16_t Reserved[23];
};
//! Pointer alias for the concurrent thread group data record.
using PCODECHAL_ENC_HEVC_CONCURRENT_THREAD_GROUP_DATA_G10 = CODECHAL_ENC_HEVC_CONCURRENT_THREAD_GROUP_DATA_G10*;
//! curbe structure for ME kernel
//!
//! The members are 32-bit bitfields grouped under "DWORD n" comments; the
//! C_ASSERT after the struct pins the total size to 41 DWORDs.
//! Member names carry the DWORD index as a DWn_ prefix. Fields marked MBZ
//! are reserved ("must be zero").
//! DWORDs 34..40 hold binding table indices (BTI) of the kernel surfaces.
//! NOTE(review): MOS_BITFIELD_RANGE(lo, hi) / MOS_BITFIELD_BIT(n) are defined
//! outside this file; presumably they expand to the bit width (hi - lo + 1
//! and 1 respectively) -- confirm in the MOS headers.
struct CODECHAL_ENC_HEVC_ME_CURBE_G10
{
// DWORD 0
uint32_t DW0_RoundedFrameWidthInMvUnitsfor4X : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW0_RoundedFrameHeightInMvUnitsfor4X : MOS_BITFIELD_RANGE(16, 31);
// DWORD 1
uint32_t DW1_Reserved_0 : MOS_BITFIELD_RANGE( 0, 15); // MBZ
uint32_t DW1_MvCostScaleFactor : MOS_BITFIELD_RANGE(16, 17); // This parameter allows the user to redefine the precision of the lookup into the LUT_MV based on the MV cost difference from the cost center
uint32_t DW1_Reserved_1 : MOS_BITFIELD_RANGE(18, 31); // Default value to enable Fractional Motion Estimation: 0x11[3] -- NOTE(review): this remark sits on a reserved field and looks misplaced; confirm
// DWORD 2
uint32_t DW2_Reserved_0 : MOS_BITFIELD_RANGE( 0, 15); // MBZ
uint32_t DW2_SubPelMode : MOS_BITFIELD_RANGE(16, 17);
uint32_t DW2_BmeDisableFbr : MOS_BITFIELD_BIT( 18);
uint32_t DW2_Reserved_1 : MOS_BITFIELD_BIT( 19); // MBZ
uint32_t DW2_InterSADMeasureAdjustment : MOS_BITFIELD_RANGE(20, 21); // This field specifies distortion measure adjustments used for the motion search SAD comparison.
uint32_t DW2_Reserved_2 : MOS_BITFIELD_RANGE(22, 31); // MBZ
// DWORD 3
uint32_t DW3_Reserved_0 : MOS_BITFIELD_BIT( 0); // MBZ
uint32_t DW3_AdaptiveSearchEnable : MOS_BITFIELD_BIT( 1); // This field determines whether adaptive search is enabled or not.
uint32_t DW3_Reserved_1 : MOS_BITFIELD_RANGE( 2, 15); // MBZ
uint32_t DW3_ImeRefWindowSize : MOS_BITFIELD_RANGE(16, 17);
uint32_t DW3_Reserved_2 : MOS_BITFIELD_RANGE(18, 31); // MBZ
// DWORD 4
uint32_t DW4_Reserved_0 : MOS_BITFIELD_RANGE( 0, 7); // MBZ
uint32_t DW4_QuarterQuadTreeCandidate : MOS_BITFIELD_RANGE( 8, 12); // This parameter indicates the current 32x32 block CU candidate that is being checked.
uint32_t DW4_Reserved_1 : MOS_BITFIELD_RANGE(13, 15); // MBZ
uint32_t DW4_BidirectionalWeight : MOS_BITFIELD_RANGE(16, 21); // This field defines the weighting for the backward and forward terms to generate the bidirectional term.
uint32_t DW4_Reserved_2 : MOS_BITFIELD_RANGE(22, 31); // MBZ
// DWORD 5
uint32_t DW5_LenSP : MOS_BITFIELD_RANGE( 0, 7); // Maximum Fixed Search Path Length. This field determines the maximum number of SUs per reference which are evaluated by predetermined SUs.
uint32_t DW5_MaxNumSU : MOS_BITFIELD_RANGE( 8, 15); // Maximum Search Path Length. This field determines the maximum number of SUs per reference including predetermined SUs and the adaptively generated SUs.
uint32_t DW5_StartCenter0_X : MOS_BITFIELD_RANGE(16, 19); // This field defines the X position of Search Path 1 relative to the reference X location.
uint32_t DW5_StartCenter0_Y : MOS_BITFIELD_RANGE(20, 23); // This field defines the Y position of Search Path 1 relative to the reference Y location.
uint32_t DW5_Reserved_0 : MOS_BITFIELD_RANGE(24, 31); // MBZ
// DWORD 6
uint32_t DW6_Reserved_0 : MOS_BITFIELD_BIT( 0); // MBZ
uint32_t DW6_SliceType : MOS_BITFIELD_BIT( 1);
uint32_t DW6_HmeStage : MOS_BITFIELD_RANGE( 2, 3);
uint32_t DW6_NumRefL0 : MOS_BITFIELD_RANGE( 4, 5); // Valid Number of Forward L0 references
uint32_t DW6_NumRefL1 : MOS_BITFIELD_RANGE( 6, 7); // Valid Number of Backward L1 references
uint32_t DW6_Reserved_1 : MOS_BITFIELD_RANGE( 8, 31); // MBZ
// DWORD 7
uint32_t DW7_RoundedFrameWidthInMvUnitsFor16x : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW7_RoundedFrameHeightInMvUnitsfor16X : MOS_BITFIELD_RANGE(16, 31);
// DWORDs 8..23: IME search path, entries 0..63 (four entries per DWORD, per the field names)
// DWORD 8
uint32_t DW8_ImeSearchPath_0_3 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 9
uint32_t DW9_ImeSearchPath_4_7 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 10
uint32_t DW10_ImeSearchPath_8_11 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 11
uint32_t DW11_ImeSearchPath_12_15 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 12
uint32_t DW12_ImeSearchPath_16_19 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 13
uint32_t DW13_ImeSearchPath_20_23 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 14
uint32_t DW14_ImeSearchPath_24_27 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 15
uint32_t DW15_ImeSearchPath_28_31 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 16
uint32_t DW16_ImeSearchPath_32_35 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 17
uint32_t DW17_ImeSearchPath_36_39 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 18
uint32_t DW18_ImeSearchPath_40_43 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 19
uint32_t DW19_ImeSearchPath_44_47 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 20
uint32_t DW20_ImeSearchPath_48_51 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 21
uint32_t DW21_ImeSearchPath_52_55 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 22
uint32_t DW22_ImeSearchPath_56_59 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 23
uint32_t DW23_ImeSearchPath_60_63 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 24
uint32_t DW24_Reserved_0 : MOS_BITFIELD_RANGE( 0, 5); // MBZ
uint32_t DW24_CodingUnitSize : MOS_BITFIELD_RANGE( 6, 7);
uint32_t DW24_Reserved_1 : MOS_BITFIELD_RANGE( 8, 11); // MBZ
uint32_t DW24_CodingUnitPartitionMode : MOS_BITFIELD_RANGE(12, 14);
uint32_t DW24_CodingUnitPredictionMode : MOS_BITFIELD_BIT( 15);
uint32_t DW24_Reserved_2 : MOS_BITFIELD_RANGE(16, 31); // MBZ
// DWORD 25
uint32_t DW25_FrameWidthInSamplesOfCurrentStage : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW25_FrameHeightInSamplesOfCurrentStage : MOS_BITFIELD_RANGE(16, 31);
// DWORD 26
uint32_t DW26_Intra8x8ModeMask : MOS_BITFIELD_RANGE( 0, 9);
uint32_t DW26_Reserved_0 : MOS_BITFIELD_RANGE(10, 15); // MBZ
uint32_t DW26_Intra16x16ModeMask : MOS_BITFIELD_RANGE(16, 24);
uint32_t DW26_Reserved_1 : MOS_BITFIELD_RANGE(25, 31);
// DWORD 27
uint32_t DW27_Intra32x32ModeMask : MOS_BITFIELD_RANGE( 0, 3);
uint32_t DW27_IntraChromaModeMask : MOS_BITFIELD_RANGE( 4, 8);
uint32_t DW27_IntraComputeType : MOS_BITFIELD_RANGE( 9, 10);
uint32_t DW27_Reserved_0 : MOS_BITFIELD_RANGE(11, 31); // MBZ
// DWORD 28
uint32_t DW28_Reserved_0 : MOS_BITFIELD_RANGE( 0, 7); // MBZ
uint32_t DW28_PenaltyIntra32x32NonDC : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW28_PenaltyIntra16x16NonDC : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW28_PenaltyIntra8x8NonDC : MOS_BITFIELD_RANGE(24, 31);
// DWORDs 29..31: mode costs 0..9, one byte each, plus the chroma intra mode cost
// DWORD 29
uint32_t DW29_Mode0Cost : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW29_Mode1Cost : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW29_Mode2Cost : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW29_Mode3Cost : MOS_BITFIELD_RANGE(24, 31);
// DWORD 30
uint32_t DW30_Mode4Cost : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW30_Mode5Cost : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW30_Mode6Cost : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW30_Mode7Cost : MOS_BITFIELD_RANGE(24, 31);
// DWORD 31
uint32_t DW31_Mode8Cost : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW31_Mode9Cost : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW31_Reserved_0 : MOS_BITFIELD_RANGE(16, 23); // MBZ
uint32_t DW31_ChromaIntraModeCost : MOS_BITFIELD_RANGE(24, 31);
// DWORD 32
uint32_t DW32_Reserved_0 : MOS_BITFIELD_RANGE( 0, 7); // MBZ
uint32_t DW32_SicIntraNeighborAvailableFlag : MOS_BITFIELD_RANGE( 8, 13);
uint32_t DW32_Reserved_1 : MOS_BITFIELD_RANGE(14, 19); // MBZ
uint32_t DW32_SicInterSadMeasure : MOS_BITFIELD_RANGE(20, 21);
uint32_t DW32_SicIntraSadMeasure : MOS_BITFIELD_RANGE(22, 23);
uint32_t DW32_Reserved_2 : MOS_BITFIELD_RANGE(24, 31); // MBZ
// DWORD 33
uint32_t DW33_SicLog2MinCuSize : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW33_Reserved_0 : MOS_BITFIELD_RANGE( 8, 19); // MBZ
uint32_t DW33_SicAcOnlyHaar : MOS_BITFIELD_BIT( 20);
uint32_t DW33_Reserved_1 : MOS_BITFIELD_RANGE(21, 23); // MBZ
uint32_t DW33_SicHevcQuarterQuadtree : MOS_BITFIELD_RANGE(24, 28);
uint32_t DW33_Reserved_2 : MOS_BITFIELD_RANGE(29, 31); // MBZ
// DWORDs 34..40: binding table indices (BTI), one full DWORD per surface
// DWORD 34
uint32_t DW34_BTI_HmeOutputMvDataSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 35
uint32_t DW35_BTI_16xInputMvDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Only applicable for 4x Stage
// DWORD 36
uint32_t DW36_BTI_4xOutputDistortionSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 37
uint32_t DW37_BTI_VmeSurfaceIndex : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 38
uint32_t DW38_BTI_4xDsSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 39
uint32_t DW39_BTI_BrcDistortionSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 40
uint32_t DW40_BTI_Mv_And_Distortion_SumSurface : MOS_BITFIELD_RANGE( 0, 31);
};
// Compile-time guard: the ME curbe must occupy exactly 41 DWORDs.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G10)) == 41);
//! curbe structure for BRC InitReset kernel
//!
//! The members are 32-bit bitfields grouped under "DWORD n" comments; the
//! C_ASSERT after the struct pins the total size to 32 DWORDs.
//! Member names carry the DWORD index as a DWn_ prefix. Several 16-bit
//! fields straddle the DWORD grouping by name only (e.g. BRC_Param_A sits in
//! DW8 while BRC_Param_B sits in DW9), so related parameters are not always
//! adjacent in the same DWORD.
//! DWORDs 27..31 are entirely reserved.
struct CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G10
{
// DWORD 0
uint32_t DW0_ProfileLevelMaxFrame : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 1
uint32_t DW1_InitBufFull : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 2
uint32_t DW2_BufSize : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 3
uint32_t DW3_TargetBitRate : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 4
uint32_t DW4_MaximumBitRate : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 5
uint32_t DW5_MinimumBitRate : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 6
uint32_t DW6_FrameRateM : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 7
uint32_t DW7_FrameRateD : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 8
uint32_t DW8_BRCFlag : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW8_BRC_Param_A : MOS_BITFIELD_RANGE(16, 31);
// DWORD 9
uint32_t DW9_BRC_Param_B : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW9_FrameWidth : MOS_BITFIELD_RANGE(16, 31);
// DWORD 10
uint32_t DW10_FrameHeight : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW10_AVBRAccuracy : MOS_BITFIELD_RANGE(16, 31);
// DWORD 11
uint32_t DW11_AVBRConvergence : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW11_MinimumQP : MOS_BITFIELD_RANGE(16, 31);
// DWORD 12
uint32_t DW12_MaximumQP : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW12_NumberSlice : MOS_BITFIELD_RANGE(16, 31);
// DWORD 13
uint32_t DW13_Reserved_0 : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW13_BRC_Param_C : MOS_BITFIELD_RANGE(16, 31);
// DWORD 14
uint32_t DW14_BRC_Param_D : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW14_MaxBRCLevel : MOS_BITFIELD_RANGE(16, 31);
// DWORD 15
uint32_t DW15_LongTermInterval : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW15_Reserved_0 : MOS_BITFIELD_RANGE( 16, 31);
// DWORDs 16..18: instant rate thresholds 0..3, one byte each, for P, B and I frames
// DWORD 16
uint32_t DW16_InstantRateThreshold0_Pframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW16_InstantRateThreshold1_Pframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW16_InstantRateThreshold2_Pframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW16_InstantRateThreshold3_Pframe : MOS_BITFIELD_RANGE(24, 31);
// DWORD 17
uint32_t DW17_InstantRateThreshold0_Bframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW17_InstantRateThreshold1_Bframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW17_InstantRateThreshold2_Bframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW17_InstantRateThreshold3_Bframe : MOS_BITFIELD_RANGE(24, 31);
// DWORD 18
uint32_t DW18_InstantRateThreshold0_Iframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW18_InstantRateThreshold1_Iframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW18_InstantRateThreshold2_Iframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW18_InstantRateThreshold3_Iframe : MOS_BITFIELD_RANGE(24, 31);
// DWORDs 19..24: deviation thresholds 0..7, one byte each, for P/B frames, VBR control and I frames
// DWORD 19
uint32_t DW19_DeviationThreshold0_PBframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW19_DeviationThreshold1_PBframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW19_DeviationThreshold2_PBframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW19_DeviationThreshold3_PBframe : MOS_BITFIELD_RANGE(24, 31);
// DWORD 20
uint32_t DW20_DeviationThreshold4_PBframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW20_DeviationThreshold5_PBframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW20_DeviationThreshold6_PBframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW20_DeviationThreshold7_PBframe : MOS_BITFIELD_RANGE(24, 31);
// DWORD 21
uint32_t DW21_DeviationThreshold0_VBRcontrol : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW21_DeviationThreshold1_VBRcontrol : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW21_DeviationThreshold2_VBRcontrol : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW21_DeviationThreshold3_VBRcontrol : MOS_BITFIELD_RANGE(24, 31);
// DWORD 22
uint32_t DW22_DeviationThreshold4_VBRcontrol : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW22_DeviationThreshold5_VBRcontrol : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW22_DeviationThreshold6_VBRcontrol : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW22_DeviationThreshold7_VBRcontrol : MOS_BITFIELD_RANGE(24, 31);
// DWORD 23
uint32_t DW23_DeviationThreshold0_Iframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW23_DeviationThreshold1_Iframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW23_DeviationThreshold2_Iframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW23_DeviationThreshold3_Iframe : MOS_BITFIELD_RANGE(24, 31);
// DWORD 24
uint32_t DW24_DeviationThreshold4_Iframe : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW24_DeviationThreshold5_Iframe : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW24_DeviationThreshold6_Iframe : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW24_DeviationThreshold7_Iframe : MOS_BITFIELD_RANGE(24, 31);
// DWORD 25
uint32_t DW25_ACQPBuffer : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW25_IntraSADTransform : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW25_Log2MaxCuSize : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW25_SlidingWindowSize : MOS_BITFIELD_RANGE(24, 31);
// DWORD 26
uint32_t DW26_BGOPSize : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW26_Reserved_0 : MOS_BITFIELD_RANGE( 8, 31);
// DWORDs 27..31: reserved
// DWORD 27
uint32_t DW27_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 28
uint32_t DW28_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 29
uint32_t DW29_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 30
uint32_t DW30_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 31
uint32_t DW31_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31);
};
// Compile-time guard: the BRC InitReset curbe must occupy exactly 32 DWORDs.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G10)) == 32);
//! curbe structure for BRC Update kernel
//!
//! The members are 32-bit bitfields grouped under "DWORD n" comments; the
//! C_ASSERT after the struct pins the total size to 16 DWORDs.
//! Member names carry the DWORD index as a DWn_ prefix.
struct CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10
{
// DWORD 0
uint32_t DW0_TargetSize : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 1
uint32_t DW1_FrameNumber : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 2
uint32_t DW2_PictureHeaderSize : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 3
uint32_t DW3_StartGAdjFrame0 : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW3_StartGAdjFrame1 : MOS_BITFIELD_RANGE(16, 31);
// DWORD 4
uint32_t DW4_StartGAdjFrame2 : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW4_StartGAdjFrame3 : MOS_BITFIELD_RANGE(16, 31);
// DWORD 5
uint32_t DW5_TargetSize_Flag : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW5_Reserved_0 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW5_MaxNumPAKs : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW5_CurrFrameBrcLevel : MOS_BITFIELD_RANGE(24, 31);
// DWORD 6 (bits 16..20 are single-bit fields written as one-bit ranges)
uint32_t DW6_NumSkippedFrames : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW6_CqpValue : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW6_ROIEnable : MOS_BITFIELD_RANGE(16, 16);
uint32_t DW6_BRCROIEnable : MOS_BITFIELD_RANGE(17, 17);
uint32_t DW6_LCUQPAverageEnable : MOS_BITFIELD_RANGE(18, 18);
uint32_t DW6_Reserved1 : MOS_BITFIELD_RANGE(19, 19);
uint32_t DW6_SlidingWindowEnable : MOS_BITFIELD_RANGE(20, 20);
uint32_t DW6_Reserved2 : MOS_BITFIELD_RANGE(21, 23);
uint32_t DW6_RoiRatio : MOS_BITFIELD_RANGE(24, 31);
// DWORD 7
uint32_t DW7_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31);
// DWORDs 8..10: global adjust multipliers 0..4 and dividers 0..4, one byte each
// DWORD 8
uint32_t DW8_StartGlobalAdjustMult0 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW8_StartGlobalAdjustMult1 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW8_StartGlobalAdjustMult2 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW8_StartGlobalAdjustMult3 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 9
uint32_t DW9_StartGlobalAdjustMult4 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW9_StartGlobalAdjustDivd0 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW9_StartGlobalAdjustDivd1 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW9_StartGlobalAdjustDivd2 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 10
uint32_t DW10_StartGlobalAdjustDivd3 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW10_StartGlobalAdjustDivd4 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW10_QPThreshold0 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW10_QPThreshold1 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 11
uint32_t DW11_QPThreshold2 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW11_QPThreshold3 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW11_gRateRatioThreshold0 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW11_gRateRatioThreshold1 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 12
uint32_t DW12_gRateRatioThreshold2 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW12_gRateRatioThreshold3 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW12_gRateRatioThreshold4 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW12_gRateRatioThreshold5 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 13
uint32_t DW13_gRateRatioThreshold6 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW13_gRateRatioThreshold7 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW13_gRateRatioThreshold8 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW13_gRateRatioThreshold9 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 14
uint32_t DW14_gRateRatioThreshold10 : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW14_gRateRatioThreshold11 : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW14_gRateRatioThreshold12 : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW14_ParallelMode : MOS_BITFIELD_RANGE(24, 31);
// DWORD 15
uint32_t DW15_SizeOfSkippedFrames : MOS_BITFIELD_RANGE( 0, 31);
};
// Compile-time guard: the BRC Update curbe must occupy exactly 16 DWORDs.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10)) == 16);
//! curbe structure for MBENC I kernel
//!
//! The members are 32-bit bitfields grouped under "DWORD n" comments; the
//! C_ASSERT after the struct pins the total size to 29 DWORDs.
//! Member names carry the DWORD index as a DWn_ prefix. Fields marked MBZ
//! are reserved ("must be zero").
//! DWORDs 3..5 hold the mode costs in the slot order noted on each field;
//! DWORDs 16..28 hold binding table indices (BTI) of the kernel surfaces.
struct CODECHAL_ENC_HEVC_MBENC_I_CURBE_G10
{
// DWORD 0
uint32_t DW0_FrameWidthInSamples : MOS_BITFIELD_RANGE( 0, 15); // PicW should be a multiple of 8
uint32_t DW0_FrameHeightInSamples : MOS_BITFIELD_RANGE(16, 31); // PicH should be a multiple of 8
// DWORD 1
uint32_t DW1_Reserved_0 : MOS_BITFIELD_RANGE( 0, 7); // MBZ
uint32_t DW1_PenaltyForIntra32x32NonDCPredMode : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW1_PenaltyForIntra16x16NonDCPredMode : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW1_PenaltyForIntra8x8NonDCPredMode : MOS_BITFIELD_RANGE(24, 31);
// DWORD 2
uint32_t DW2_Reserved_0 : MOS_BITFIELD_RANGE( 0, 5); // MBZ
uint32_t DW2_IntraSADMeasureAdj : MOS_BITFIELD_RANGE( 6, 7);
uint32_t DW2_IntraPrediction : MOS_BITFIELD_RANGE( 8, 10);
uint32_t DW2_Reserved_1 : MOS_BITFIELD_RANGE(11, 31); // MBZ
// DWORD 3
uint32_t DW3_ModeCost_0 : MOS_BITFIELD_RANGE( 0, 7); // MODE_INTRA_NONPRED
uint32_t DW3_ModeCost_1 : MOS_BITFIELD_RANGE( 8, 15); // MODE_INTRA_32x32
uint32_t DW3_ModeCost_2 : MOS_BITFIELD_RANGE(16, 23); // MODE_INTRA_16x16
uint32_t DW3_ModeCost_3 : MOS_BITFIELD_RANGE(24, 31); // MODE_INTRA_8x8
// DWORD 4
uint32_t DW4_ModeCost_4 : MOS_BITFIELD_RANGE( 0, 7); // MODE_INTER_32x16, MODE_INTER_16x32, MODE_INTER_AMP shapes
uint32_t DW4_ModeCost_5 : MOS_BITFIELD_RANGE( 8, 15); // MODE_INTER_16x16
uint32_t DW4_ModeCost_6 : MOS_BITFIELD_RANGE(16, 23); // MODE_INTER_16x8, MODE_INTER_8x16
uint32_t DW4_ModeCost_7 : MOS_BITFIELD_RANGE(24, 31); // MODE_INTER_8x8
// DWORD 5
uint32_t DW5_ModeCost_8 : MOS_BITFIELD_RANGE( 0, 7); // MODE_INTER_32x32
uint32_t DW5_ModeCost_9 : MOS_BITFIELD_RANGE( 8, 15); // MODE_INTER_BIDIR
uint32_t DW5_RefIDCost : MOS_BITFIELD_RANGE(16, 23); // RefID costing based penalty.
uint32_t DW5_ChromaIntraModeCost : MOS_BITFIELD_RANGE(24, 31); // Penalty for chroma intra modes.
// DWORD 6
uint32_t DW6_Log2MaxCUSize : MOS_BITFIELD_RANGE( 0, 3);
uint32_t DW6_Log2MinCUSize : MOS_BITFIELD_RANGE( 4, 7);
uint32_t DW6_Log2MaxTUSize : MOS_BITFIELD_RANGE( 8, 11);
uint32_t DW6_Log2MinTUSize : MOS_BITFIELD_RANGE(12, 15);
uint32_t DW6_MaxTransformDepthIntra : MOS_BITFIELD_RANGE(16, 19);
uint32_t DW6_TuSplitFlag : MOS_BITFIELD_BIT( 20);
uint32_t DW6_TuBasedCostSetting : MOS_BITFIELD_RANGE(21, 23);
uint32_t DW6_Reserved_0 : MOS_BITFIELD_RANGE(24, 31); // MBZ
// DWORD 7
uint32_t DW7_ConcurrentGroupNum : MOS_BITFIELD_RANGE( 0, 7); // MBZ -- NOTE(review): "MBZ" on a named, non-reserved field looks like a leftover; confirm
uint32_t DW7_EncTuDecisionMode : MOS_BITFIELD_RANGE( 8, 9);
uint32_t DW7_Reserved_0 : MOS_BITFIELD_RANGE(10, 23); // MBZ
uint32_t DW7_SliceQP : MOS_BITFIELD_RANGE(24, 31);
// DWORD 8
uint32_t DW8_Lambda_Rd : MOS_BITFIELD_RANGE( 0, 31); // Derived from QP value and used for TU decision
// DWORD 9
uint32_t DW9_Lambda_Md : MOS_BITFIELD_RANGE( 0, 15); // Derived from QP value and used for distortion related calc
uint32_t DW9_Reserved_0 : MOS_BITFIELD_RANGE(16, 31); // MBZ
// DWORD 10
uint32_t DW10_IntraTuDThres : MOS_BITFIELD_RANGE( 0, 31); // Intra TU Distortion Threshold
// DWORD 11
uint32_t DW11_SliceType : MOS_BITFIELD_RANGE( 0, 1);
uint32_t DW11_QPType : MOS_BITFIELD_RANGE( 2, 3);
uint32_t DW11_CheckPcmModeFlag : MOS_BITFIELD_BIT( 4);
uint32_t DW11_EnableIntra4x4PU : MOS_BITFIELD_BIT( 5);
uint32_t DW11_EncQtDecisionMode : MOS_BITFIELD_BIT( 6);
uint32_t DW11_Reserved_0 : MOS_BITFIELD_RANGE( 7, 31); // MBZ
// DWORD 12
uint32_t DW12_PCM_8x8_SAD_Threshold : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW12_Reserved_0 : MOS_BITFIELD_RANGE(16, 31); // MBZ
// DWORD 13
uint32_t DW13_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31); // MBZ
// DWORD 14
uint32_t DW14_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31); // MBZ
// DWORD 15
uint32_t DW15_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31); // MBZ
// DWORDs 16..28: binding table indices (BTI), one full DWORD per surface
// DWORD 16
uint32_t DW16_BTI_VmeIntraPredictionSurface : MOS_BITFIELD_RANGE( 0, 31); // Current pixel surface accessed by VME hardware
// DWORD 17
uint32_t DW17_BTI_CurrentPictureY : MOS_BITFIELD_RANGE( 0, 31); // Current Y pixel surface accessed through data port by kernel
// DWORD 18
uint32_t DW18_BTI_EncCuRecordSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface to store intermediate CU records for kernel usage
// DWORD 19
uint32_t DW19_BTI_PakObjectCommandSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface to write final PAK object commands
// DWORD 20
uint32_t DW20_BTI_CuPacketForPakSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface to write CU packet
// DWORD 21
uint32_t DW21_BTI_InternalScratchSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 22
uint32_t DW22_BTI_CuBasedQpSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 23
uint32_t DW23_BTI_ConstantDataLutSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 24
uint32_t DW24_BTI_LcuLevelDataInputSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 25
uint32_t DW25_BTI_ConcurrentThreadGroupDataSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 26
uint32_t DW26_BTI_BrcCombinedEncParameterSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 27
uint32_t DW27_BTI_CuSplitSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 28
uint32_t DW28_BTI_DebugSurface : MOS_BITFIELD_RANGE( 0, 31); // Used for debug purposes
};
// Compile-time guard: the MBENC I curbe must occupy exactly 29 DWORDs.
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_MBENC_I_CURBE_G10)) == 29);
//! CURBE structure for the MBENC B kernels.
//! A single layout is shared by the LCU32 and LCU64 B kernels:
//!  - DW0..DW17  : fields common to both kernels (some are marked MBZ for one of them).
//!  - DW18..DW32 : unions; the LCU32_* and LCU64_* members alias the same DWORD, so the
//!                 meaning of the binding table index (BTI) depends on which kernel is run.
//!  - DW33..DW43 : BTIs that only carry LCU64-named fields.
//! The C_ASSERT that follows the structure pins its size to 44 DWORDs.
struct CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10
{
// DWORD 0
uint32_t DW0_FrameWidthInSamples : MOS_BITFIELD_RANGE( 0, 15); // Input picture horizontal size in pixel. It should be the actual "to-be-encoded" size.
uint32_t DW0_FrameHeightInSamples : MOS_BITFIELD_RANGE(16, 31); // Input picture vertical size in pixel. It should be the actual "to-be-encoded" size.
// DWORD 1
uint32_t DW1_Log2MaxCUSize : MOS_BITFIELD_RANGE( 0, 3);
uint32_t DW1_Log2MinCUSize : MOS_BITFIELD_RANGE( 4, 7);
uint32_t DW1_Log2MaxTUSize : MOS_BITFIELD_RANGE( 8, 11);
uint32_t DW1_Log2MinTUSize : MOS_BITFIELD_RANGE(12, 15);
uint32_t DW1_MaxTransformDepthInter : MOS_BITFIELD_RANGE(16, 19);
uint32_t DW1_MaxTransformDepthIntra : MOS_BITFIELD_RANGE(20, 23);
uint32_t DW1_Log2ParallelMergeLevel : MOS_BITFIELD_RANGE(24, 27);
uint32_t DW1_MaxNumIMESearchCenter : MOS_BITFIELD_RANGE(28, 31);
// DWORD 2
uint32_t DW2_TransquantBypassEnableFlag : MOS_BITFIELD_BIT( 0);
uint32_t DW2_CuQpDeltaEnabledFlag : MOS_BITFIELD_BIT( 1);
uint32_t DW2_PCMEnabledFlag : MOS_BITFIELD_BIT( 2);
uint32_t DW2_EnableCu64Check : MOS_BITFIELD_BIT( 3);
uint32_t DW2_EnableIntra4x4PU : MOS_BITFIELD_BIT( 4);
uint32_t DW2_ChromaSkipCheck : MOS_BITFIELD_BIT( 5);
uint32_t DW2_EncTransformSimplify : MOS_BITFIELD_RANGE( 6, 7);
uint32_t DW2_HMEFlag : MOS_BITFIELD_RANGE( 8, 9); // 2 bit flag for enabling hierarchical ME, bit0 is for P slice and bit1 is for B slice
uint32_t DW2_HMECoarseShape : MOS_BITFIELD_RANGE(10, 11);
uint32_t DW2_HMESubPelMode : MOS_BITFIELD_RANGE(12, 13);
uint32_t DW2_SuperHME : MOS_BITFIELD_BIT( 14);
uint32_t DW2_RegionsInSliceEnable : MOS_BITFIELD_BIT( 15);
uint32_t DW2_EncTuDecisionMode : MOS_BITFIELD_RANGE(16, 17);
uint32_t DW2_EncTuDecisionForAllQt : MOS_BITFIELD_BIT( 18);
uint32_t DW2_CoefBitEstMode : MOS_BITFIELD_BIT( 19);
uint32_t DW2_EncSkipDecisionMode : MOS_BITFIELD_RANGE(20, 21);
uint32_t DW2_EncQtDecisionMode : MOS_BITFIELD_BIT( 22);
uint32_t DW2_LCU32_EncRdDecisionModeForAllQt : MOS_BITFIELD_BIT( 23); //MBZ for LCU64
uint32_t DW2_QpType : MOS_BITFIELD_RANGE(24, 25);
uint32_t DW2_LCU64_Cu64SkipCheckOnly : MOS_BITFIELD_BIT( 26); // Used by LCU64-B
uint32_t DW2_SICDynamicRunPathMode : MOS_BITFIELD_RANGE(27, 28);
uint32_t DW2_Reserved_0 : MOS_BITFIELD_RANGE(29, 31); // MBZ
// DWORD 3
uint32_t DW3_ActiveNumChildThreads_CU64 : MOS_BITFIELD_RANGE( 0, 3); // only used by LCU64-B kernel, MBZ for LCU32-B kernel
uint32_t DW3_ActiveNumChildThreads_CU32_0 : MOS_BITFIELD_RANGE( 4, 7); // only used by LCU64-B kernel, MBZ for LCU32-B kernel
uint32_t DW3_ActiveNumChildThreads_CU32_1 : MOS_BITFIELD_RANGE( 8, 11); // only used by LCU64-B kernel, MBZ for LCU32-B kernel
uint32_t DW3_ActiveNumChildThreads_CU32_2 : MOS_BITFIELD_RANGE(12, 15); // only used by LCU64-B kernel, MBZ for LCU32-B kernel
uint32_t DW3_ActiveNumChildThreads_CU32_3 : MOS_BITFIELD_RANGE(16, 19); // only used by LCU64-B kernel, MBZ for LCU32-B kernel
uint32_t DW3_Reserved_0 : MOS_BITFIELD_RANGE(20, 23);
uint32_t DW3_SliceQp : MOS_BITFIELD_RANGE(24, 31);
// DWORD 4
uint32_t DW4_SkipModeEn : MOS_BITFIELD_BIT( 0); // This field specifies whether the skip mode checking is performed before the motion search.
uint32_t DW4_AdaptiveEn : MOS_BITFIELD_BIT( 1); // This field defines whether adaptive searching is enabled for IME.
uint32_t DW4_Reserved_0 : MOS_BITFIELD_BIT( 2); // MBZ
uint32_t DW4_HEVCMinCUControl : MOS_BITFIELD_RANGE( 3, 4); // These bits define the lowest CU split allowed.
uint32_t DW4_EarlyImeSuccessEn : MOS_BITFIELD_BIT( 5); // This field specifies whether the Early Success may terminate the IME search.
uint32_t DW4_Reserved_1 : MOS_BITFIELD_BIT( 6); // MBZ
uint32_t DW4_IMECostCentersSel : MOS_BITFIELD_BIT( 7); // This field determines the CostCenters that need to be used for motion vector costing purposes in IME.
uint32_t DW4_RefPixelOffset : MOS_BITFIELD_RANGE( 8, 15); // The amount the reference pixels are offset.
uint32_t DW4_IMERefWindowSize : MOS_BITFIELD_RANGE(16, 17);
uint32_t DW4_ResidualPredDatatypeCtrl : MOS_BITFIELD_BIT( 18);
uint32_t DW4_ResidualPredInterChromaCtrl : MOS_BITFIELD_BIT( 19); // This parameter indicates if RPM is generating luma predicted/residual samples or chroma predicted/residual samples
uint32_t DW4_ResidualPred16x16SelCtrl : MOS_BITFIELD_RANGE(20, 21); // Residual Prediction 16x16 Selection Control.
uint32_t DW4_Reserved_2 : MOS_BITFIELD_RANGE(22, 23); // MBZ
uint32_t DW4_EarlyImeStop : MOS_BITFIELD_RANGE(24, 31); // Early IME Successful Stop Threshold
// DWORD 5
uint32_t DW5_SubPelMode : MOS_BITFIELD_RANGE( 0, 1); // This field defines the half/quarter pel modes.
uint32_t DW5_Reserved_0 : MOS_BITFIELD_RANGE( 2, 3); // MBZ
uint32_t DW5_InterSADMeasure : MOS_BITFIELD_RANGE( 4, 5); // This field specifies distortion measure adjustments used for the motion search SAD comparison.
uint32_t DW5_IntraSADMeasure : MOS_BITFIELD_RANGE( 6, 7); // This field specifies distortion measure adjustments used for the motion search SAD comparison.
uint32_t DW5_LenSP : MOS_BITFIELD_RANGE( 8, 15); // This field defines the maximum number of SUs per reference which are evaluated by the predetermined SUs.
uint32_t DW5_MaxNumSU : MOS_BITFIELD_RANGE(16, 23); // This field defines the maximum number of SUs per reference including the predetermined SUs and the adaptively generated SUs.
uint32_t DW5_IntraPredictionMask : MOS_BITFIELD_RANGE(24, 26); // This field specifies which Luma Intra partition is enabled/disabled for intra mode decision.
uint32_t DW5_RefIDCostMode : MOS_BITFIELD_BIT( 27); // Selects the RefID costing mode.
uint32_t DW5_DisablePIntra : MOS_BITFIELD_BIT( 28);
uint32_t DW5_TuBasedCostSetting : MOS_BITFIELD_RANGE(29, 31);
// DWORD 6
uint32_t DW6_Reserved_0 : MOS_BITFIELD_RANGE( 0, 31); // MBZ
// DWORD 7
uint32_t DW7_SliceType : MOS_BITFIELD_RANGE( 0, 1);
uint32_t DW7_TemporalMvpEnableFlag : MOS_BITFIELD_BIT( 2);
uint32_t DW7_CollocatedFromL0Flag : MOS_BITFIELD_BIT( 3); // Reference index of the picture that contains the collocated partitions.
uint32_t DW7_TheSameRefList : MOS_BITFIELD_BIT( 4);
uint32_t DW7_IsLowDelay : MOS_BITFIELD_BIT( 5); // Reserved for LCU64_CU32
uint32_t DW7_Reserved_0 : MOS_BITFIELD_RANGE( 6, 7);
uint32_t DW7_MaxNumMergeCand : MOS_BITFIELD_RANGE( 8, 15); // Max number of merge candidates allowed.
uint32_t DW7_NumRefIdxL0 : MOS_BITFIELD_RANGE(16, 23); // Actual number of reference frames for FWD prediction.
uint32_t DW7_NumRefIdxL1 : MOS_BITFIELD_RANGE(24, 31); // Actual number of reference frames for BWD prediction.
// DWORD 8
uint32_t DW8_FwdPocNumber_L0_mTb_0 : MOS_BITFIELD_RANGE( 0, 7); // FWD POC Number for RefID 0 in L0
uint32_t DW8_BwdPocNumber_L1_mTb_0 : MOS_BITFIELD_RANGE( 8, 15); // BWD POC Number for RefID 0 in L1
uint32_t DW8_FwdPocNumber_L0_mTb_1 : MOS_BITFIELD_RANGE(16, 23); // FWD POC Number for RefID 1 in L0
uint32_t DW8_BwdPocNumber_L1_mTb_1 : MOS_BITFIELD_RANGE(24, 31); // BWD POC Number for RefID 1 in L1
// DWORD 9
uint32_t DW9_FwdPocNumber_L0_mTb_2 : MOS_BITFIELD_RANGE( 0, 7); // FWD POC Number for RefID 2 in L0
uint32_t DW9_BwdPocNumber_L1_mTb_2 : MOS_BITFIELD_RANGE( 8, 15); // BWD POC Number for RefID 2 in L1
uint32_t DW9_FwdPocNumber_L0_mTb_3 : MOS_BITFIELD_RANGE(16, 23); // FWD POC Number for RefID 3 in L0
uint32_t DW9_BwdPocNumber_L1_mTb_3 : MOS_BITFIELD_RANGE(24, 31); // BWD POC Number for RefID 3 in L1
// DWORD 10
uint32_t DW10_FwdPocNumber_L0_mTb_4 : MOS_BITFIELD_RANGE( 0, 7); // FWD POC Number for RefID 4 in L0
uint32_t DW10_BwdPocNumber_L1_mTb_4 : MOS_BITFIELD_RANGE( 8, 15); // BWD POC Number for RefID 4 in L1
uint32_t DW10_FwdPocNumber_L0_mTb_5 : MOS_BITFIELD_RANGE(16, 23); // FWD POC Number for RefID 5 in L0
uint32_t DW10_BwdPocNumber_L1_mTb_5 : MOS_BITFIELD_RANGE(24, 31); // BWD POC Number for RefID 5 in L1
// DWORD 11
uint32_t DW11_FwdPocNumber_L0_mTb_6 : MOS_BITFIELD_RANGE( 0, 7); // FWD POC Number for RefID 6 in L0
uint32_t DW11_BwdPocNumber_L1_mTb_6 : MOS_BITFIELD_RANGE( 8, 15); // BWD POC Number for RefID 6 in L1
uint32_t DW11_FwdPocNumber_L0_mTb_7 : MOS_BITFIELD_RANGE(16, 23); // FWD POC Number for RefID 7 in L0
uint32_t DW11_BwdPocNumber_L1_mTb_7 : MOS_BITFIELD_RANGE(24, 31); // BWD POC Number for RefID 7 in L1
// DWORD 12
uint32_t DW12_LongTermReferenceFlags_L0 : MOS_BITFIELD_RANGE( 0, 15); // Bit0~Bit7 indicates if Ref0~Ref7 in L0 is long term reference.
uint32_t DW12_LongTermReferenceFlags_L1 : MOS_BITFIELD_RANGE(16, 31); // Bit16~Bit23 indicates if Ref0~Ref7 in L1 is long term reference.
// DWORD 13
uint32_t DW13_RefFrameHorizontalSize : MOS_BITFIELD_RANGE( 0, 15);
uint32_t DW13_RefFrameVerticalSize : MOS_BITFIELD_RANGE(16, 31);
// DWORD 14
uint32_t DW14_KernelDebugDW : MOS_BITFIELD_RANGE( 0, 31); // not used in release kernel, MBZ
// DWORD 15
uint32_t DW15_ConcurrentGroupNum : MOS_BITFIELD_RANGE( 0, 7);
uint32_t DW15_TotalThreadNumPerLCU : MOS_BITFIELD_RANGE( 8, 15);
uint32_t DW15_NumRegions : MOS_BITFIELD_RANGE(16, 23);
uint32_t DW15_Reserved_0 : MOS_BITFIELD_RANGE(24, 31);
// DWORD 16
uint32_t DW16_BTI_CurrentPictureY : MOS_BITFIELD_RANGE( 0, 31); // Source Pixel Y Surface index
// DWORD 17
uint32_t DW17_BTI_EncCuRecordSurface : MOS_BITFIELD_RANGE( 0, 31); // Each 32x32 LCU will need 32x16 size 2D window
// From DWORD 18 to DWORD 32 every DWORD is a union: both members occupy the same 32 bits
// and only the one matching the kernel in use (LCU32 or LCU64) is meaningful.
// DWORD 18
union
{
uint32_t DW18_BTI_LCU32_PAKObjectCommandSurface : MOS_BITFIELD_RANGE( 0, 31); // Output data for PAK Engine input, each LCU has 4 DWs (16 bytes) PAK Object Command
uint32_t DW18_BTI_LCU64_SecEncCuRecordSurface : MOS_BITFIELD_RANGE( 0, 31);
};
// DWORD 19
union
{
uint32_t DW19_BTI_LCU32_PAKCURecordSurface : MOS_BITFIELD_RANGE( 0, 31); // Output CURecord data for PAK Engine input. Each LCU32x32 will output 512 bytes CU packet data.
uint32_t DW19_BTI_LCU64_PAKObjectCommandSurface : MOS_BITFIELD_RANGE( 0, 31); // Output data for PAK Engine input, each LCU has 4 DWs (16 bytes) PAK Object Command
};
// DWORD 20
union
{
uint32_t DW20_BTI_LCU32_VMEIntra_InterPredictionSurface : MOS_BITFIELD_RANGE( 0, 31); // VME intra/inter prediction surface (NOTE(review): description inferred from the field name - confirm)
uint32_t DW20_BTI_LCU64_PAKCURecordSurface : MOS_BITFIELD_RANGE( 0, 31); // Output CURecord data for PAK Engine input. Each LCU32x32 will output 512 bytes CU packet data.
};
// DWORD 21
union
{
uint32_t DW21_BTI_LCU32_CU16x16QpDataInputSurface : MOS_BITFIELD_RANGE( 0, 31); // Each 16x16 block has one byte QP data. Each LCU32 has 4 bytes. Used only when CuQpDeltaEnabledFlag=1
uint32_t DW21_BTI_LCU64_VMEIntra_InterPredictionSurface : MOS_BITFIELD_RANGE( 0, 31); // VME intra/inter prediction surface (NOTE(review): description inferred from the field name - confirm)
};
// DWORD 22
union
{
uint32_t DW22_BTI_LCU32_HEVCEncConstantTableSurface : MOS_BITFIELD_RANGE( 0, 31); // This surface contains all constants used by kernel. Data will be provided during kernel release.
uint32_t DW22_BTI_LCU64_CU16x16QpDataInputSurface : MOS_BITFIELD_RANGE( 0, 31); // Each 16x16 block has one byte QP data. Each LCU32 has 4 bytes. Used only when CuQpDeltaEnabledFlag=1
};
// DWORD 23
union
{
uint32_t DW23_BTI_LCU32_ColocatedCUMotionVectorDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Each CU 16x16 has 4 DWs ColMV data. Each LCU32 has 64 bytes. Used only when TemporalMvpEnableFlag=1, temporal prediction is enabled.
uint32_t DW23_BTI_LCU64_CU32_HEVCEncConstantTableSurface : MOS_BITFIELD_RANGE( 0, 31); // This surface contains all constants used by kernel. Data will be provided during kernel release.
};
// DWORD 24
union
{
uint32_t DW24_BTI_LCU32_HmeMotionPredictorDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Each 32x32 block has 1 pair of FWD MV and 1 pair of BWD MV, total 8 bytes of data.
uint32_t DW24_BTI_LCU64_ColocatedCUMotionVectorDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Each CU 16x16 has 4 DWs ColMV data. Each LCU32 has 64 bytes. Used only when TemporalMvpEnableFlag=1, temporal prediction is enabled.
};
// DWORD 25
union
{
uint32_t DW25_BTI_LCU32_LcuLevelDataInputSurface : MOS_BITFIELD_RANGE( 0, 31); // Each LCU block has one 32 bytes data, including SliceQP and slice start/end address
uint32_t DW25_BTI_LCU64_HmeMotionPredictorDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Each 32x32 block has 1 pair of FWD MV and 1 pair of BWD MV, total 8 bytes of data.
};
// DWORD 26
union
{
uint32_t DW26_BTI_LCU32_LcuEncodingScratchSurface : MOS_BITFIELD_RANGE( 0, 31); // Each LCU32 block has about 9k byte scratch space to store temporary data
uint32_t DW26_BTI_LCU64_LcuLevelDataInputSurface : MOS_BITFIELD_RANGE( 0, 31); // Each LCU block has one 32 bytes data, including SliceQP and slice start/end address
};
// DWORD 27
union
{
uint32_t DW27_BTI_LCU32_ConcurrentThreadGroupDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Concurrent Thread Group Data Surface **LCU32 kernel**
uint32_t DW27_BTI_LCU64_CU32_LcuEncodingScratchSurface : MOS_BITFIELD_RANGE( 0, 31); // Each LCU32 block has about 9k byte scratch space to store temporary data
};
// DWORD 28
union
{
uint32_t DW28_BTI_LCU32_BrcCombinedEncParameterSurface : MOS_BITFIELD_RANGE( 0, 31); // Brc Combined Enc Parameter Surface **LCU32 kernel**
uint32_t DW28_BTI_LCU64_64x64_DistortionSurface : MOS_BITFIELD_RANGE( 0, 31); // 64x64 distortion surface (NOTE(review): description inferred from the field name - confirm)
};
// DWORD 29
union
{
uint32_t DW29_BTI_LCU32_JobQueueScratchBufferSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface for Multi-thread implementation **LCU32 kernel**
uint32_t DW29_BTI_LCU64_ConcurrentThreadGroupDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Concurrent Thread Group Data Surface **LCU64_CU32 kernel**
};
// DWORD 30
union
{
uint32_t DW30_BTI_LCU32_CuSplitDataSurface : MOS_BITFIELD_RANGE( 0, 31); // Reserved for debug kernel. Not available for released kernel. **LCU32 kernel**
uint32_t DW30_BTI_LCU64_BrcCombinedEncParameterSurface : MOS_BITFIELD_RANGE( 0, 31); // Brc Combined Enc Parameter Surface. **LCU64_CU32 kernel**
};
// DWORD 31
union
{
uint32_t DW31_BTI_LCU32_ResidualDataScratchSurface : MOS_BITFIELD_RANGE( 0, 31);
uint32_t DW31_BTI_LCU64_CU32_JobQueue1DBufferSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface for Multi-thread implementation **LCU64_CU32 kernel**
};
// DWORD 32
union
{
uint32_t DW32_BTI_LCU32_DebugSurface : MOS_BITFIELD_RANGE( 0, 31);
uint32_t DW32_BTI_LCU64_CU32_JobQueue2DBufferSurface : MOS_BITFIELD_RANGE( 0, 31);
};
// DWORD 33 onwards: plain (non-union) fields, all named for the LCU64 kernel
// DWORD 33
uint32_t DW33_BTI_LCU64_CU32_ResidualDataScratchSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 34
uint32_t DW34_BTI_LCU64_CuSplitSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 35
uint32_t DW35_BTI_LCU64_CurrentPictureY2xDS : MOS_BITFIELD_RANGE( 0, 31); // Source Pixel Y Downscaled by 2 Surface index
// DWORD 36
uint32_t DW36_BTI_LCU64_IntermediateCuRecordSurface : MOS_BITFIELD_RANGE( 0, 31); // Each 64x64 LCU will need 32x16x4 size 2D window
// DWORD 37
uint32_t DW37_BTI_Lcu64_ConstantDataLutSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 38
uint32_t DW38_BTI_LCU64_LcuDataStorageSurface : MOS_BITFIELD_RANGE( 0, 31);
// DWORD 39
uint32_t DW39_BTI_LCU64_VmeInterPredictionSurface2xDS : MOS_BITFIELD_RANGE( 0, 31); // This is current downscaled by 2 pixel surface accessed by the VME hardware
// DWORD 40
uint32_t DW40_BTI_LCU64_JobQueue1DBufferSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface for Multi-thread implementation
// DWORD 41
uint32_t DW41_BTI_LCU64_JobQueue2DBufferSurface : MOS_BITFIELD_RANGE( 0, 31); // Surface for Multi-thread implementation
// DWORD 42
uint32_t DW42_BTI_LCU64_ResidualDataScratchSurface : MOS_BITFIELD_RANGE(0, 31);
// DWORD 43
uint32_t DW43_BTI_LCU64_DebugFeatureSurface : MOS_BITFIELD_RANGE( 0, 31);
};
// Compile-time guard: the structure must occupy exactly 44 DWORDs (DW0..DW43).
C_ASSERT(MOS_BYTES_TO_DWORDS(sizeof(CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10)) == 44);
//! Initial values for the ME kernel CURBE.
//! The initializer is positional: each value carries a trailing tag naming the field it is
//! meant to populate, so entries must stay in the declaration order of
//! CODECHAL_ENC_HEVC_ME_CURBE_G10 (declared outside this section).
//! All BTI fields start as 0xFFFF; NOTE(review): presumably a placeholder that is replaced
//! with the real binding table index before the kernel is dispatched - confirm.
const CODECHAL_ENC_HEVC_ME_CURBE_G10 CodechalEncHevcStateG10::m_meCurbeInit = {
0, //DW0_RoundedFrameWidthInMvUnitsfor4X
0, //DW0_RoundedFrameHeightInMvUnitsfor4X
0, //DW1_Reserved_0
0, //DW1_MvCostScaleFactor
0, //DW1_Reserved_1
0, //DW2_Reserved_0
0x3, //DW2_SubPelMode
1, //DW2_BmeDisableFbr
0, //DW2_Reserved_1
0x2, //DW2_InterSADMeasureAdjustment
0, //DW2_Reserved_2
0, //DW3_Reserved_0
1, //DW3_AdaptiveSearchEnable
0, //DW3_Reserved_1
0x1, //DW3_ImeRefWindowSize
0, //DW3_Reserved_2
0, //DW4_Reserved_0
0x1, //DW4_QuarterQuadTreeCandidate -- Coarseshape 16x16
0, //DW4_Reserved_1
32, //DW4_BidirectionalWeight
0, //DW4_Reserved_2
0x3F, //DW5_LenSP
0x3F, //DW5_MaxNumSU
0x4, //DW5_StartCenter0_X
0x4, //DW5_StartCenter0_Y
0, //DW5_Reserved_0
0, //DW6_Reserved_0
1, //DW6_SliceType
0, //DW6_HmeStage
0, //DW6_NumRefL0
0, //DW6_NumRefL1
0, //DW6_Reserved_1
0, //DW7_RoundedFrameWidthInMvUnitsFor16x
0, //DW7_RoundedFrameHeightInMvUnitsfor16X
// DW8..DW23: IME search path, four path entries packed per DWORD
0x0101F00F, //DW8_ImeSearchPath_0_3
0x0F0F1010, //DW9_ImeSearchPath_4_7
0xF0F0F00F, //DW10_ImeSearchPath_8_11
0x01010101, //DW11_ImeSearchPath_12_15
0x10101010, //DW12_ImeSearchPath_16_19
0x0F0F0F0F, //DW13_ImeSearchPath_20_23
0xF0F0F00F, //DW14_ImeSearchPath_24_27
0x0101F0F0, //DW15_ImeSearchPath_28_31
0x01010101, //DW16_ImeSearchPath_32_35
0x10101010, //DW17_ImeSearchPath_36_39
0x0F0F1010, //DW18_ImeSearchPath_40_43
0x0F0F0F0F, //DW19_ImeSearchPath_44_47
0xF0F0F00F, //DW20_ImeSearchPath_48_51
0xF0F0F0F0, //DW21_ImeSearchPath_52_55
0x01010101, //DW22_ImeSearchPath_56_59
0x01010101, //DW23_ImeSearchPath_60_63
0, //DW24_Reserved_0
1, //DW24_CodingUnitSize -- CoarseShape = 16x16
0, //DW24_Reserved_1
0, //DW24_CodingUnitPartitionMode
1, //DW24_CodingUnitPredictionMode
0, //DW24_Reserved_2
0, //DW25_FrameWidthInSamplesOfCurrentStage
0, //DW25_FrameHeightInSamplesOfCurrentStage
0, //DW26_Intra8x8ModeMask
0, //DW26_Reserved_0
0, //DW26_Intra16x16ModeMask
0, //DW26_Reserved_1
0, //DW27_Intra32x32ModeMask
0, //DW27_IntraChromaModeMask
1, //DW27_IntraComputeType
0, //DW27_Reserved_0
0, //DW28_Reserved_0
36, //DW28_PenaltyIntra32x32NonDC
12, //DW28_PenaltyIntra16x16NonDC
4, //DW28_PenaltyIntra8x8NonDC
0, //DW29_Mode0Cost
0, //DW29_Mode1Cost
0, //DW29_Mode2Cost
0, //DW29_Mode3Cost
13, //DW30_Mode4Cost
9, //DW30_Mode5Cost
13, //DW30_Mode6Cost
3, //DW30_Mode7Cost
9, //DW31_Mode8Cost
0, //DW31_Mode9Cost
0, //DW31_Reserved_0
0, //DW31_ChromaIntraModeCost
0, //DW32_Reserved_0
0x3F, //DW32_SicIntraNeighborAvailableFlag
0, //DW32_Reserved_1
0x2, //DW32_SicInterSadMeasure
0x2, //DW32_SicIntraSadMeasure
0, //DW32_Reserved_2
3, //DW33_SicLog2MinCuSize
0, //DW33_Reserved_0
0, //DW33_SicAcOnlyHaar
0, //DW33_Reserved_1
0, //DW33_SicHevcQuarterQuadtree
0, //DW33_Reserved_2
0xFFFF, //DW34_BTI_HmeOutputMvDataSurface
0xFFFF, //DW35_BTI_16xInputMvDataSurface
0xFFFF, //DW36_BTI_4xOutputDistortionSurface
0xFFFF, //DW37_BTI_VmeSurfaceIndex
0xFFFF, //DW38_BTI_4xDsSurface
0xFFFF, //DW39_BTI_BrcDistortionSurface
0 // NOTE(review): untagged final entry - confirm which field of CODECHAL_ENC_HEVC_ME_CURBE_G10 this initializes
};
//! Initial values for the BRC init/reset kernel CURBE.
//! The initializer is positional: each value carries a trailing tag naming the field it is
//! meant to populate, so entries must stay in the declaration order of
//! CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G10 (declared outside this section).
//! Non-zero entries are the frame-rate denominator, the QP range, the per-frame-type
//! instant rate thresholds, Log2MaxCuSize and the sliding window size.
const CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G10 CodechalEncHevcStateG10::m_brcInitResetCurbeInit = {
0, //DW0_ProfileLevelMaxFrame
0, //DW1_InitBufFull
0, //DW2_BufSize
0, //DW3_TargetBitRate
0, //DW4_MaximumBitRate
0, //DW5_MinimumBitRate
0, //DW6_FrameRateM
1, //DW7_FrameRateD
0, //DW8_BRCFlag
0, //DW8_BRC_Param_A
0, //DW9_BRC_Param_B
0, //DW9_FrameWidth
0, //DW10_FrameHeight
0, //DW10_AVBRAccuracy
0, //DW11_AVBRConvergence
1, //DW11_MinimumQP
51, //DW12_MaximumQP
0, //DW12_NumberSlice
0, //DW13_Reserved_0
0, //DW13_BRC_Param_C
0, //DW14_BRC_Param_D
0, //DW14_MaxBRCLevel
0, //DW15_LongTermInterval
0, //DW15_Reserved_0
40, //DW16_InstantRateThreshold0_Pframe
60, //DW16_InstantRateThreshold1_Pframe
80, //DW16_InstantRateThreshold2_Pframe
120, //DW16_InstantRateThreshold3_Pframe
35, //DW17_InstantRateThreshold0_Bframe
60, //DW17_InstantRateThreshold1_Bframe
80, //DW17_InstantRateThreshold2_Bframe
120, //DW17_InstantRateThreshold3_Bframe
40, //DW18_InstantRateThreshold0_Iframe
60, //DW18_InstantRateThreshold1_Iframe
90, //DW18_InstantRateThreshold2_Iframe
115, //DW18_InstantRateThreshold3_Iframe
0, //DW19_DeviationThreshold0_PBframe
0, //DW19_DeviationThreshold1_PBframe
0, //DW19_DeviationThreshold2_PBframe
0, //DW19_DeviationThreshold3_PBframe
0, //DW20_DeviationThreshold4_PBframe
0, //DW20_DeviationThreshold5_PBframe
0, //DW20_DeviationThreshold6_PBframe
0, //DW20_DeviationThreshold7_PBframe
0, //DW21_DeviationThreshold0_VBRcontrol
0, //DW21_DeviationThreshold1_VBRcontrol
0, //DW21_DeviationThreshold2_VBRcontrol
0, //DW21_DeviationThreshold3_VBRcontrol
0, //DW22_DeviationThreshold4_VBRcontrol
0, //DW22_DeviationThreshold5_VBRcontrol
0, //DW22_DeviationThreshold6_VBRcontrol
0, //DW22_DeviationThreshold7_VBRcontrol
0, //DW23_DeviationThreshold0_Iframe
0, //DW23_DeviationThreshold1_Iframe
0, //DW23_DeviationThreshold2_Iframe
0, //DW23_DeviationThreshold3_Iframe
0, //DW24_DeviationThreshold4_Iframe
0, //DW24_DeviationThreshold5_Iframe
0, //DW24_DeviationThreshold6_Iframe
0, //DW24_DeviationThreshold7_Iframe
0, //DW25_ACQPBuffer
0, //DW25_IntraSADTransform
5, //DW25_Log2MaxCuSize
30, //DW25_SlidingWindowSize
0, //DW26_BGOPSize
0, //DW26_Reserved_0
0, //DW27_Reserved_0
0, //DW28_Reserved_0
0, //DW29_Reserved_0
0, //DW30_Reserved_0
0, //DW31_Reserved_0
};
//! Initial values for the BRC update kernel CURBE.
//! The initializer is positional: each value carries a trailing tag naming the field it is
//! meant to populate, so entries must stay in the declaration order of
//! CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10 (declared outside this section).
const CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10 CodechalEncHevcStateG10::m_brcUpdateCurbeInit = {
0, // DW0_TargetSize
0, // DW1_FrameNumber
0, // DW2_PictureHeaderSize
10, // DW3_StartGAdjFrame0
50, // DW3_StartGAdjFrame1
100, // DW4_StartGAdjFrame2
150, // DW4_StartGAdjFrame3
0, // DW5_TargetSize_Flag
0, // DW5_Reserved_0
4, // DW5_MaxNumPAKs
2, // DW5_CurrFrameBrcLevel
0, // DW6_NumSkippedFrames
0, // DW6_CqpValue
0, // DW6_ROIEnable
0, // DW6_BRCROIEnable
1, // DW6_LCUQPAverageEnable
0, // DW6_Reserved1
0, // DW6_SlidingWindowEnable
0, // DW6_Reserved2
0, // DW6_RoiRatio
0, // DW7_Reserved_0
1, // DW8_StartGlobalAdjustMult0
1, // DW8_StartGlobalAdjustMult1
3, // DW8_StartGlobalAdjustMult2
2, // DW8_StartGlobalAdjustMult3
1, // DW9_StartGlobalAdjustMult4
40, // DW9_StartGlobalAdjustDivd0
5, // DW9_StartGlobalAdjustDivd1
5, // DW9_StartGlobalAdjustDivd2
3, // DW10_StartGlobalAdjustDivd3
1, // DW10_StartGlobalAdjustDivd4
7, // DW10_QPThreshold0
18, // DW10_QPThreshold1
25, // DW11_QPThreshold2
37, // DW11_QPThreshold3
40, // DW11_gRateRatioThreshold0
75, // DW11_gRateRatioThreshold1
97, // DW12_gRateRatioThreshold2
103, // DW12_gRateRatioThreshold3
125, // DW12_gRateRatioThreshold4
160, // DW12_gRateRatioThreshold5
// The next three entries pass negative values through MOS_BITFIELD_VALUE with a width of 8;
// NOTE(review): presumably this truncates them to fit the 8-bit field - confirm against the macro.
MOS_BITFIELD_VALUE((uint32_t)-3, 8), // DW13_gRateRatioThreshold6
MOS_BITFIELD_VALUE((uint32_t)-2, 8), // DW13_gRateRatioThreshold7
MOS_BITFIELD_VALUE((uint32_t)-1, 8), // DW13_gRateRatioThreshold8
0, // DW13_gRateRatioThreshold9
1, // DW14_gRateRatioThreshold10
2, // DW14_gRateRatioThreshold11
3, // DW14_gRateRatioThreshold12
4, // DW14_ParallelMode
0, // DW15_SizeOfSkippedFrames
};
//! Initial values for the MBENC I kernel CURBE.
//! The initializer is positional: each value carries a trailing tag naming the field it is
//! meant to populate, so entries must stay in the declaration order of
//! CODECHAL_ENC_HEVC_MBENC_I_CURBE_G10.
//! All BTI fields (DW16..DW28) start as 0xFFFF; NOTE(review): presumably a placeholder that is
//! replaced with the real binding table index before the kernel is dispatched - confirm.
const CODECHAL_ENC_HEVC_MBENC_I_CURBE_G10 CodechalEncHevcStateG10::m_mbencICurbeInit = {
0, //DW0_FrameWidthInSamples
0, //DW0_FrameHeightInSamples
0, //DW1_Reserved_0
36, //DW1_PenaltyForIntra32x32NonDCPredMode
12, //DW1_PenaltyForIntra16x16NonDCPredMode
4, //DW1_PenaltyForIntra8x8NonDCPredMode
0, //DW2_Reserved_0
2, //DW2_IntraSADMeasureAdj
0, //DW2_IntraPrediction
0, //DW2_Reserved_1
43, //DW3_ModeCost_0
60, //DW3_ModeCost_1
60, //DW3_ModeCost_2
60, //DW3_ModeCost_3
0, //DW4_ModeCost_4
0, //DW4_ModeCost_5
0, //DW4_ModeCost_6
0, //DW4_ModeCost_7
0, //DW5_ModeCost_8
0, //DW5_ModeCost_9
0, //DW5_RefIDCost
25, //DW5_ChromaIntraModeCost
5, //DW6_Log2MaxCUSize
3, //DW6_Log2MinCUSize
5, //DW6_Log2MaxTUSize
2, //DW6_Log2MinTUSize
1, //DW6_MaxTransformDepthIntra
1, //DW6_TuSplitFlag
0, //DW6_TuBasedCostSetting
0, //DW6_Reserved_0
0, //DW7_ConcurrentGroupNum .. 0 treated same as 1
1, //DW7_EncTuDecisionMode
0, //DW7_Reserved_0
27, //DW7_SliceQP
40960, //DW8_Lambda_Rd
3238, //DW9_Lambda_Md
0, //DW9_Reserved_0
534, //DW10_IntraTuDThres
2, //DW11_SliceType
0, //DW11_QPType
0, //DW11_CheckPcmModeFlag
0, //DW11_EnableIntra4x4PU
1, //DW11_EncQtDecisionMode
0, //DW11_Reserved_0
4700, //DW12_PCM_8x8_SAD_Threshold
0, //DW12_Reserved_0
0, //DW13_Reserved_0
0, //DW14_Reserved_0
0, //DW15_Reserved_0
0xFFFF, //DW16_BTI_VmeIntraPredictionSurface
0xFFFF, //DW17_BTI_CurrentPictureY
0xFFFF, //DW18_BTI_EncCuRecordSurface
0xFFFF, //DW19_BTI_PakObjectCommandSurface
0xFFFF, //DW20_BTI_CuPacketForPakSurface
0xFFFF, //DW21_BTI_InternalScratchSurface
0xFFFF, //DW22_BTI_CuBasedQpSurface
0xFFFF, //DW23_BTI_ConstantDataLutSurface
0xFFFF, //DW24_BTI_LcuLevelDataInputSurface
0xFFFF, //DW25_BTI_ConcurrentThreadGroupDataSurface
0xFFFF, //DW26_BTI_BrcCombinedEncParameterSurface
0xFFFF, //DW27_BTI_CuSplitSurface
0xFFFF, //DW28_BTI_DebugSurface
};
//! Initial values for the MBENC B kernel CURBE (shared by the LCU32 and LCU64 B kernels).
//! The initializer is positional: each value carries a trailing tag naming the field it
//! populates, in the declaration order of CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10.
//! DW18..DW32 are unions in that structure, hence the braced entries; a braced entry
//! initializes the union's first (LCU32_*) member, which aliases the LCU64_* member of the
//! same DWORD, so both names are listed in the tag.
//! All BTI fields start as 0xFFFF; NOTE(review): presumably a placeholder that is replaced
//! with the real binding table index before the kernel is dispatched - confirm.
const CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10 CodechalEncHevcStateG10::m_mbencBCurbeInit = {
0, //DW0_FrameWidthInSamples
0, //DW0_FrameHeightInSamples
5, //DW1_Log2MaxCUSize
3, //DW1_Log2MinCUSize
5, //DW1_Log2MaxTUSize
2, //DW1_Log2MinTUSize
0, //DW1_MaxTransformDepthInter
0, //DW1_MaxTransformDepthIntra
2, //DW1_Log2ParallelMergeLevel
6, //DW1_MaxNumIMESearchCenter
0, //DW2_TransquantBypassEnableFlag
0, //DW2_CuQpDeltaEnabledFlag
0, //DW2_PCMEnabledFlag
0, //DW2_EnableCu64Check
0, //DW2_EnableIntra4x4PU
0, //DW2_ChromaSkipCheck
0, //DW2_EncTransformSimplify
0, //DW2_HMEFlag
0, //DW2_HMECoarseShape
0, //DW2_HMESubPelMode
0, //DW2_SuperHME
0, //DW2_RegionsInSliceEnable
0, //DW2_EncTuDecisionMode
0, //DW2_EncTuDecisionForAllQt
0, //DW2_CoefBitEstMode
0, //DW2_EncSkipDecisionMode
0, //DW2_EncQtDecisionMode
0, //DW2_LCU32_EncRdDecisionModeForAllQt
0, //DW2_QpType
0, //DW2_LCU64_Cu64SkipCheckOnly
0, //DW2_SICDynamicRunPathMode
0, //DW2_Reserved_0
0, //DW3_ActiveNumChildThreads_CU64
0, //DW3_ActiveNumChildThreads_CU32_0
0, //DW3_ActiveNumChildThreads_CU32_1
0, //DW3_ActiveNumChildThreads_CU32_2
0, //DW3_ActiveNumChildThreads_CU32_3
0, //DW3_Reserved_0
0, //DW3_SliceQp
1, //DW4_SkipModeEn
1, //DW4_AdaptiveEn
0, //DW4_Reserved_0
0, //DW4_HEVCMinCUControl
0, //DW4_EarlyImeSuccessEn
0, //DW4_Reserved_1
0, //DW4_IMECostCentersSel
0, //DW4_RefPixelOffset
1, //DW4_IMERefWindowSize
0, //DW4_ResidualPredDatatypeCtrl
0, //DW4_ResidualPredInterChromaCtrl
0, //DW4_ResidualPred16x16SelCtrl
0, //DW4_Reserved_2
0, //DW4_EarlyImeStop
0x3, //DW5_SubPelMode
0, //DW5_Reserved_0
2, //DW5_InterSADMeasure
2, //DW5_IntraSADMeasure
63, //DW5_LenSP
63, //DW5_MaxNumSU
0, //DW5_IntraPredictionMask
1, //DW5_RefIDCostMode
0, //DW5_DisablePIntra
0, //DW5_TuBasedCostSetting
0, //DW6_Reserved_0
0, //DW7_SliceType
0, //DW7_TemporalMvpEnableFlag
0, //DW7_CollocatedFromL0Flag
0, //DW7_TheSameRefList
0, //DW7_IsLowDelay
0, //DW7_Reserved_0
4, //DW7_MaxNumMergeCand
0, //DW7_NumRefIdxL0
0, //DW7_NumRefIdxL1
0, //DW8_FwdPocNumber_L0_mTb_0
0, //DW8_BwdPocNumber_L1_mTb_0
0, //DW8_FwdPocNumber_L0_mTb_1
0, //DW8_BwdPocNumber_L1_mTb_1
0, //DW9_FwdPocNumber_L0_mTb_2
0, //DW9_BwdPocNumber_L1_mTb_2
0, //DW9_FwdPocNumber_L0_mTb_3
0, //DW9_BwdPocNumber_L1_mTb_3
0, //DW10_FwdPocNumber_L0_mTb_4
0, //DW10_BwdPocNumber_L1_mTb_4
0, //DW10_FwdPocNumber_L0_mTb_5
0, //DW10_BwdPocNumber_L1_mTb_5
0, //DW11_FwdPocNumber_L0_mTb_6
0, //DW11_BwdPocNumber_L1_mTb_6
0, //DW11_FwdPocNumber_L0_mTb_7
0, //DW11_BwdPocNumber_L1_mTb_7
0, //DW12_LongTermReferenceFlags_L0
0, //DW12_LongTermReferenceFlags_L1
0, //DW13_RefFrameHorizontalSize
0, //DW13_RefFrameVerticalSize
0, //DW14_KernelDebugDW
0, //DW15_ConcurrentGroupNum. 0 is treated same as 1.
8, //DW15_TotalThreadNumPerLCU
0, //DW15_NumRegions
0, //DW15_Reserved_0
0xFFFF, //DW16_BTI_CurrentPictureY
0xFFFF, //DW17_BTI_EncCuRecordSurface
{ 0xFFFF }, //DW18_BTI_LCU32_PAKObjectCommandSurface / DW18_BTI_LCU64_SecEncCuRecordSurface
{ 0xFFFF }, //DW19_BTI_LCU32_PAKCURecordSurface / DW19_BTI_LCU64_PAKObjectCommandSurface
{ 0xFFFF }, //DW20_BTI_LCU32_VMEIntra_InterPredictionSurface / DW20_BTI_LCU64_PAKCURecordSurface
{ 0xFFFF }, //DW21_BTI_LCU32_CU16x16QpDataInputSurface / DW21_BTI_LCU64_VMEIntra_InterPredictionSurface
{ 0xFFFF }, //DW22_BTI_LCU32_HEVCEncConstantTableSurface / DW22_BTI_LCU64_CU16x16QpDataInputSurface
{ 0xFFFF }, //DW23_BTI_LCU32_ColocatedCUMotionVectorDataSurface / DW23_BTI_LCU64_CU32_HEVCEncConstantTableSurface
{ 0xFFFF }, //DW24_BTI_LCU32_HmeMotionPredictorDataSurface / DW24_BTI_LCU64_ColocatedCUMotionVectorDataSurface
{ 0xFFFF }, //DW25_BTI_LCU32_LcuLevelDataInputSurface / DW25_BTI_LCU64_HmeMotionPredictorDataSurface
{ 0xFFFF }, //DW26_BTI_LCU32_LcuEncodingScratchSurface / DW26_BTI_LCU64_LcuLevelDataInputSurface
{ 0xFFFF }, //DW27_BTI_LCU32_ConcurrentThreadGroupDataSurface / DW27_BTI_LCU64_CU32_LcuEncodingScratchSurface
{ 0xFFFF }, //DW28_BTI_LCU32_BrcCombinedEncParameterSurface / DW28_BTI_LCU64_64x64_DistortionSurface
{ 0xFFFF }, //DW29_BTI_LCU32_JobQueueScratchBufferSurface / DW29_BTI_LCU64_ConcurrentThreadGroupDataSurface
{ 0xFFFF }, //DW30_BTI_LCU32_CuSplitDataSurface / DW30_BTI_LCU64_BrcCombinedEncParameterSurface
{ 0xFFFF }, //DW31_BTI_LCU32_ResidualDataScratchSurface / DW31_BTI_LCU64_CU32_JobQueue1DBufferSurface
{ 0xFFFF }, //DW32_BTI_LCU32_DebugSurface / DW32_BTI_LCU64_CU32_JobQueue2DBufferSurface
0xFFFF, //DW33_BTI_LCU64_CU32_ResidualDataScratchSurface
0xFFFF, //DW34_BTI_LCU64_CuSplitSurface
0xFFFF, //DW35_BTI_LCU64_CurrentPictureY2xDS
0xFFFF, //DW36_BTI_LCU64_IntermediateCuRecordSurface
0xFFFF, //DW37_BTI_Lcu64_ConstantDataLutSurface
0xFFFF, //DW38_BTI_LCU64_LcuDataStorageSurface
0xFFFF, //DW39_BTI_LCU64_VmeInterPredictionSurface2xDS
0xFFFF, //DW40_BTI_LCU64_JobQueue1DBufferSurface
0xFFFF, //DW41_BTI_LCU64_JobQueue2DBufferSurface
0xFFFF, //DW42_BTI_LCU64_ResidualDataScratchSurface
0xFFFF //DW43_BTI_LCU64_DebugFeatureSurface
};
//! \endcond
//!
//! \brief    Apply the Gen10 specific sequence level settings on top of the base class ones
//! \details  Runs the base class SetSequenceStructs(), records whether CQP rate control is
//!           in use and then picks, from m_tuSettings, the number of threads per LCU and
//!           (for LCU64 with regions enabled) the number of regions per slice.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else the failure code of the base class call
//!
MOS_STATUS CodechalEncHevcStateG10::SetSequenceStructs()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::SetSequenceStructs());

    m_cqpEnabled = (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP);

    // Column of m_tuSettings selected by the target usage; shared by every lookup below
    const auto tuColumn = ((m_hevcSeqParams->TargetUsage + 1) >> 2) % 3;

    // threads per LCU
    m_totalNumThreadsPerLcu = m_tuSettings[TotalThreadNumPerLCUTuParam][tuColumn];

    // Nothing more to tune unless wave-front (more than one region per slice) is enabled
    if (m_numRegionsInSlice <= 1)
    {
        return eStatus;
    }

    // Gen10 optimal wave-front no. and threads assigned wrt TU and Pic width for each LCU type
    if (m_isMaxLcu64)
    {
        if (m_frameWidth >= 640)
        {
            m_numRegionsInSlice = m_tuSettings[NumRegionLCU64][tuColumn];
        }
        else
        {
            m_numRegionsInSlice = 2;
        }
    }
    else if (m_encode4KSequence)
    {
        // LCU32 4K content uses its own thread count row
        m_totalNumThreadsPerLcu = m_tuSettings[TotalThreadNumPerLCUTuParamFor4KOnly][tuColumn];
    }

    return eStatus;
}
//!
//! \brief    Derive the downscaled picture dimensions from m_frameWidth / m_frameHeight
//! \details  Fills the 2x, 4x, 16x and 32x downscaled sizes and, for 4x/16x/32x, the
//!           corresponding sizes in macroblocks. 16x is computed as a 4x downscale of the
//!           4x size, and 32x as a 2x downscale of the 16x size.
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS
//!
MOS_STATUS CodechalEncHevcStateG10::CalcScaledDimensions()
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    // NOTE(review): heights in macroblocks are computed with the WIDTH macro below, as in the
    // original code; presumably equivalent because macroblocks are square - confirm.

    // 2x Scaling WxH
    m_downscaledWidth2x  = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth);
    m_downscaledHeight2x = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight);

    // HME Scaling WxH
    m_downscaledWidth4x      = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth);
    m_downscaledWidthInMb4x  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
    m_downscaledHeight4x     = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight);
    m_downscaledHeightInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x);

    // SuperHME Scaling WxH (derived from the 4x sizes)
    m_downscaledWidth16x      = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x);
    m_downscaledWidthInMb16x  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
    m_downscaledHeight16x     = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x);
    m_downscaledHeightInMb16x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x);

    // UltraHME Scaling WxH (derived from the 16x sizes)
    m_downscaledWidth32x      = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x);
    m_downscaledWidthInMb32x  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
    m_downscaledHeight32x     = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x);
    m_downscaledHeightInMb32x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x);

    return MOS_STATUS_SUCCESS;
}
//!
//! \brief    Fill the BRC constant data surface
//! \details  Locks the surface for writing and stores, back to back, the QP adjustment
//!           table (g_cInit_HEVC_BRC_QP_ADJUST) followed by the lambda / mode cost table
//!           matching the current maximum LCU size. The surface is unlocked again on every
//!           path that acquired the lock.
//! \param    [in] brcConstantData
//!           Surface receiving the constant data; its capacity is taken as dwHeight * dwWidth
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, a null-pointer error if the surface is null or
//!           cannot be locked, else the failure code of the copy that did not fit
//!
MOS_STATUS CodechalEncHevcStateG10::SetupBrcConstantTable(PMOS_SURFACE brcConstantData)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(brcConstantData);

    MOS_LOCK_PARAMS lockFlags;
    MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
    lockFlags.WriteOnly = 1;

    uint8_t* data = static_cast<uint8_t*>(m_osInterface->pfnLockResource(m_osInterface, &brcConstantData->OsResource, &lockFlags));
    CODECHAL_ENCODE_CHK_NULL_RETURN(data);

    uint32_t size = brcConstantData->dwHeight * brcConstantData->dwWidth;

    // QP adjustment table goes first. The copy result is checked so that a surface that is
    // too small is reported instead of letting the unsigned "size" below wrap around.
    eStatus = MOS_SecureMemcpy(data, size, g_cInit_HEVC_BRC_QP_ADJUST, sizeof(g_cInit_HEVC_BRC_QP_ADJUST));

    if (eStatus == MOS_STATUS_SUCCESS)
    {
        data += sizeof(g_cInit_HEVC_BRC_QP_ADJUST);
        size -= sizeof(g_cInit_HEVC_BRC_QP_ADJUST);

        //lambda and mode cost
        if (m_isMaxLcu64)
        {
            eStatus = MOS_SecureMemcpy(data, size, m_brcLcu64x64LambdaModeCost, sizeof(m_brcLcu64x64LambdaModeCost));
        }
        else
        {
            eStatus = MOS_SecureMemcpy(data, size, m_brcLcu32x32LambdaModeCost, sizeof(m_brcLcu32x32LambdaModeCost));
        }
    }

    // Always release the lock, including when one of the copies was rejected
    m_osInterface->pfnUnlockResource(m_osInterface, &brcConstantData->OsResource);

    return eStatus;
}
//!
//! \brief    Allocate the encoder surfaces that are used only by the LCU64 B-kernel
//! \details  Every surface except the job queue header is allocated only while its
//!           resource is still null. The job queue header surface is always freed
//!           and allocated again.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::AllocateEncResourcesLCU64()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // Intermediate CU record surface consumed by the LCU64 B-kernel
    if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu64B.OsResource))
    {
        uint32_t surfWidth  = m_widthAlignedMaxLcu;
        uint32_t surfHeight = m_heightAlignedMaxLcu >> 1;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_intermediateCuRecordSurfaceLcu64B, surfWidth, surfHeight, "Intermediate CU record Surface For Lcu64 B-kernel"));
    }

    // Internal scratch buffer: 13312 bytes times the doubled 64-pixel unit count in each dimension
    if (Mos_ResourceIsNull(&m_lcuEncodingScratchSurfaceLcu64B.sResource))
    {
        uint32_t scratchBytes = 13312 * ((m_widthAlignedMaxLcu >> 6) << 1) * ((m_heightAlignedMaxLcu >> 6) << 1);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_lcuEncodingScratchSurfaceLcu64B, scratchBytes, "Lcu 64 B Encoding Scratch Surface"));
    }

    // ENC constant table for the LCU64 B-kernel; filled right after allocation
    if (Mos_ResourceIsNull(&m_encConstantTableForLcu64B.sResource))
    {
        uint32_t lutBytes = m_encBConstantDataLutLcu64Size;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_encConstantTableForLcu64B, lutBytes, "Enc Constant Table Surface For B LCU64"));

        // Map the buffer write-only and copy the constant LUT into it
        MOS_LOCK_PARAMS lockParams;
        MOS_ZeroMemory(&lockParams, sizeof(lockParams));
        lockParams.WriteOnly = 1;

        uint8_t *lutDst = static_cast<uint8_t *>(
            m_osInterface->pfnLockResource(m_osInterface, &m_encConstantTableForLcu64B.sResource, &lockParams));
        CODECHAL_ENCODE_CHK_NULL_RETURN(lutDst);

        MOS_SecureMemcpy(lutDst, m_encBConstantDataLutLcu64Size, static_cast<const void *>(m_encBConstantDataLutLcu64), m_encBConstantDataLutLcu64Size);

        m_osInterface->pfnUnlockResource(m_osInterface, &m_encConstantTableForLcu64B.sResource);
    }

    // Job queue header surface for the multi-threaded LCU64 B-kernel.
    // The surface that was allocated for LCU32 is dropped first and then re-created.
    m_osInterface->pfnFreeResource(m_osInterface, &m_jobQueueHeaderSurfaceForB.sResource);
    {
        uint32_t headerBytes = (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5) * 32;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_jobQueueHeaderSurfaceForB, headerBytes, "Job Queue Header Surface for multi-thread LCU64 B"));
    }

    // Job queue data surface, LCU64 with CU32
    if (Mos_ResourceIsNull(&m_jobQueueDataSurfaceForBLcu64Cu32.OsResource))
    {
        uint32_t surfWidth  = (m_widthAlignedMaxLcu >> 5) * 32;
        uint32_t surfHeight = (m_heightAlignedMaxLcu >> 5) * 58;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_jobQueueDataSurfaceForBLcu64Cu32, surfWidth, surfHeight, "Job Queue Data Surface for LCU64 CU32"));
    }

    // Job queue data surface, LCU64
    if (Mos_ResourceIsNull(&m_jobQueueDataSurfaceForBLcu64.OsResource))
    {
        uint32_t surfWidth  = (m_widthAlignedMaxLcu >> 6) * 32;
        uint32_t surfHeight = (m_heightAlignedMaxLcu >> 6) * 66;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_jobQueueDataSurfaceForBLcu64, surfWidth, surfHeight, "Job Queue Data Surface for LCU64"));
    }

    // Residual data scratch surface, LCU64
    if (Mos_ResourceIsNull(&m_residualDataScratchSurfaceForBLcu64.OsResource))
    {
        uint32_t surfWidth  = (m_widthAlignedLcu32 << 1);
        uint32_t surfHeight = (m_heightAlignedLcu32 << 2);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_residualDataScratchSurfaceForBLcu64, surfWidth, surfHeight, "Residual Data Scratch Surface"));
    }

    return eStatus;
}
//!
//! \brief    Allocate the surfaces used by the HME (4x / 16x) kernels
//! \details  Nothing is allocated unless both ENC and HME are enabled. Each
//!           surface is allocated only while its resource is still null.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::AllocateMeResources()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // HME surfaces are only needed when both ENC and HME are active
    if (!(m_encEnabled && m_hmeSupported))
    {
        return eStatus;
    }

    // 4x ME motion vector data
    if (Mos_ResourceIsNull(&m_s4XMeMvDataBuffer.OsResource))
    {
        // One MV per 8x8 block (>> 3), x2 for interleaved L0/L1, x4 bytes per MV,
        // x4 for the maximum number of FWD/BWD reference frames
        uint32_t mvWidth  = (m_downscaledWidth4x >> 3) * 2 * 4 * 4;
        uint32_t mvHeight = (m_downscaledHeight4x >> 3);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_s4XMeMvDataBuffer, mvWidth, mvHeight, "4xME MV Data Buffer"));
    }

    // 4x ME distortion data
    if (Mos_ResourceIsNull(&m_s4XMeDistortionBuffer.OsResource))
    {
        // Width: x4 for interleaved L0, L1, intra distortion and padding, x2 bytes of data.
        // Height: x4 for the maximum number of FWD/BWD reference frames.
        uint32_t distWidth  = (m_downscaledWidth4x >> 3) * 4 * 2;
        uint32_t distHeight = (m_downscaledHeight4x >> 3) * 4;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_s4XMeDistortionBuffer, distWidth, distHeight, "4xME Distortion Buffer"));
    }

    // 16x ME motion vector data, only when 16x ME is supported
    if (m_16xMeSupported && Mos_ResourceIsNull(&m_s16XMeMvDataBuffer.OsResource))
    {
        // Same layout as the 4x MV buffer, computed from the 16x downscaled size
        uint32_t mvWidth  = (m_downscaledWidth16x >> 3) * 2 * 4 * 4;
        uint32_t mvHeight = (m_downscaledHeight16x >> 3);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_s16XMeMvDataBuffer, mvWidth, mvHeight, "16xME MV Data Buffer"));
    }

    // MV and distortion summation surface (fixed 32 bytes)
    if (Mos_ResourceIsNull(&m_mvAndDistortionSumSurface.sResource))
    {
        uint32_t sumBytes = 32;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_mvAndDistortionSumSurface, sumBytes, "Mv and Distortion Summation Surface"));
    }

    // BRC distortion surface
    if (Mos_ResourceIsNull(&m_brcBuffers.sMeBrcDistortionBuffer.OsResource))
    {
        uint32_t brcWidth  = MOS_ALIGN_CEIL((m_picWidthInMb << 3), 64);
        uint32_t brcHeight = MOS_ALIGN_CEIL((m_picHeightInMb << 2), 8);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_brcBuffers.sMeBrcDistortionBuffer, brcWidth, brcHeight, "Brc Distortion Surface Buffer"));
    }

    return eStatus;
}
//!
//! \brief    Allocate the surfaces shared by the I and B ENC kernels
//! \details  Each surface is allocated only while its resource is still null.
//!           The three ENC constant tables (I, B LCU32) are filled with their
//!           LUT data right after allocation. When HME is supported the ME
//!           surfaces are allocated as the last step.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::AllocateEncResources()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // ---------------- Surfaces used by both I and B kernels ----------------

    // Intermediate CU record surface.
    // Allocated LCU64 aligned; the surface state may later be programmed with
    // LCU32 aligned width and height in some cases.
    if (Mos_ResourceIsNull(&m_intermediateCuRecordSurfaceLcu32.OsResource))
    {
        uint32_t surfWidth  = m_widthAlignedMaxLcu;
        uint32_t surfHeight = m_heightAlignedMaxLcu >> 1;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_intermediateCuRecordSurfaceLcu32, surfWidth, surfHeight, "Intermediate CU record Surface"));
    }

    // LCU level input data
    if (Mos_ResourceIsNull(&m_lcuLevelInputData.sResource))
    {
        uint32_t bufBytes = 16 * ((m_widthAlignedMaxLcu >> 6) << 1) * ((m_heightAlignedMaxLcu >> 6) << 1);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_lcuLevelInputData, bufBytes, "Lcu Level Data Input Surface"));
    }

    // Concurrent thread group data: one entry per media walker color
    if (Mos_ResourceIsNull(&m_concurrentThreadGroupData.sResource))
    {
        uint32_t bufBytes = CODECHAL_MEDIA_WALKER_MAX_COLORS * sizeof(CODECHAL_ENC_HEVC_CONCURRENT_THREAD_GROUP_DATA_G10);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_concurrentThreadGroupData, bufBytes, "Concurrent Thread Group Data Input Surface"));
    }

    // CU split surface
    if (Mos_ResourceIsNull(&m_cuSplitSurface.OsResource))
    {
        uint32_t surfWidth  = (m_widthAlignedMaxLcu >> 4);   // ((W + 63)/64)*4
        uint32_t surfHeight = (m_heightAlignedMaxLcu >> 4);  // ((H + 63)/64)*4
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_cuSplitSurface, surfWidth, surfHeight, "Cu Split Surface"));
    }

    // Kernel debug surface, one page in size
    if (Mos_ResourceIsNull(&m_kernelDebug.sResource))
    {
        uint32_t bufBytes = CODECHAL_PAGE_SIZE;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_kernelDebug, bufBytes, "Kernel 1D Debug Surface"));
    }

    // Resource requested from the encode allocator under the brcInputForEncKernel name.
    // NOTE(review): issued on every call and the return value is not inspected -
    // confirm whether AllocateResource can fail here.
    m_allocator->AllocateResource(m_standard, m_brcCombinedEncBufferSize, 1, brcInputForEncKernel, true);

    // ---------------- Surfaces used by the I-kernel ----------------

    // ENC constant table for I; filled right after allocation
    if (Mos_ResourceIsNull(&m_encConstantTableForI.sResource))
    {
        uint32_t lutBytes = m_encIConstantDataLutSize;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_encConstantTableForI, lutBytes, "Enc Constant Table Surface For I"));

        // Map the buffer write-only and copy the constant LUT into it
        MOS_LOCK_PARAMS lockParams;
        MOS_ZeroMemory(&lockParams, sizeof(lockParams));
        lockParams.WriteOnly = 1;

        uint8_t *lutDst = static_cast<uint8_t *>(
            m_osInterface->pfnLockResource(m_osInterface, &m_encConstantTableForI.sResource, &lockParams));
        CODECHAL_ENCODE_CHK_NULL_RETURN(lutDst);

        MOS_SecureMemcpy(lutDst, m_encIConstantDataLutSize, static_cast<const void *>(m_encIConstantDataLut), m_encIConstantDataLutSize);

        m_osInterface->pfnUnlockResource(m_osInterface, &m_encConstantTableForI.sResource);
    }

    // Scratch surface for the I-kernel
    if (Mos_ResourceIsNull(&m_scratchSurface.OsResource))
    {
        uint32_t surfWidth  = m_widthAlignedLcu32 >> 3;
        uint32_t surfHeight = m_heightAlignedLcu32 >> 5;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_scratchSurface, surfWidth, surfHeight, "Scratch Surface for I Kernel"));
    }

    // ---------------- Surfaces used by the B-kernel ----------------

    // Second intermediate CU record surface
    if (Mos_ResourceIsNull(&m_secondIntermediateCuRecordSurfaceLcu32.OsResource))
    {
        uint32_t surfWidth  = m_widthAlignedMaxLcu;
        uint32_t surfHeight = m_heightAlignedMaxLcu >> 1;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_secondIntermediateCuRecordSurfaceLcu32, surfWidth, surfHeight, "Second Intermediate CU record Surface"));
    }

    // Internal scratch buffer
    if (Mos_ResourceIsNull(&m_lcuEncodingScratchSurface.sResource))
    {
        uint32_t bufBytes = 13312 * ((m_widthAlignedMaxLcu >> 6) << 1) * ((m_heightAlignedMaxLcu >> 6) << 1);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_lcuEncodingScratchSurface, bufBytes, "Lcu Encoding Scratch Surface"));
    }

    // 64x64 distortion surface: 32 bytes per 64x64 unit
    if (Mos_ResourceIsNull(&m_64x64DistortionSurface.sResource))
    {
        uint32_t bufBytes = (m_widthAlignedMaxLcu >> 6) * (m_heightAlignedMaxLcu >> 6) * 32;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_64x64DistortionSurface, bufBytes, "Distortion surface for 64x64"));
    }

    // ENC constant table for B LCU32; filled right after allocation
    if (Mos_ResourceIsNull(&m_encConstantTableForB.sResource))
    {
        uint32_t lutBytes = m_encBConstantDataLutSize;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_encConstantTableForB, lutBytes, "Enc Constant Table Surface For B LCU32"));

        // Map the buffer write-only and copy the constant LUT into it
        MOS_LOCK_PARAMS lockParams;
        MOS_ZeroMemory(&lockParams, sizeof(lockParams));
        lockParams.WriteOnly = 1;

        uint8_t *lutDst = static_cast<uint8_t *>(
            m_osInterface->pfnLockResource(m_osInterface, &m_encConstantTableForB.sResource, &lockParams));
        CODECHAL_ENCODE_CHK_NULL_RETURN(lutDst);

        MOS_SecureMemcpy(lutDst, m_encBConstantDataLutSize, static_cast<const void *>(m_encBConstantDataLut), m_encBConstantDataLutSize);

        m_osInterface->pfnUnlockResource(m_osInterface, &m_encConstantTableForB.sResource);
    }

    // Job queue header surface for the multi-threaded LCU32 B-kernel
    if (Mos_ResourceIsNull(&m_jobQueueHeaderSurfaceForB.sResource))
    {
        uint32_t bufBytes = (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5) * m_jobQueueSizeFor32x32Block;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_jobQueueHeaderSurfaceForB, bufBytes, "Job Queue Header Surface for multi-thread LCU32 B"));
    }

    // Job queue header surface for the multi-threaded LCU64 B-kernel
    if (Mos_ResourceIsNull(&m_jobQueueHeaderSurfaceForBLcu64.sResource))
    {
        uint32_t bufBytes = (m_widthAlignedMaxLcu >> 5) * (m_heightAlignedMaxLcu >> 5) * 32;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer(&m_jobQueueHeaderSurfaceForBLcu64, bufBytes, "Job Queue Header Surface for multi-thread LCU64 B"));
    }

    // Residual data scratch surface, LCU32
    if (Mos_ResourceIsNull(&m_residualDataScratchSurfaceForBLcu32.OsResource))
    {
        uint32_t surfWidth  = (m_widthAlignedLcu32 << 1);
        uint32_t surfHeight = (m_heightAlignedLcu32 << 2);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_residualDataScratchSurfaceForBLcu32, surfWidth, surfHeight, "Residual Data Scratch Surface"));
    }

    // MB statistics surface
    if (Mos_ResourceIsNull(&m_mbStatisticsSurface.OsResource))
    {
        uint32_t surfWidth  = MOS_ALIGN_CEIL(m_picWidthInMb * 4, 64);
        uint32_t surfHeight = 2 * MOS_ALIGN_CEIL(m_picHeightInMb, 8);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_mbStatisticsSurface, surfWidth, surfHeight, "MB stats surface"));
    }

    // 10-bit HEVC: output surfaces for the 10-bit to 8-bit format conversion
    if (m_is10BitHevc)
    {
        for (auto idx = 0; idx < NUM_FORMAT_CONV_FRAMES; idx++)
        {
            if (!Mos_ResourceIsNull(&m_formatConvertedSurface[idx].OsResource))
            {
                continue;  // this slot already owns a surface
            }

            uint32_t surfWidth  = m_frameWidth;
            uint32_t surfHeight = m_frameHeight;
            CODECHAL_ENCODE_CHK_STATUS_RETURN(
                AllocateSurface(&m_formatConvertedSurface[idx], surfWidth, surfHeight, "Format Converted Surface"));
        }
    }

    // MB split surface
    if (Mos_ResourceIsNull(&m_mbSplitSurface.OsResource))
    {
        uint32_t surfWidth  = m_widthAlignedMaxLcu >> 2;
        uint32_t surfHeight = m_heightAlignedMaxLcu >> 4;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(
            AllocateBuffer2D(&m_mbSplitSurface, surfWidth, surfHeight, "MB split surface"));
    }

    // HME surfaces
    if (m_hmeSupported)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateMeResources());
    }

    return eStatus;
}
//!
//! \brief    Release the surfaces allocated by AllocateMeResources()
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS
//!
MOS_STATUS CodechalEncHevcStateG10::DestroyMeResources()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // 16x ME motion vector data
    m_osInterface->pfnFreeResource(m_osInterface, &m_s16XMeMvDataBuffer.OsResource);

    // 4x ME distortion and motion vector data
    m_osInterface->pfnFreeResource(m_osInterface, &m_s4XMeDistortionBuffer.OsResource);
    m_osInterface->pfnFreeResource(m_osInterface, &m_s4XMeMvDataBuffer.OsResource);

    // MV and distortion summation surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_mvAndDistortionSumSurface.sResource);

    // BRC distortion surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_brcBuffers.sMeBrcDistortionBuffer.OsResource);

    return eStatus;
}
//!
//! \brief    Release the kernel states, binding tables and ENC surfaces of this encoder
//! \details  Also releases the frame statistics stream-out and SSE row store
//!           buffers, the ME surfaces (through DestroyMeResources()) and, in
//!           debug / release-internal builds, the library handle in m_swBrcMode.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::FreeEncResources()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // ---- Kernel states and their binding tables ----
    MOS_Delete(m_scalingAndConversionKernelState);
    m_scalingAndConversionKernelState = nullptr;
    MOS_FreeMemory(m_scalingAndConversionKernelBindingTable);
    m_scalingAndConversionKernelBindingTable = nullptr;

    MOS_Delete(m_meKernelState);
    m_meKernelState = nullptr;
    MOS_FreeMemory(m_meKernelBindingTable);
    m_meKernelBindingTable = nullptr;

    MOS_DeleteArray(m_brcKernelStates);
    m_brcKernelStates = nullptr;
    MOS_FreeMemory(m_brcKernelBindingTable);
    m_brcKernelBindingTable = nullptr;

    MOS_DeleteArray(m_mbEncKernelStates);
    m_mbEncKernelStates = nullptr;
    MOS_FreeMemory(m_mbEncKernelBindingTable);
    m_mbEncKernelBindingTable = nullptr;

    // ---- Surfaces shared by the I and B kernels ----
    // Intermediate CU record surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_intermediateCuRecordSurfaceLcu32.OsResource);
    // LCU level input data
    m_osInterface->pfnFreeResource(m_osInterface, &m_lcuLevelInputData.sResource);
    // Concurrent thread group input data
    m_osInterface->pfnFreeResource(m_osInterface, &m_concurrentThreadGroupData.sResource);
    // CU split surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_cuSplitSurface.OsResource);
    // Kernel debug surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_kernelDebug.sResource);

    // ---- Surfaces used by the I-kernel ----
    // ENC constant table for I
    m_osInterface->pfnFreeResource(m_osInterface, &m_encConstantTableForI.sResource);
    // Scratch surface for the I-kernel
    m_osInterface->pfnFreeResource(m_osInterface, &m_scratchSurface.OsResource);

    // ---- Surfaces used by the B-kernel ----
    // Second intermediate CU record surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_secondIntermediateCuRecordSurfaceLcu32.OsResource);
    // Intermediate CU record surface for the LCU64 B-kernel
    m_osInterface->pfnFreeResource(m_osInterface, &m_intermediateCuRecordSurfaceLcu64B.OsResource);
    // Internal scratch buffers (LCU32 and LCU64 B)
    m_osInterface->pfnFreeResource(m_osInterface, &m_lcuEncodingScratchSurface.sResource);
    m_osInterface->pfnFreeResource(m_osInterface, &m_lcuEncodingScratchSurfaceLcu64B.sResource);
    // ENC constant table for B LCU64
    m_osInterface->pfnFreeResource(m_osInterface, &m_encConstantTableForLcu64B.sResource);
    // 64x64 distortion surface
    m_osInterface->pfnFreeResource(m_osInterface, &m_64x64DistortionSurface.sResource);
    // ENC constant table for B LCU32
    m_osInterface->pfnFreeResource(m_osInterface, &m_encConstantTableForB.sResource);
    // Job queue header surfaces (LCU32 B and LCU64 B)
    m_osInterface->pfnFreeResource(m_osInterface, &m_jobQueueHeaderSurfaceForB.sResource);
    m_osInterface->pfnFreeResource(m_osInterface, &m_jobQueueHeaderSurfaceForBLcu64.sResource);
    // Job queue data surfaces (LCU64 CU32 and LCU64)
    m_osInterface->pfnFreeResource(m_osInterface, &m_jobQueueDataSurfaceForBLcu64Cu32.OsResource);
    m_osInterface->pfnFreeResource(m_osInterface, &m_jobQueueDataSurfaceForBLcu64.OsResource);
    // Residual data scratch surfaces (LCU32 B and LCU64 B)
    m_osInterface->pfnFreeResource(m_osInterface, &m_residualDataScratchSurfaceForBLcu32.OsResource);
    m_osInterface->pfnFreeResource(m_osInterface, &m_residualDataScratchSurfaceForBLcu64.OsResource);

    // Output surfaces of the 10-bit to 8-bit format conversion
    for (auto idx = 0; idx < NUM_FORMAT_CONV_FRAMES; idx++)
    {
        m_osInterface->pfnFreeResource(m_osInterface, &m_formatConvertedSurface[idx].OsResource);
    }

    // MB statistics and MB split surfaces
    m_osInterface->pfnFreeResource(m_osInterface, &m_mbStatisticsSurface.OsResource);
    m_osInterface->pfnFreeResource(m_osInterface, &m_mbSplitSurface.OsResource);

    // Frame statistics stream-out destination buffer
    m_osInterface->pfnFreeResource(m_osInterface, &m_resFrameStatStreamOutBuffer);
    // SSE source pixel row store buffer
    m_osInterface->pfnFreeResource(m_osInterface, &m_resSseSrcPixelRowStoreBuffer);

    // ME surfaces
    CODECHAL_ENCODE_CHK_STATUS_RETURN(DestroyMeResources());

#if (_DEBUG || _RELEASE_INTERNAL)
    // Release the library handle held in m_swBrcMode, if one is present
    if (m_swBrcMode != nullptr)
    {
        m_osInterface->pfnFreeLibrary(m_swBrcMode);
        m_swBrcMode = nullptr;
    }
#endif // (_DEBUG || _RELEASE_INTERNAL)

    return eStatus;
}
//!
//! \brief    Allocate the PAK resources of the G10 HEVC encoder
//! \details  Calls the base class allocation first, then allocates two linear
//!           buffers owned by this class: the frame statistics stream-out
//!           destination buffer and the SSE source pixel row store buffer.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else the status of the failing allocation
//!
MOS_STATUS CodechalEncHevcStateG10::AllocatePakResources()
{
    MOS_STATUS eStatus = CodechalEncHevcState::AllocatePakResources();
    if (eStatus != MOS_STATUS_SUCCESS)
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate PAK resources");
        return eStatus;
    }

    // Common allocation parameters for the linear buffers below
    MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
    MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
    allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
    allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
    allocParamsForBufferLinear.Format   = Format_Buffer;

    // Allocate Frame Statistics Streamout Data Destination Buffer = HEVC Frame Statistics
    uint32_t size = m_sizeOfHcpPakFrameStats;
    allocParamsForBufferLinear.dwBytes  = size;
    allocParamsForBufferLinear.pBufName = "FrameStatStreamOutBuffer";

    eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
        m_osInterface,
        &allocParamsForBufferLinear,
        &m_resFrameStatStreamOutBuffer);
    if (eStatus != MOS_STATUS_SUCCESS)
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate FrameStat StreamOut Buffer.");
        return eStatus;
    }

    // Allocate SSE Source Pixel Row Store Buffer
    size = ((m_widthAlignedMaxLcu + 2) * 64) * (4 + 4);
    size <<= 1;
    // MOS_ALIGN_CEIL yields the aligned value and does not modify its argument,
    // so the result has to be stored back for the alignment to take effect.
    size = MOS_ALIGN_CEIL(size, CODECHAL_CACHELINE_SIZE);
    allocParamsForBufferLinear.dwBytes  = size;
    allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";

    eStatus = (MOS_STATUS)m_osInterface->pfnAllocateResource(
        m_osInterface,
        &allocParamsForBufferLinear,
        &m_resSseSrcPixelRowStoreBuffer);
    if (eStatus != MOS_STATUS_SUCCESS)
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Failed to allocate SseSrcPixelRowStore Buffer.");
        return eStatus;
    }

    return eStatus;
}
//!
//! \brief    Free the PAK resources of the G10 HEVC encoder
//! \details  Releases the base class PAK resources, then the frame statistics
//!           stream-out buffer and the SSE source pixel row store buffer that
//!           AllocatePakResources() created. The two buffers owned by this class
//!           are released even when the base class reports a failure, so that a
//!           base class error cannot leave them allocated.
//! \return   MOS_STATUS
//!           Status returned by CodechalEncHevcState::FreePakResources()
//!
MOS_STATUS CodechalEncHevcStateG10::FreePakResources()
{
    MOS_STATUS eStatus = CodechalEncHevcState::FreePakResources();
    if (eStatus != MOS_STATUS_SUCCESS)
    {
        // Keep going: the buffers below still have to be released.
        CODECHAL_ENCODE_ASSERTMESSAGE("Failed to free base class PAK resources");
    }

    // Frame statistics stream-out destination buffer
    m_osInterface->pfnFreeResource(
        m_osInterface,
        &m_resFrameStatStreamOutBuffer);

    // SSE source pixel row store buffer
    m_osInterface->pfnFreeResource(
        m_osInterface,
        &m_resSseSrcPixelRowStoreBuffer);

    return eStatus;
}
//!
//! \brief    Compute the lambda values and fill the m_modeCost LUT for a slice type / QP
//! \param    [in] sliceType
//!           Slice type; values >= CODECHAL_ENCODE_HEVC_NUM_SLICE_TYPES are
//!           reported and treated as CODECHAL_ENCODE_HEVC_I_SLICE
//! \param    [in] qp
//!           QP value; also used as the second index into m_lambdaScaling
//! \param    [out] lambdaMd
//!           Receives sqrt(qpScale * 2^(max(0, qp - 12) / 3)) scaled by 256 and rounded
//! \param    [out] lambdaRd
//!           Receives qpScale * 2^(max(0, qp - 12) / 3) scaled by 256 and rounded
//! \param    [out] tuSadThreshold
//!           Receives sqrt(0.85 * 2^(max(0, qp - 12) / 3)) * 0.4 scaled by 256 and rounded
//! \return   void
//!
void CodechalEncHevcStateG10::LoadCosts(uint8_t sliceType, uint8_t qp, uint16_t *lambdaMd, uint32_t *lambdaRd, uint32_t *tuSadThreshold)
{
    if (sliceType >= CODECHAL_ENCODE_HEVC_NUM_SLICE_TYPES)
    {
        CODECHAL_ENCODE_NORMALMESSAGE("Invalid slice type");
        sliceType = CODECHAL_ENCODE_HEVC_I_SLICE;
    }

    // 2^(max(0, qp - 12) / 3) is shared by all three lambda style outputs
    const int32_t qpMinus12 = qp - 12;
    const double  qpFactor  = pow(2.0, MOS_MAX(0, qpMinus12) / 3.0);

    const double qpScale   = (sliceType == CODECHAL_ENCODE_HEVC_I_SLICE) ? 5.0 : 0.55;
    const double lambda    = sqrt(qpScale * qpFactor);
    const double costScale = (sliceType == CODECHAL_ENCODE_HEVC_B_SLICE) ? 2.0 : 1.0;

    *lambdaMd       = (uint16_t)(lambda * 256 + 0.5);
    *lambdaRd       = (uint32_t)(qpScale * qpFactor * 256 + 0.5);
    *tuSadThreshold = (uint32_t)(sqrt(0.85 * qpFactor) * 0.4 * 256 + 0.5);

    // Mode-bits table index: 1 when the maximum coding block size is 64 (log2 == 6), 0 otherwise
    const uint8_t lcuIdx = ((m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3) == 6) ? 1 : 0;
    const auto   &bits   = m_modeBits[lcuIdx][sliceType];

    const double interWeighingFactor = costScale * lambda;
    const double intraWeighingFactor = interWeighingFactor * m_lambdaScaling[sliceType][qp];

    // Intra mode costs
    m_modeCost[LUTMODE_INTRA_NONPRED] = Map44LutValue((uint32_t)(intraWeighingFactor * bits[LUTMODE_INTRA_NONPRED]), 0x6f);
    m_modeCost[LUTMODE_INTRA_32x32]   = Map44LutValue((uint32_t)(intraWeighingFactor * bits[LUTMODE_INTRA_32x32]), 0x8f);
    m_modeCost[LUTMODE_INTRA_16x16]   = Map44LutValue((uint32_t)(intraWeighingFactor * bits[LUTMODE_INTRA_16x16]), 0x8f);
    m_modeCost[LUTMODE_INTRA_8x8]     = Map44LutValue((uint32_t)(intraWeighingFactor * bits[LUTMODE_INTRA_8x8]), 0x8f);
    m_modeCost[LUTMODE_INTRA_CHROMA]  = Map44LutValue((uint32_t)(intraWeighingFactor * bits[LUTMODE_INTRA_CHROMA]), 0x6f);

    // Inter mode costs
    m_modeCost[LUTMODE_INTER_32x32]   = Map44LutValue((uint32_t)(interWeighingFactor * bits[LUTMODE_INTER_32x32]), 0x8f);
    m_modeCost[LUTMODE_INTER_32x16]   = Map44LutValue((uint32_t)(interWeighingFactor * bits[LUTMODE_INTER_32x16]), 0x8f);
    m_modeCost[LUTMODE_INTER_16x16]   = Map44LutValue((uint32_t)(interWeighingFactor * bits[LUTMODE_INTER_16x16]), 0x6f);
    m_modeCost[LUTMODE_INTER_16x8]    = Map44LutValue((uint32_t)(interWeighingFactor * bits[LUTMODE_INTER_16x8]), 0x6f);
    // NOTE(review): INTER_8x8 is weighted by a fixed 0.45 rather than by
    // interWeighingFactor like its neighbours - confirm this is intended.
    m_modeCost[LUTMODE_INTER_8x8]     = Map44LutValue((uint32_t)(0.45 * bits[LUTMODE_INTER_8x8]), 0x6f);
    m_modeCost[LUTMODE_INTER_BIDIR]   = Map44LutValue((uint32_t)(interWeighingFactor * bits[LUTMODE_INTER_BIDIR]), 0x6f);

    // The reference index cost is only refreshed when num_ref_idx_l0_active_minus1 is 0;
    // otherwise the entry keeps whatever value it already had.
    if (m_hevcSliceParams->num_ref_idx_l0_active_minus1 == 0)
    {
        m_modeCost[LUTMODE_REF_ID] = Map44LutValue((uint32_t)(interWeighingFactor * bits[LUTMODE_REF_ID]), 0x6f);
    }
}
// ------------------------------------------------------------------------------
//| Purpose:   Setup curbe for HEVC ME Kernels
//| Return:    MOS_STATUS_SUCCESS if success, else fail reason
// ------------------------------------------------------------------------------
MOS_STATUS CodechalEncHevcStateG10::SetCurbeMe(
    HmeLevel            hmeLevel,
    HEVC_ME_DIST_TYPE   distType)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_meKernelState);

    const bool is4xLevel      = (hmeLevel == HME_LEVEL_4x);
    const bool isInterBrcDist = (distType == HEVC_ME_DIST_TYPE_INTER_BRC_DIST);

    // Start from the default CURBE; most fields keep their initial values for now
    CODECHAL_ENC_HEVC_ME_CURBE_G10 curbe = m_meCurbeInit;

    // DW0: 4x downscaled frame size in MV units (one unit per 8 samples)
    curbe.DW0_RoundedFrameWidthInMvUnitsfor4X  = m_downscaledWidth4x >> 3;
    curbe.DW0_RoundedFrameHeightInMvUnitsfor4X = m_downscaledHeight4x >> 3;

    // DW3 / DW5: big IME reference window and the start center derived from its size
    curbe.DW3_ImeRefWindowSize = IME_REF_WINDOW_MODE_BIG;
    curbe.DW5_StartCenter0_X   = ((m_imeRefWindowSize[curbe.DW3_ImeRefWindowSize][0] - 32) >> 3) & 0xF;
    curbe.DW5_StartCenter0_Y   = ((m_imeRefWindowSize[curbe.DW3_ImeRefWindowSize][1] - 32) >> 3) & 0xF;

    // DW6: slice type, HME stage and reference counts
    curbe.DW6_SliceType = isInterBrcDist ? 1 : 0;

    if (!isInterBrcDist)
    {
        curbe.DW6_HmeStage = HME_STAGE_4x_NO_16x;
    }
    else if (!is4xLevel)
    {
        curbe.DW6_HmeStage = HME_STAGE_16x;
    }
    else if (m_16xMeSupported)
    {
        curbe.DW6_HmeStage = HME_STAGE_4x_AFTER_16x;
    }
    else
    {
        curbe.DW6_HmeStage = HME_STAGE_4x_NO_16x;
    }

    curbe.DW6_NumRefL0 = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
    // No L1 references are programmed in low delay mode
    curbe.DW6_NumRefL1 = m_lowDelay ? 0 : (m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1);

    // DW7: 16x downscaled frame size in MV units
    curbe.DW7_RoundedFrameWidthInMvUnitsFor16x  = m_downscaledWidth16x >> 3;
    curbe.DW7_RoundedFrameHeightInMvUnitsfor16X = m_downscaledHeight16x >> 3;

    // DW25: frame size in samples at the current stage (frame size / 4 for 4x, / 16 otherwise)
    curbe.DW25_FrameWidthInSamplesOfCurrentStage  = m_frameWidth >> (is4xLevel ? 2 : 4);
    curbe.DW25_FrameHeightInSamplesOfCurrentStage = m_frameHeight >> (is4xLevel ? 2 : 4);

    curbe.DW33_SicLog2MinCuSize = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;

    // DW34..DW40: binding table indices
    curbe.DW34_BTI_HmeOutputMvDataSurface       = HME_OUTPUT_MV_DATA;
    curbe.DW35_BTI_16xInputMvDataSurface        = HME_16xINPUT_MV_DATA;
    curbe.DW36_BTI_4xOutputDistortionSurface    = HME_4xOUTPUT_DISTORTION;
    curbe.DW37_BTI_VmeSurfaceIndex              = HME_VME_PRED_CURR_PIC_IDX0;
    curbe.DW38_BTI_4xDsSurface                  = HME_4xDS_INPUT;
    curbe.DW39_BTI_BrcDistortionSurface         = HME_BRC_DISTORTION;
    curbe.DW40_BTI_Mv_And_Distortion_SumSurface = HME_MV_AND_DISTORTION_SUM;

    // Write the CURBE into the kernel state's DSH region at its CURBE offset
    CODECHAL_ENCODE_CHK_STATUS_RETURN(
        m_meKernelState->m_dshRegion.AddData(&curbe, m_meKernelState->dwCurbeOffset, sizeof(curbe)));

    return eStatus;
}
// ------------------------------------------------------------------------------
//| Purpose:   Setup curbe for HEVC BrcInitReset Kernel
//| Params:    brcKrnIdx - index into m_brcKernelStates; asserted to be
//|            CODECHAL_HEVC_BRC_INIT or CODECHAL_HEVC_BRC_RESET
//| Return:    MOS_STATUS_SUCCESS if success, else fail reason
//| Notes:     Also updates m_brcInitResetBufSizeInBits and
//|            m_dBrcInitResetInputBitsPerFrame, and (when m_brcInit is set)
//|            m_dBrcInitCurrentTargetBufFullInBits.
// ------------------------------------------------------------------------------
MOS_STATUS CodechalEncHevcStateG10::SetCurbeBrcInitReset(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_ASSERT(brcKrnIdx == CODECHAL_HEVC_BRC_INIT || brcKrnIdx == CODECHAL_HEVC_BRC_RESET);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);

    // Initialize the CURBE data
    CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G10 curbe = m_brcInitResetCurbeInit;

    // CBR / VBR / AVBR are expected to come with non-zero VBV settings
    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR ||
        m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR ||
        m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
    {
        if (m_hevcSeqParams->InitVBVBufferFullnessInBit == 0)
        {
            CODECHAL_ENCODE_ASSERT(false);
        }

        if (m_hevcSeqParams->VBVBufferSizeInBit == 0)
        {
            CODECHAL_ENCODE_ASSERT(false);
        }
    }

    curbe.DW0_ProfileLevelMaxFrame = GetProfileLevelMaxFrameSize();
    curbe.DW1_InitBufFull          = m_hevcSeqParams->InitVBVBufferFullnessInBit;
    curbe.DW2_BufSize              = m_hevcSeqParams->VBVBufferSizeInBit;
    curbe.DW3_TargetBitRate        = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
    curbe.DW4_MaximumBitRate       = m_hevcSeqParams->MaxBitRate * CODECHAL_ENCODE_BRC_KBPS;
    curbe.DW5_MinimumBitRate       = 0;
    curbe.DW6_FrameRateM           = m_hevcSeqParams->FrameRate.Numerator;
    curbe.DW7_FrameRateD           = m_hevcSeqParams->FrameRate.Denominator;
    curbe.DW8_BRCFlag              = (m_lcuBrcEnabled) ? 0 : BRCINIT_DISABLE_MBBRC;
    curbe.DW25_ACQPBuffer          = 1;
    curbe.DW25_Log2MaxCuSize       = (m_isMaxLcu64) ? 6 : 5;
    curbe.DW25_SlidingWindowSize   = m_slidingWindowSize;

    curbe.DW8_BRCFlag |= BRCINIT_IGNORE_PICTURE_HEADER_SIZE;

    // Rate control method specific flags and bitrate adjustments
    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CBR)
    {
        curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
        curbe.DW8_BRCFlag |= BRCINIT_ISCBR;
    }
    else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VBR)
    {
        if (curbe.DW4_MaximumBitRate < curbe.DW3_TargetBitRate)
        {
            curbe.DW4_MaximumBitRate = 2 * curbe.DW3_TargetBitRate;
        }
        curbe.DW8_BRCFlag |= BRCINIT_ISVBR;
    }
    else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
    {
        curbe.DW8_BRCFlag |= BRCINIT_ISAVBR;
        // For AVBR, max bitrate = target bitrate,
        curbe.DW3_TargetBitRate  = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
        curbe.DW4_MaximumBitRate = m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
    }
    else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_ICQ)
    {
        curbe.DW8_BRCFlag |= BRCINIT_ISICQ;
        curbe.DW25_ACQPBuffer = m_hevcSeqParams->ICQQualityFactor;
    }
    else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_VCM)
    {
        curbe.DW4_MaximumBitRate = curbe.DW3_TargetBitRate;
        curbe.DW8_BRCFlag |= BRCINIT_ISVCM;
    }
    else if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
    {
        // Plain assignment: CQP replaces every flag set above
        curbe.DW8_BRCFlag = BRCINIT_ISCQP;
    }

    curbe.DW9_FrameWidth       = m_oriFrameWidth;
    curbe.DW10_FrameHeight     = m_oriFrameHeight;
    curbe.DW10_AVBRAccuracy    = m_usAvbrAccuracy;
    curbe.DW11_AVBRConvergence = m_usAvbrConvergence;

    /**********************************************************************
    In case of non-HB/BPyramid Structure
    BRC_Param_A = GopP
    BRC_Param_B = GopB
    In case of HB/BPyramid GOP Structure
    BRC_Param_A, BRC_Param_B, BRC_Param_C, BRC_Param_D are
    BRC Parameters set as follows as per CModel equation
    ***********************************************************************/
    // BPyramid GOP
    if (m_hevcSeqParams->NumOfBInGop[1] != 0 || m_hevcSeqParams->NumOfBInGop[2] != 0)
    {
        // Guard the division the same way the regular GOP branch below does,
        // so a zero GopRefDist cannot cause a divide-by-zero.
        curbe.DW8_BRC_Param_A  = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize) / m_hevcSeqParams->GopRefDist) : 0;
        curbe.DW9_BRC_Param_B  = curbe.DW8_BRC_Param_A;
        curbe.DW13_BRC_Param_C = curbe.DW8_BRC_Param_A * 2;
        curbe.DW14_BRC_Param_D = ((m_hevcSeqParams->GopPicSize) - (curbe.DW8_BRC_Param_A) - (curbe.DW13_BRC_Param_C) - (curbe.DW9_BRC_Param_B));
        // B1 Level GOP
        if (m_hevcSeqParams->NumOfBInGop[2] == 0)
        {
            curbe.DW14_MaxBRCLevel = 3;
        }
        // B2 Level GOP
        else
        {
            curbe.DW14_MaxBRCLevel = 4;
        }
    }
    // For Regular GOP - No BPyramid
    else
    {
        curbe.DW14_MaxBRCLevel = 1;
        curbe.DW8_BRC_Param_A  = (m_hevcSeqParams->GopRefDist) ? ((m_hevcSeqParams->GopPicSize - 1) / m_hevcSeqParams->GopRefDist) : 0;
        curbe.DW9_BRC_Param_B  = m_hevcSeqParams->GopPicSize - 1 - curbe.DW8_BRC_Param_A;
    }

    // Set dynamic thresholds
    double inputBitsPerFrame =
        ((double)(curbe.DW4_MaximumBitRate) * (double)(curbe.DW7_FrameRateD) /
        (double)(curbe.DW6_FrameRateM));

    // Buffer size is raised to at least four frames worth of bits
    if (curbe.DW2_BufSize < (uint32_t)inputBitsPerFrame * 4)
    {
        curbe.DW2_BufSize = (uint32_t)inputBitsPerFrame * 4;
    }

    // Initial fullness: default to 7/8 of the buffer, then clamp to [2 frames, buffer size]
    if (curbe.DW1_InitBufFull == 0)
    {
        curbe.DW1_InitBufFull = 7 * curbe.DW2_BufSize/8;
    }
    if (curbe.DW1_InitBufFull < (uint32_t)(inputBitsPerFrame*2))
    {
        curbe.DW1_InitBufFull = (uint32_t)(inputBitsPerFrame*2);
    }
    if (curbe.DW1_InitBufFull > curbe.DW2_BufSize)
    {
        curbe.DW1_InitBufFull = curbe.DW2_BufSize;
    }

    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
    {
        // For AVBR, Buffer size = 2*Bitrate, InitVBV = 0.75 * BufferSize
        curbe.DW2_BufSize     = 2 * m_hevcSeqParams->TargetBitRate * CODECHAL_ENCODE_BRC_KBPS;
        curbe.DW1_InitBufFull = (uint32_t)(0.75 * curbe.DW2_BufSize);
    }

    double bpsRatio = inputBitsPerFrame / ((double)(curbe.DW2_BufSize)/30);
    bpsRatio = (bpsRatio < 0.1) ? 0.1 : (bpsRatio > 3.5) ? 3.5 : bpsRatio;

    // Half of the deviation thresholds are negative. Converting a negative
    // floating-point value directly to an unsigned integer type is undefined
    // behaviour, so truncate to a signed integer first and only then convert
    // to unsigned (which is a well-defined modular conversion).
    auto toThreshold = [](double value) -> uint32_t
    {
        return (uint32_t)(int32_t)value;
    };

    curbe.DW19_DeviationThreshold0_PBframe = toThreshold(-50 * pow(0.90, bpsRatio));
    curbe.DW19_DeviationThreshold1_PBframe = toThreshold(-50 * pow(0.66, bpsRatio));
    curbe.DW19_DeviationThreshold2_PBframe = toThreshold(-50 * pow(0.46, bpsRatio));
    curbe.DW19_DeviationThreshold3_PBframe = toThreshold(-50 * pow(0.3, bpsRatio));

    curbe.DW20_DeviationThreshold4_PBframe = toThreshold(50 * pow(0.3, bpsRatio));
    curbe.DW20_DeviationThreshold5_PBframe = toThreshold(50 * pow(0.46, bpsRatio));
    curbe.DW20_DeviationThreshold6_PBframe = toThreshold(50 * pow(0.7, bpsRatio));
    curbe.DW20_DeviationThreshold7_PBframe = toThreshold(50 * pow(0.9, bpsRatio));

    curbe.DW21_DeviationThreshold0_VBRcontrol = toThreshold(-50 * pow(0.9, bpsRatio));
    curbe.DW21_DeviationThreshold1_VBRcontrol = toThreshold(-50 * pow(0.7, bpsRatio));
    curbe.DW21_DeviationThreshold2_VBRcontrol = toThreshold(-50 * pow(0.5, bpsRatio));
    curbe.DW21_DeviationThreshold3_VBRcontrol = toThreshold(-50 * pow(0.3, bpsRatio));

    curbe.DW22_DeviationThreshold4_VBRcontrol = toThreshold(100 * pow(0.4, bpsRatio));
    curbe.DW22_DeviationThreshold5_VBRcontrol = toThreshold(100 * pow(0.5, bpsRatio));
    curbe.DW22_DeviationThreshold6_VBRcontrol = toThreshold(100 * pow(0.75, bpsRatio));
    curbe.DW22_DeviationThreshold7_VBRcontrol = toThreshold(100 * pow(0.9, bpsRatio));

    curbe.DW23_DeviationThreshold0_Iframe = toThreshold(-50 * pow(0.8, bpsRatio));
    curbe.DW23_DeviationThreshold1_Iframe = toThreshold(-50 * pow(0.6, bpsRatio));
    curbe.DW23_DeviationThreshold2_Iframe = toThreshold(-50 * pow(0.34, bpsRatio));
    curbe.DW23_DeviationThreshold3_Iframe = toThreshold(-50 * pow(0.2, bpsRatio));

    curbe.DW24_DeviationThreshold4_Iframe = toThreshold(50 * pow(0.2, bpsRatio));
    curbe.DW24_DeviationThreshold5_Iframe = toThreshold(50 * pow(0.4, bpsRatio));
    curbe.DW24_DeviationThreshold6_Iframe = toThreshold(50 * pow(0.66, bpsRatio));
    curbe.DW24_DeviationThreshold7_Iframe = toThreshold(50 * pow(0.9, bpsRatio));

    // Remember the values the BRC update path reads later
    if (m_brcInit)
    {
        m_dBrcInitCurrentTargetBufFullInBits = curbe.DW1_InitBufFull;
    }

    m_brcInitResetBufSizeInBits      = curbe.DW2_BufSize;
    m_dBrcInitResetInputBitsPerFrame = inputBitsPerFrame;

    // Write the CURBE into the selected kernel state's DSH region
    PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
    CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
        &curbe,
        kernelState->dwCurbeOffset,
        sizeof(curbe)));

    return eStatus;
}
// ------------------------------------------------------------------------------
//| Purpose:   Setup curbe for HEVC BrcUpdate Kernel
//| Return:    MOS_STATUS (MOS_STATUS_INVALID_PARAMETER for an unsupported picture coding type)
// ------------------------------------------------------------------------------
//!
//! \brief    Build the CURBE for the HEVC BRC update kernel and add it to the DSH
//! \details  Copies m_brcUpdateCurbeInit, fills in the per-frame rate-control
//!           fields and writes the result into the selected kernel state's DSH
//!           region at that kernel's CURBE offset. As a side effect the running
//!           target m_dBrcInitCurrentTargetBufFullInBits is updated.
//! \param    [in] brcKrnIdx
//!           BRC kernel to program; asserted to be CODECHAL_HEVC_BRC_FRAME_UPDATE
//!           or CODECHAL_HEVC_BRC_LCU_UPDATE
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, MOS_STATUS_INVALID_PARAMETER when the
//!           picture coding type is not I/B/B1/B2, otherwise the status returned
//!           through the CODECHAL_ENCODE_CHK_* macros
//!
MOS_STATUS CodechalEncHevcStateG10::SetCurbeBrcUpdate(CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_ASSERT(brcKrnIdx == CODECHAL_HEVC_BRC_FRAME_UPDATE || brcKrnIdx == CODECHAL_HEVC_BRC_LCU_UPDATE);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);

    // Work on a private copy of the initialisation template
    CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10 curbe = m_brcUpdateCurbeInit;

    // If the running target went past the buffer size, pull it back by one
    // buffer size and report that to the kernel through the flag.
    const double bufSizeInBits    = (double)m_brcInitResetBufSizeInBits;
    const bool   targetExceedsBuf = (m_dBrcInitCurrentTargetBufFullInBits > bufSizeInBits);
    if (targetExceedsBuf)
    {
        m_dBrcInitCurrentTargetBufFullInBits -= bufSizeInBits;
    }
    curbe.DW5_TargetSize_Flag = targetExceedsBuf ? 1 : 0;

    if (m_numSkipFrames)
    {
        // Hand the number/size of skipped frames to the kernel ...
        curbe.DW6_NumSkippedFrames     = m_numSkipFrames;
        curbe.DW15_SizeOfSkippedFrames = m_sizeSkipFrames;

        // ... and advance the running target by one frame budget per skipped frame
        m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame * m_numSkipFrames;
    }

    curbe.DW0_TargetSize        = (uint32_t)(m_dBrcInitCurrentTargetBufFullInBits);
    curbe.DW1_FrameNumber       = m_storeData - 1;
    curbe.DW2_PictureHeaderSize = GetPicHdrSize();

    // Translate the picture coding type into the BRC frame level
    switch (m_pictureCodingType)
    {
    case I_TYPE:
        curbe.DW5_CurrFrameBrcLevel = HEVC_BRC_FRAME_TYPE_I;
        break;

    case B_TYPE:
        // A low-delay B frame shares the P/LB level
        curbe.DW5_CurrFrameBrcLevel = (m_lowDelay) ? HEVC_BRC_FRAME_TYPE_P_OR_LB : HEVC_BRC_FRAME_TYPE_B;
        break;

    case B1_TYPE:
        curbe.DW5_CurrFrameBrcLevel = HEVC_BRC_FRAME_TYPE_B1;
        break;

    case B2_TYPE:
        curbe.DW5_CurrFrameBrcLevel = HEVC_BRC_FRAME_TYPE_B2;
        break;

    default:
        CODECHAL_ENCODE_ASSERT(false);
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        return eStatus;
    }

    curbe.DW5_MaxNumPAKs = m_mfxInterface->GetBrcNumPakPasses();

    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
    {
        curbe.DW6_CqpValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
    }

    curbe.DW6_LCUQPAverageEnable  = (m_hevcPicParams->diff_cu_qp_delta_depth == 0) ? 1 : 0;
    curbe.DW6_SlidingWindowEnable = (m_hevcSeqParams->FrameSizeTolerance == EFRAMESIZETOL_LOW);
    curbe.DW14_ParallelMode       = m_hevcSeqParams->ParallelBRC;

    // Only the LCU-update pass advances the running target by one frame budget;
    // DW0_TargetSize above was captured before this increment.
    if (brcKrnIdx == CODECHAL_HEVC_BRC_LCU_UPDATE)
    {
        m_dBrcInitCurrentTargetBufFullInBits += m_dBrcInitResetInputBitsPerFrame;
    }

    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_AVBR)
    {
        // Adjustment start frames scale with the AVBR convergence setting (base 150)
        curbe.DW3_StartGAdjFrame0 = (uint32_t)((10 * m_usAvbrConvergence) / (double)150);
        curbe.DW3_StartGAdjFrame1 = (uint32_t)((50 * m_usAvbrConvergence) / (double)150);
        curbe.DW4_StartGAdjFrame2 = (uint32_t)((100 * m_usAvbrConvergence) / (double)150);
        curbe.DW4_StartGAdjFrame3 = (uint32_t)((150 * m_usAvbrConvergence) / (double)150);

        // Rate-ratio thresholds scale around 100 with the AVBR accuracy setting (base 30)
        const double accuracyRatio = m_usAvbrAccuracy / (double)30;
        curbe.DW11_gRateRatioThreshold0 = (uint32_t)((100 - accuracyRatio * (100 - 40)));
        curbe.DW11_gRateRatioThreshold1 = (uint32_t)((100 - accuracyRatio * (100 - 75)));
        curbe.DW12_gRateRatioThreshold2 = (uint32_t)((100 - accuracyRatio * (100 - 97)));
        curbe.DW12_gRateRatioThreshold3 = (uint32_t)((100 + accuracyRatio * (103 - 100)));
        curbe.DW12_gRateRatioThreshold4 = (uint32_t)((100 + accuracyRatio * (125 - 100)));
        curbe.DW12_gRateRatioThreshold5 = (uint32_t)((100 + accuracyRatio * (160 - 100)));
    }

    // Copy the finished CURBE into the DSH region of the selected BRC kernel
    PMHW_KERNEL_STATE brcKernelState = &m_brcKernelStates[brcKrnIdx];
    CODECHAL_ENCODE_CHK_STATUS_RETURN(brcKernelState->m_dshRegion.AddData(
        &curbe,
        brcKernelState->dwCurbeOffset,
        sizeof(curbe)));

    return eStatus;
}
// ------------------------------------------------------------------------------
//| Purpose: Setup curbe for HEVC MbEnc I Kernels
//| Return: MOS_STATUS_SUCCESS if successful, otherwise an error status
//------------------------------------------------------------------------------
//!
//! \brief    Build the CURBE for the HEVC MbEnc I kernel and add it to the DSH
//! \details  Copies m_mbencICurbeInit, fills in frame size, slice QP, mode costs,
//!           lambdas, CU/TU size limits, target-usage dependent settings and the
//!           binding-table index (BTI) fields, then writes the result into the
//!           DSH region of m_mbEncKernelStates[MBENC_I_KRNIDX].
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, otherwise the status returned through
//!           the CODECHAL_ENCODE_CHK_* macros
//!
MOS_STATUS CodechalEncHevcStateG10::SetCurbeMbEncIKernel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);

    // Initialize the CURBE data
    CODECHAL_ENC_HEVC_MBENC_I_CURBE_G10 curbe = m_mbencICurbeInit;

    curbe.DW0_FrameWidthInSamples  = m_frameWidth;
    curbe.DW0_FrameHeightInSamples = m_frameHeight;

    // NOTE(review): the range check is assert-only and sliceQp is narrowed to
    // uint8_t for LoadCosts - confirm LoadCosts validates its qp argument.
    uint32_t sliceQp = m_hevcSliceParams->slice_qp_delta + m_hevcPicParams->QpY;
    CODECHAL_ENCODE_ASSERT(sliceQp < QP_NUM);

    // LoadCosts returns the lambdas and TU SAD threshold through the out-params;
    // presumably it also refreshes m_modeCost, which is read right below - TODO confirm.
    uint16_t lambdaMd = 0;
    uint32_t lambdaRd = 0, tuSadThreshold = 0;
    LoadCosts(CODECHAL_ENCODE_HEVC_I_SLICE, (uint8_t)sliceQp, &lambdaMd, &lambdaRd, &tuSadThreshold);

    curbe.DW3_ModeCost_0 = m_modeCost[0];
    curbe.DW3_ModeCost_1 = m_modeCost[1];
    curbe.DW3_ModeCost_2 = m_modeCost[2];
    curbe.DW3_ModeCost_3 = m_modeCost[3];

    curbe.DW4_ModeCost_4 = m_modeCost[4];
    curbe.DW4_ModeCost_5 = m_modeCost[5];
    curbe.DW4_ModeCost_6 = m_modeCost[6];
    curbe.DW4_ModeCost_7 = m_modeCost[7];

    curbe.DW5_ModeCost_8          = m_modeCost[8];
    curbe.DW5_ModeCost_9          = m_modeCost[9];
    curbe.DW5_RefIDCost           = m_modeCost[10];
    curbe.DW5_ChromaIntraModeCost = m_modeCost[11];

    uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage + 1) / 4) % 3;  // Map TU 1,4,7 to 0,1,2

    curbe.DW6_Log2MaxCUSize          = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
    curbe.DW6_Log2MinCUSize          = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
    curbe.DW6_Log2MaxTUSize          = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
    curbe.DW6_Log2MinTUSize          = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
    curbe.DW6_MaxTransformDepthIntra = m_hevcSeqParams->max_transform_hierarchy_depth_intra ? m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping] : 0;  // 2 is not supported in Enc

    curbe.DW7_ConcurrentGroupNum = 1;
    curbe.DW7_SliceQP            = sliceQp;

    curbe.DW8_Lambda_Rd = lambdaRd;
    curbe.DW9_Lambda_Md = lambdaMd;

    curbe.DW10_IntraTuDThres = tuSadThreshold;

    curbe.DW11_SliceType = CODECHAL_ENCODE_HEVC_I_SLICE;

    // CQP always uses a constant QP; otherwise QP is per CU when LCU-level BRC
    // is enabled and per frame when it is not.
    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
    {
        curbe.DW11_QPType = QP_TYPE_CONSTANT;
    }
    else
    {
        curbe.DW11_QPType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
    }

    //TU based settings
    curbe.DW7_EncTuDecisionMode  = m_tuSettings[EncTuDecisionModeTuParam][tuMapping];
    curbe.DW11_EncQtDecisionMode = m_tuSettings[EncQtDecisionModeTuParam][tuMapping];

    // Binding-table indices of the surfaces used by the I kernel
    curbe.DW16_BTI_VmeIntraPredictionSurface         = MBENC_I_FRAME_VME_PRED_CURR_PIC_IDX0;
    curbe.DW17_BTI_CurrentPictureY                   = MBENC_I_FRAME_CURR_Y;
    curbe.DW18_BTI_EncCuRecordSurface                = MBENC_I_FRAME_INTERMEDIATE_CU_RECORD;
    curbe.DW19_BTI_PakObjectCommandSurface           = MBENC_I_FRAME_PAK_OBJ;
    curbe.DW20_BTI_CuPacketForPakSurface             = MBENC_I_FRAME_PAK_CU_RECORD;
    curbe.DW21_BTI_InternalScratchSurface            = MBENC_I_FRAME_SCRATCH_SURFACE;
    curbe.DW22_BTI_CuBasedQpSurface                  = MBENC_I_FRAME_CU_QP_DATA;
    curbe.DW23_BTI_ConstantDataLutSurface            = MBENC_I_FRAME_CONST_DATA_LUT;
    curbe.DW24_BTI_LcuLevelDataInputSurface          = MBENC_I_FRAME_LCU_LEVEL_DATA_INPUT;
    curbe.DW25_BTI_ConcurrentThreadGroupDataSurface  = MBENC_I_FRAME_CONCURRENT_TG_DATA;
    curbe.DW26_BTI_BrcCombinedEncParameterSurface    = MBENC_I_FRAME_BRC_COMBINED_ENC_PARAMETER_SURFACE;
    // Each assignment is its own statement (previously this one ended with ','
    // and was chained to the next through the comma operator).
    curbe.DW27_BTI_CuSplitSurface                    = MBENC_I_FRAME_CU_SPLIT_SURFACE;
    curbe.DW28_BTI_DebugSurface                      = MBENC_I_FRAME_DEBUG_DUMP;

    // Copy the finished CURBE into the DSH region of the I kernel
    PMHW_KERNEL_STATE kernelState = &m_mbEncKernelStates[MBENC_I_KRNIDX];
    CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
        &curbe,
        kernelState->dwCurbeOffset,
        sizeof(curbe)));

    return eStatus;
}
// ------------------------------------------------------------------------------
//| Purpose: Setup curbe for HEVC MbEnc B LCU32 and LCU64_32 Kernels
//| Return: MOS_STATUS_SUCCESS if successful, otherwise an error status
//------------------------------------------------------------------------------
//!
//! \brief    Build the CURBE for the HEVC MbEnc B kernel (LCU32 or LCU64) and add it to the DSH
//! \details  Copies m_mbencBCurbeInit, fills in frame size, CU/TU limits, HME and QP
//!           controls, slice/reference information and target-usage dependent
//!           settings. The binding-table index (BTI) fields and the destination
//!           kernel state are chosen by m_isMaxLcu64.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, otherwise the status returned through
//!           the CODECHAL_ENCODE_CHK_* macros
//!
MOS_STATUS CodechalEncHevcStateG10::SetCurbeMbEncBKernel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);

    // Initialize the CURBE data
    CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10 curbe = m_mbencBCurbeInit;

    curbe.DW0_FrameWidthInSamples  = m_frameWidth;
    curbe.DW0_FrameHeightInSamples = m_frameHeight;

    uint8_t tuMapping = ((m_hevcSeqParams->TargetUsage + 1) / 4) % 3;  // Map TU 1,4,7 to 0,1,2

    curbe.DW1_Log2MaxCUSize          = m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3;
    curbe.DW1_Log2MinCUSize          = m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
    curbe.DW1_Log2MaxTUSize          = m_hevcSeqParams->log2_max_transform_block_size_minus2 + 2;
    curbe.DW1_Log2MinTUSize          = m_hevcSeqParams->log2_min_transform_block_size_minus2 + 2;
    curbe.DW1_MaxTransformDepthInter = m_hevcSeqParams->max_transform_hierarchy_depth_inter ? m_tuSettings[Log2TUMaxDepthInterTuParam][tuMapping] : 0;
    curbe.DW1_MaxTransformDepthIntra = m_hevcSeqParams->max_transform_hierarchy_depth_intra ? m_tuSettings[Log2TUMaxDepthIntraTuParam][tuMapping] : 0;  // 2 is not supported in Enc

    curbe.DW2_HMECoarseShape       = 1;
    curbe.DW2_HMESubPelMode        = 3;
    curbe.DW2_RegionsInSliceEnable = (m_numRegionsInSlice > 1);

    // CQP always uses a constant QP; otherwise QP is per CU when LCU-level BRC
    // is enabled and per frame when it is not.
    if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
    {
        curbe.DW2_QpType = QP_TYPE_CONSTANT;
    }
    else
    {
        curbe.DW2_QpType = m_lcuBrcEnabled ? QP_TYPE_CU_LEVEL : QP_TYPE_FRAME;
    }

    curbe.DW2_HMEFlag  = m_hmeSupported ? 0x3 : 0;
    curbe.DW2_SuperHME = m_16xMeSupported;

    // activeNumChildThreads params are used to control the active number of child threads per CU64/CU32 to fine-tune the performance on silicon.
    // Set zero (not used) by default for the Beta6 release.
    curbe.DW3_ActiveNumChildThreads_CU64   = 0;
    curbe.DW3_ActiveNumChildThreads_CU32_0 = 0;
    curbe.DW3_ActiveNumChildThreads_CU32_1 = 0;
    curbe.DW3_ActiveNumChildThreads_CU32_2 = 0;
    curbe.DW3_ActiveNumChildThreads_CU32_3 = 0;
    curbe.DW3_SliceQp                      = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;

    curbe.DW4_SkipModeEn       = 1;
    curbe.DW4_HEVCMinCUControl = m_hevcSeqParams->log2_min_coding_block_size_minus3;

    curbe.DW7_SliceType             = CODECHAL_ENCODE_HEVC_B_SLICE;
    curbe.DW7_TemporalMvpEnableFlag = m_hevcSeqParams->sps_temporal_mvp_enable_flag;
    curbe.DW7_CollocatedFromL0Flag  = m_hevcSliceParams->collocated_from_l0_flag;
    curbe.DW7_TheSameRefList        = m_sameRefList;
    curbe.DW7_IsLowDelay            = m_lowDelay;
    curbe.DW7_MaxNumMergeCand       = m_hevcSliceParams->MaxNumMergeCand;
    curbe.DW7_NumRefIdxL0           = m_hevcSliceParams->num_ref_idx_l0_active_minus1 + 1;
    curbe.DW7_NumRefIdxL1           = m_hevcSliceParams->num_ref_idx_l1_active_minus1 + 1;

    // Temporal differences for the first four entries of each reference list
    curbe.DW8_FwdPocNumber_L0_mTb_0 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][0]);
    curbe.DW8_BwdPocNumber_L1_mTb_0 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][0]);
    curbe.DW8_FwdPocNumber_L0_mTb_1 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][1]);
    curbe.DW8_BwdPocNumber_L1_mTb_1 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][1]);

    curbe.DW9_FwdPocNumber_L0_mTb_2 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][2]);
    curbe.DW9_BwdPocNumber_L1_mTb_2 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][2]);
    curbe.DW9_FwdPocNumber_L0_mTb_3 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[0][3]);
    curbe.DW9_BwdPocNumber_L1_mTb_3 = ComputeTemporalDifferent(m_hevcSliceParams->RefPicList[1][3]);

    curbe.DW13_RefFrameHorizontalSize = m_frameWidth;
    curbe.DW13_RefFrameVerticalSize   = m_frameHeight;

    curbe.DW15_ConcurrentGroupNum   = m_walkingPatternParam.dwNumRegion;
    curbe.DW15_TotalThreadNumPerLCU = m_totalNumThreadsPerLcu;
    curbe.DW15_NumRegions           = m_numRegionsInSlice;

    // TU based settings
    curbe.DW1_MaxNumIMESearchCenter = m_tuSettings[MaxNumIMESearchCenterTuParam][tuMapping];

    curbe.DW2_EnableCu64Check                 = m_isMaxLcu64 ? m_tuSettings[EnableCu64CheckTuParam][tuMapping] : 0;
    curbe.DW2_EncTransformSimplify            = m_tuSettings[EncTransformSimplifyTuParam][tuMapping];
    curbe.DW2_EncTuDecisionMode               = m_tuSettings[EncTuDecisionModeTuParam][tuMapping];
    curbe.DW2_EncTuDecisionForAllQt           = m_tuSettings[EncTuDecisionForAllQtTuParam][tuMapping];
    curbe.DW2_CoefBitEstMode                  = m_tuSettings[CoefBitEstModeTuParam][tuMapping];
    curbe.DW2_EncSkipDecisionMode             = m_tuSettings[EncSkipDecisionModeTuParam][tuMapping];
    curbe.DW2_EncQtDecisionMode               = m_tuSettings[EncQtDecisionModeTuParam][tuMapping];
    curbe.DW2_LCU32_EncRdDecisionModeForAllQt = m_tuSettings[EncRdDecisionModeForAllQtTuParam][tuMapping];
    curbe.DW2_LCU64_Cu64SkipCheckOnly         = (tuMapping == 1);  // 1 for TU4, 0 for others
    curbe.DW2_SICDynamicRunPathMode           = m_tuSettings[SICDynamicRunPathMode][tuMapping];  // 0,1 for mode 1, 2 for mode 2, 3 for mode 3 --- TU1,TU4 :0 ; TU7: 2 -- as per beta6

    // Binding-table indices: the LCU64 and LCU32 kernels use different layouts
    if (m_isMaxLcu64)
    {
        curbe.DW16_BTI_CurrentPictureY                           = MBENC_B_FRAME_LCU64_CURR_Y;
        curbe.DW17_BTI_EncCuRecordSurface                        = MBENC_B_FRAME_LCU64_CU32_ENC_CU_RECORD;
        curbe.DW18_BTI_LCU64_SecEncCuRecordSurface               = MBENC_B_FRAME_LCU64_SECOND_CU32_ENC_CU_RECORD;
        curbe.DW19_BTI_LCU64_PAKObjectCommandSurface             = MBENC_B_FRAME_LCU64_PAK_OBJ;
        curbe.DW20_BTI_LCU64_PAKCURecordSurface                  = MBENC_B_FRAME_LCU64_PAK_CU_RECORD;
        curbe.DW21_BTI_LCU64_VMEIntra_InterPredictionSurface     = MBENC_B_FRAME_LCU64_VME_PRED_CURR_PIC_IDX0;
        curbe.DW22_BTI_LCU64_CU16x16QpDataInputSurface           = MBENC_B_FRAME_LCU64_CU16x16_QP_DATA;
        curbe.DW23_BTI_LCU64_CU32_HEVCEncConstantTableSurface    = MBENC_B_FRAME_LCU64_CU32_ENC_CONST_TABLE;
        curbe.DW24_BTI_LCU64_ColocatedCUMotionVectorDataSurface  = MBENC_B_FRAME_LCU64_COLOCATED_CU_MV_DATA;
        curbe.DW25_BTI_LCU64_HmeMotionPredictorDataSurface       = MBENC_B_FRAME_LCU64_HME_MOTION_PREDICTOR_DATA;
        curbe.DW26_BTI_LCU64_LcuLevelDataInputSurface            = MBENC_B_FRAME_LCU64_LCU_LEVEL_DATA_INPUT;
        curbe.DW27_BTI_LCU64_CU32_LcuEncodingScratchSurface      = MBENC_B_FRAME_LCU64_CU32_LCU_ENC_SCRATCH_SURFACE;
        curbe.DW28_BTI_LCU64_64x64_DistortionSurface             = MBENC_B_FRAME_LCU64_64X64_DISTORTION_SURFACE;
        curbe.DW29_BTI_LCU64_ConcurrentThreadGroupDataSurface    = MBENC_B_FRAME_LCU64_CONCURRENT_TG_DATA;
        curbe.DW30_BTI_LCU64_BrcCombinedEncParameterSurface      = MBENC_B_FRAME_LCU64_BRC_COMBINED_ENC_PARAMETER_SURFACE;
        curbe.DW31_BTI_LCU64_CU32_JobQueue1DBufferSurface        = MBENC_B_FRAME_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
        curbe.DW32_BTI_LCU64_CU32_JobQueue2DBufferSurface        = MBENC_B_FRAME_LCU64_CU32_JOB_QUEUE_2D_SURFACE;
        curbe.DW33_BTI_LCU64_CU32_ResidualDataScratchSurface     = MBENC_B_FRAME_LCU64_CU32_RESIDUAL_DATA_SCRATCH_SURFACE;
        curbe.DW34_BTI_LCU64_CuSplitSurface                      = MBENC_B_FRAME_LCU64_CU_SPLIT_DATA_SURFACE;
        curbe.DW35_BTI_LCU64_CurrentPictureY2xDS                 = MBENC_B_FRAME_LCU64_CURR_Y_2xDS;
        curbe.DW36_BTI_LCU64_IntermediateCuRecordSurface         = MBENC_B_FRAME_LCU64_INTERMEDIATE_CU_RECORD;
        curbe.DW37_BTI_Lcu64_ConstantDataLutSurface              = MBENC_B_FRAME_LCU64_CONST64_DATA_LUT;
        curbe.DW38_BTI_LCU64_LcuDataStorageSurface               = MBENC_B_FRAME_LCU64_LCU_STORAGE_SURFACE;
        curbe.DW39_BTI_LCU64_VmeInterPredictionSurface2xDS       = MBENC_B_FRAME_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
        curbe.DW40_BTI_LCU64_JobQueue1DBufferSurface             = MBENC_B_FRAME_LCU64_JOB_QUEUE_1D_SURFACE;
        curbe.DW41_BTI_LCU64_JobQueue2DBufferSurface             = MBENC_B_FRAME_LCU64_JOB_QUEUE_2D_SURFACE;
        curbe.DW42_BTI_LCU64_ResidualDataScratchSurface          = MBENC_B_FRAME_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
        curbe.DW43_BTI_LCU64_DebugFeatureSurface                 = MBENC_B_FRAME_LCU64_DEBUG_SURFACE;
    }
    else
    {
        curbe.DW16_BTI_CurrentPictureY                           = MBENC_B_FRAME_LCU32_CURR_Y;
        curbe.DW17_BTI_EncCuRecordSurface                        = MBENC_B_FRAME_LCU32_ENC_CU_RECORD;
        curbe.DW18_BTI_LCU32_PAKObjectCommandSurface             = MBENC_B_FRAME_LCU32_PAK_OBJ;
        curbe.DW19_BTI_LCU32_PAKCURecordSurface                  = MBENC_B_FRAME_LCU32_PAK_CU_RECORD;
        curbe.DW20_BTI_LCU32_VMEIntra_InterPredictionSurface     = MBENC_B_FRAME_LCU32_VME_PRED_CURR_PIC_IDX0;
        curbe.DW21_BTI_LCU32_CU16x16QpDataInputSurface           = MBENC_B_FRAME_LCU32_CU16x16_QP_DATA;
        curbe.DW22_BTI_LCU32_HEVCEncConstantTableSurface         = MBENC_B_FRAME_LCU32_ENC_CONST_TABLE;
        curbe.DW23_BTI_LCU32_ColocatedCUMotionVectorDataSurface  = MBENC_B_FRAME_LCU32_COLOCATED_CU_MV_DATA;
        curbe.DW24_BTI_LCU32_HmeMotionPredictorDataSurface       = MBENC_B_FRAME_LCU32_HME_MOTION_PREDICTOR_DATA;
        curbe.DW25_BTI_LCU32_LcuLevelDataInputSurface            = MBENC_B_FRAME_LCU32_LCU_LEVEL_DATA_INPUT;
        curbe.DW26_BTI_LCU32_LcuEncodingScratchSurface           = MBENC_B_FRAME_LCU32_LCU_ENC_SCRATCH_SURFACE;
        curbe.DW27_BTI_LCU32_ConcurrentThreadGroupDataSurface    = MBENC_B_FRAME_LCU32_CONCURRENT_TG_DATA;
        curbe.DW28_BTI_LCU32_BrcCombinedEncParameterSurface      = MBENC_B_FRAME_LCU32_BRC_COMBINED_ENC_PARAMETER_SURFACE;
        curbe.DW29_BTI_LCU32_JobQueueScratchBufferSurface        = MBENC_B_FRAME_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
        // Each assignment is its own statement (the next two previously ended
        // with ',' and were chained through the comma operator).
        curbe.DW30_BTI_LCU32_CuSplitDataSurface                  = MBENC_B_FRAME_LCU32_CU_SPLIT_DATA_SURFACE;
        curbe.DW31_BTI_LCU32_ResidualDataScratchSurface          = MBENC_B_FRAME_LCU32_RESIDUAL_DATA_SCRATCH_SURFACE;
        curbe.DW32_BTI_LCU32_DebugSurface                        = MBENC_B_FRAME_LCU32_DEBUG_SURFACE;
    }

    // Copy the finished CURBE into the DSH region of the kernel matching the LCU size
    PMHW_KERNEL_STATE kernelState = m_isMaxLcu64 ? &m_mbEncKernelStates[MBENC_B_LCU64_KRNIDX] : &m_mbEncKernelStates[MBENC_B_LCU32_KRNIDX];
    CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
        &curbe,
        kernelState->dwCurbeOffset,
        sizeof(curbe)));

    return eStatus;
}
//------------------------------------------------------------------------------------
// Send surfaces for the scaling kernel
//------------------------------------------------------------------------------------
//!
//! \brief    Program the surface states used by the scaling/conversion kernel
//! \details  Binding-table entries are consumed strictly in order through startBti;
//!           slots whose surface is not used by the requested operation are
//!           skipped so later surfaces keep their positions.
//!           The width/height (and, for the converted output, the U-plane Y offset)
//!           of the output surfaces referenced by params are overwritten with the
//!           dimensions supplied in params.
//! \param    [in] cmdBuffer
//!           Command buffer handed to CodecHalSetRcsSurfaceState; must not be null
//! \param    [in,out] params
//!           Kernel state, binding table, input/output surfaces and their
//!           dimensions; must not be null, and every surface pointer needed by
//!           params->downScaleConversionType must not be null
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, MOS_STATUS_INVALID_PARAMETER for an
//!           unsupported scaling stage, otherwise the status returned through the
//!           CODECHAL_ENCODE_CHK_* macros (including null-pointer failures)
//!
MOS_STATUS CodechalEncHevcStateG10::SendScalingAndConversionSurfaces(
    PMOS_COMMAND_BUFFER cmdBuffer, SurfaceParamsDsConv* params)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
    CODECHAL_ENCODE_CHK_NULL_RETURN(params);

    uint32_t startBti = 0;
    CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;

    PMHW_KERNEL_STATE kernelState = params->pKernelState;
    CODECHAL_ENCODE_CHK_NULL_RETURN(kernelState);
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = params->pBindingTable;
    CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);

    // The input surface is dereferenced unconditionally, so reject null first
    CODECHAL_ENCODE_CHK_NULL_RETURN(params->psInputSurface);

    // Work on a local copy so the caller's input surface is left untouched
    MOS_SURFACE inputSurface = *params->psInputSurface;
    inputSurface.dwWidth = params->dwInputFrameWidth;
    inputSurface.dwHeight = params->dwInputFrameHeight;
    inputSurface.UPlaneOffset.iYOffset = inputSurface.dwHeight;

    if(params->downScaleConversionType & convFromOrig)
    {
        // Conversion writes into the converted output surface, so it is required here
        CODECHAL_ENCODE_CHK_NULL_RETURN(params->psOutputConvertedSurface);

        params->psOutputConvertedSurface->dwWidth = params->dwOutputConvertedFrameWidth;
        params->psOutputConvertedSurface->dwHeight = params->dwOutputConvertedFrameHeight;
        params->psOutputConvertedSurface->UPlaneOffset.iYOffset = params->psOutputConvertedSurface->dwHeight;

        // Conversion input: source Y and UV
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfaceCodecParams,
            &inputSurface,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
            bindingTable->dwBindingTableEntries[startBti++],
            0,
            false));

        surfaceCodecParams.bUseUVPlane = true;
        surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBti++];
        surfaceCodecParams.bUse32UnormSurfaceFormat = true;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfaceCodecParams,
            kernelState));

        // Conversion output: converted Y and UV
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfaceCodecParams,
            params->psOutputConvertedSurface,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
            bindingTable->dwBindingTableEntries[startBti++],
            0,
            true));

        surfaceCodecParams.bUseUVPlane = true;
        surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBti++];
        surfaceCodecParams.bUse32UnormSurfaceFormat = false;
        surfaceCodecParams.bUse16UnormSurfaceFormat = false;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfaceCodecParams,
            kernelState));
    }
    else
    {
        // Skip the two slots the conversion input occupies in the other branch
        startBti += 2;

        //Source Y and UV
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfaceCodecParams,
            &inputSurface,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
            bindingTable->dwBindingTableEntries[startBti++],
            0,
            false));

        surfaceCodecParams.bUseUVPlane = !(params->downScaleConversionType & ds16xFromOrig);  // UV plane not available for 16x DS
        surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBti++];
        surfaceCodecParams.bUse32UnormSurfaceFormat = true;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfaceCodecParams,
            kernelState));
    }

    // Down-scaled destination, selected by the scaling stage with the conversion bit masked off
    switch (DsStage(params->downScaleConversionType & ~convFromOrig))
    {
    case ds4xFromOrig:
    case ds2x4xFromOrig:
    case ds16xFromOrig:
        {
            PMOS_SURFACE scaledSurface4x = params->psOutputScaledSurface4x;
            // These stages write the 4x output surface, so it is required here
            CODECHAL_ENCODE_CHK_NULL_RETURN(scaledSurface4x);

            scaledSurface4x->dwWidth = params->dwOutputScaledFrameWidth4x;
            scaledSurface4x->dwHeight = params->dwOutputScaledFrameHeight4x;

            //Dest Y
            CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
                &surfaceCodecParams,
                scaledSurface4x,
                m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE_DST].Value,
                bindingTable->dwBindingTableEntries[startBti++],
                0,
                true));

            surfaceCodecParams.bUse32UnormSurfaceFormat = true;

            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
                m_hwInterface,
                cmdBuffer,
                &surfaceCodecParams,
                kernelState));
            break;
        }
    case ds2xFromOrig:
    case dsConvUnknown:
        // No 4x destination for these stages: leave its slot unused
        startBti++;
        break;
    default:
        CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported Scaling/Conversion type");
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        return eStatus;
    }

    // MB stats surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceCodecParams,
        &m_mbStatisticsSurface,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE_DST].Value,
        bindingTable->dwBindingTableEntries[startBti++],
        0,
        true));

    surfaceCodecParams.bRawSurface = true;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfaceCodecParams,
        kernelState));

    if(params->downScaleConversionType & ds2xFromOrig)
    {
        PMOS_SURFACE scaledSurface2x = params->psOutputScaledSurface2x;
        // 2x scaling writes the 2x output surface, so it is required here
        CODECHAL_ENCODE_CHK_NULL_RETURN(scaledSurface2x);

        scaledSurface2x->dwWidth = params->dwOutputScaledFrameWidth2x;
        scaledSurface2x->dwHeight = params->dwOutputScaledFrameHeight2x;

        // 2xDS Dest Y
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfaceCodecParams,
            scaledSurface2x,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE_DST].Value,
            bindingTable->dwBindingTableEntries[startBti++],
            0,
            true));

        surfaceCodecParams.bUse32UnormSurfaceFormat = true;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfaceCodecParams,
            kernelState));
    }
    else
    {
        // No 2x destination: leave its slot unused
        startBti++;
    }

    // MB split surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceCodecParams,
        &m_mbSplitSurface,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
        bindingTable->dwBindingTableEntries[startBti++],
        0,
        true));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfaceCodecParams,
        kernelState));

    // Job Queue Header Surface for multi-threading for LCU32 B-kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceCodecParams,
        &m_jobQueueHeaderSurfaceForB.sResource,
        MOS_BYTES_TO_DWORDS(m_jobQueueHeaderSurfaceForB.dwSize),
        0,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
        bindingTable->dwBindingTableEntries[startBti++],
        true));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfaceCodecParams,
        kernelState));

    // Job Queue Header Surface for multi-threading for LCU64 B-kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceCodecParams,
        &m_jobQueueHeaderSurfaceForBLcu64.sResource,
        MOS_BYTES_TO_DWORDS(m_jobQueueHeaderSurfaceForBLcu64.dwSize),
        0,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
        bindingTable->dwBindingTableEntries[startBti++],
        true));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfaceCodecParams,
        kernelState));

    // Distortion surface for 64x64
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceCodecParams,
        &m_64x64DistortionSurface.sResource,
        MOS_BYTES_TO_DWORDS(m_64x64DistortionSurface.dwSize),
        0,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
        bindingTable->dwBindingTableEntries[startBti++],
        true));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfaceCodecParams,
        kernelState));

    return eStatus;
}
//!
//! \brief    Program the surface states used by the HME kernel
//! \details  Binding-table entries are consumed in order: MV output, optional 16x
//!           MV input, optional 4x distortion, current down-scaled picture (VME),
//!           four L0/L1 reference pairs (VME), then - for the 4x stage only - the
//!           4x down-scaled picture and the BRC distortion surface, and finally
//!           the MV/distortion summation surface.
//! \param    [in] cmdBuffer
//!           Command buffer handed to CodecHalSetRcsSurfaceState; must not be null
//! \param    [in] hmeLevel
//!           HME_LEVEL_4x or HME_LEVEL_16x
//! \param    [in] distType
//!           Non-zero selects the ME BRC distortion buffer, zero the BRC intra
//!           distortion buffer (4x stage only)
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, MOS_STATUS_INVALID_PARAMETER for an
//!           unsupported HME level, otherwise the status returned through the
//!           CODECHAL_ENCODE_CHK_* macros
//!
MOS_STATUS CodechalEncHevcStateG10::SendMeSurfaces(
    PMOS_COMMAND_BUFFER cmdBuffer,
    HmeLevel hmeLevel,
    HEVC_ME_DIST_TYPE distType)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_meKernelState);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_meKernelBindingTable);

    // Per-level inputs/outputs
    PMOS_SURFACE currDsSurface   = nullptr;
    PMOS_SURFACE mvOutputSurface = nullptr;
    bool         feed16xMvInto4x = false;
    bool         is4xStage       = false;

    if (hmeLevel == HME_LEVEL_4x)
    {
        currDsSurface   = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
        mvOutputSurface = &m_s4XMeMvDataBuffer;
        feed16xMvInto4x = m_b16XMeEnabled;
        is4xStage       = true;
    }
    else if (hmeLevel == HME_LEVEL_16x)
    {
        currDsSurface   = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
        mvOutputSurface = &m_s16XMeMvDataBuffer;
    }
    else
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported HME level");
        eStatus = MOS_STATUS_INVALID_PARAMETER;
        return eStatus;
    }

    uint32_t                               btiCursor = 0;
    PMHW_KERNEL_STATE                      meKernelState = m_meKernelState;
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC meBindingTable = m_meKernelBindingTable;
    CODECHAL_SURFACE_CODEC_PARAMS          surfParams;

    // HME output MV data surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfParams,
        mvOutputSurface,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
        meBindingTable->dwBindingTableEntries[btiCursor++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfParams,
        meKernelState));

    // 16x MV data fed into the 4x stage; the slot is consumed whether or not it is bound
    const uint32_t mv16xInputSlot = btiCursor++;
    if (feed16xMvInto4x)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfParams,
            &m_s16XMeMvDataBuffer,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
            meBindingTable->dwBindingTableEntries[mv16xInputSlot],
            0,
            false));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfParams,
            meKernelState));
    }

    // 4x ME distortion output; the slot is consumed whether or not it is bound
    const uint32_t distortion4xSlot = btiCursor++;
    if (is4xStage)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfParams,
            &m_s4XMeDistortionBuffer,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ME_DISTORTION_ENCODE].Value,
            meBindingTable->dwBindingTableEntries[distortion4xSlot],
            0,
            true));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfParams,
            meKernelState));
    }

    // VME surface for the current down-scaled picture
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
        &surfParams,
        currDsSurface,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
        meBindingTable->dwBindingTableEntries[btiCursor++]));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfParams,
        meKernelState));

    // VME reference surfaces: for each of the first four indices bind the L0
    // entry and then the L1 entry.
    for (uint32_t refIdx = 0; refIdx < 4; refIdx++)
    {
        for (uint32_t listSel = 0; listSel < 2; listSel++)
        {
            const bool    isList0 = (listSel == 0);
            CODEC_PICTURE refPic  = isList0
                ? m_hevcSliceParams->RefPicList[LIST_0][refIdx]
                : m_hevcSliceParams->RefPicList[LIST_1][refIdx];

            // NOTE(review): L0 validates through m_picIdx[].bValid while L1
            // validates through RefFrameList[] - confirm the difference is intended.
            bool refUsable = !CodecHal_PictureIsInvalid(refPic);
            if (refUsable)
            {
                if (isList0)
                {
                    refUsable = m_picIdx[refPic.FrameIdx].bValid;
                }
                else
                {
                    refUsable = !CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]);
                }
            }

            // Providing Dummy surface (the current picture) as per VME requirement
            // when the reference is not usable.
            PMOS_SURFACE vmeRefSurface = currDsSurface;
            if (refUsable)
            {
                // Picture Y VME
                uint8_t refPicIdx = m_picIdx[refPic.FrameIdx].ucPicIdx;
                uint8_t scaledIdx = m_refList[refPicIdx]->ucScalingIdx;
                vmeRefSurface = is4xStage
                    ? m_trackedBuf->Get4xDsSurface(scaledIdx)
                    : m_trackedBuf->Get16xDsSurface(scaledIdx);
            }

            CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
                &surfParams,
                vmeRefSurface,
                m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
                meBindingTable->dwBindingTableEntries[btiCursor++]));
            CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
                m_hwInterface,
                cmdBuffer,
                &surfParams,
                meKernelState));
        }
    }

    if (is4xStage)
    {
        // 4x Downscaled Surface
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfParams,
            currDsSurface,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
            meBindingTable->dwBindingTableEntries[btiCursor++],
            0,
            false));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfParams,
            meKernelState));

        // BRC Distortion Surface
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
            &surfParams,
            distType ? &m_brcBuffers.sMeBrcDistortionBuffer : &m_brcBuffers.sBrcIntraDistortionBuffer,
            m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
            meBindingTable->dwBindingTableEntries[btiCursor++],
            0,
            true));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface,
            cmdBuffer,
            &surfParams,
            meKernelState));
    }

    // Mv and Distortion summation surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfParams,
        &m_mvAndDistortionSumSurface.sResource,
        m_mvAndDistortionSumSurface.dwSize,
        0,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
        meBindingTable->dwBindingTableEntries[btiCursor++],
        true));
    surfParams.bRawSurface = true;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfParams,
        meKernelState));

    return eStatus;
}
//!
//! \brief    Program the surface states used by the BRC init/reset kernel
//! \details  Binds the BRC history buffer to the first binding-table entry and
//!           the ME BRC distortion surface to the second.
//! \param    [in] cmdBuffer
//!           Command buffer handed to CodecHalSetRcsSurfaceState
//! \param    [in] krnIdx
//!           BRC kernel to program; asserted to be CODECHAL_HEVC_BRC_INIT or
//!           CODECHAL_HEVC_BRC_RESET
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, otherwise the status returned through
//!           the CODECHAL_ENCODE_CHK_* macros
//!
MOS_STATUS CodechalEncHevcStateG10::SendBrcInitResetSurfaces(
    PMOS_COMMAND_BUFFER cmdBuffer,
    CODECHAL_HEVC_BRC_KRNIDX krnIdx)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_ASSERT(krnIdx == CODECHAL_HEVC_BRC_INIT || krnIdx == CODECHAL_HEVC_BRC_RESET);

    // Kernel state and binding table are both selected by the kernel index
    PMHW_KERNEL_STATE                      brcKernelState  = &m_brcKernelStates[krnIdx];
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC brcBindingTable = &m_brcKernelBindingTable[krnIdx];
    CODECHAL_SURFACE_CODEC_PARAMS          surfParams;

    // Fixed positions of the two surfaces within the binding table
    const uint32_t historyBufferSlot     = 0;
    const uint32_t distortionSurfaceSlot = 1;

    // BRC History Buffer
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfParams,
        &m_brcBuffers.resBrcHistoryBuffer,
        MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
        0,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
        brcBindingTable->dwBindingTableEntries[historyBufferSlot],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfParams,
        brcKernelState));

    // BRC Distortion Surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfParams,
        &m_brcBuffers.sMeBrcDistortionBuffer,
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value,
        brcBindingTable->dwBindingTableEntries[distortionSurfaceSlot],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface,
        cmdBuffer,
        &surfParams,
        brcKernelState));

    return eStatus;
}
//!
//! \brief    Program the surface states used by the BRC frame update kernel
//! \details  Nine surfaces are bound, consuming binding-table entries in this order:
//!           BRC history, previous PAK statistics, HCP_PIC_STATE read,
//!           HCP_PIC_STATE write, combined ENC-parameter buffer, BRC distortion,
//!           BRC constant data, pixel MB statistics, MV/distortion summation.
//!           The order must not be changed because each surface takes the next
//!           binding-table entry.
//! \param    [in] cmdBuffer
//!           Command buffer forwarded to CodecHalSetRcsSurfaceState
//! \return   MOS_STATUS
//!           eStatus (MOS_STATUS_SUCCESS) when the end of the function is reached;
//!           failing calls are handled by CODECHAL_ENCODE_CHK_STATUS_RETURN
//!
MOS_STATUS CodechalEncHevcStateG10::SendBrcFrameUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    PMHW_KERNEL_STATE                      frameUpdateKernel   = &m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE];
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC frameUpdateBindings = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_FRAME_UPDATE];
    const auto                            &bte                 = frameUpdateBindings->dwBindingTableEntries;

    // Per-frame recycled buffers selected by m_currRecycledBufIdx.
    PMOS_RESOURCE picStateReadBuf = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
    PMOS_SURFACE  constantData    = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];

    // Cacheability controls needed below, looked up once.
    const auto llcOnlyCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value;
    const auto meDistortionCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;

    CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
    uint32_t                      bti = 0;  // running index into the binding-table entries

    // BRC history buffer
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_brcBuffers.resBrcHistoryBuffer,
        MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
        0,
        llcOnlyCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // PAK statistics produced for the previous frame
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
        MOS_BYTES_TO_DWORDS(m_hevcBrcPakStatisticsSize),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // HCP_PIC_STATE, read side
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        picStateReadBuf,
        MOS_BYTES_TO_DWORDS(m_brcBuffers.dwBrcHcpPicStateSize),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // HCP_PIC_STATE, write side
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
        MOS_BYTES_TO_DWORDS(m_brcBuffers.dwBrcHcpPicStateSize),
        0,
        llcOnlyCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // Combined ENC-parameter buffer (resource obtained from the allocator)
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
        MOS_BYTES_TO_DWORDS(m_allocator->GetResourceSize(m_standard, brcInputForEncKernel)),
        0,
        llcOnlyCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // BRC distortion surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_brcBuffers.sMeBrcDistortionBuffer,
        meDistortionCache,
        bte[bti++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // BRC constant data surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        constantData,
        llcOnlyCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // Pixel MB statistics surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_mbStatisticsSurface,
        llcOnlyCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    // MV and distortion summation surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_mvAndDistortionSumSurface.sResource,
        MOS_BYTES_TO_DWORDS(m_mvAndDistortionSumSurface.dwSize),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, frameUpdateKernel));

    return eStatus;
}
//!
//! \brief    Program the surface states used by the BRC LCU update kernel
//! \details  Seven surfaces are bound, consuming binding-table entries in this order:
//!           BRC history, BRC distortion, pixel MB statistics, MB QP, MB split,
//!           intra distortion, CU split. The order must not be changed because
//!           each surface takes the next binding-table entry.
//! \param    [in] cmdBuffer
//!           Command buffer forwarded to CodecHalSetRcsSurfaceState
//! \return   MOS_STATUS
//!           eStatus (MOS_STATUS_SUCCESS) when the end of the function is reached;
//!           failing calls are handled by CODECHAL_ENCODE_CHK_STATUS_RETURN
//!
MOS_STATUS CodechalEncHevcStateG10::SendBrcLcuUpdateSurfaces(PMOS_COMMAND_BUFFER cmdBuffer)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    PMHW_KERNEL_STATE                      lcuUpdateKernel   = &m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE];
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC lcuUpdateBindings = &m_brcKernelBindingTable[CODECHAL_HEVC_BRC_LCU_UPDATE];
    const auto                            &bte               = lcuUpdateBindings->dwBindingTableEntries;

    // Cacheability controls needed below, looked up once.
    const auto llcOnlyCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value;
    const auto meDistortionCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;

    CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
    uint32_t                      bti = 0;  // running index into the binding-table entries

    // BRC history buffer
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_brcBuffers.resBrcHistoryBuffer,
        MOS_BYTES_TO_DWORDS(m_brcHistoryBufferSize),
        0,
        llcOnlyCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    // BRC distortion surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_brcBuffers.sMeBrcDistortionBuffer,
        meDistortionCache,
        bte[bti++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    // Pixel MB statistics surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_mbStatisticsSurface,
        llcOnlyCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    // MB QP surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_brcBuffers.sBrcMbQpBuffer,
        llcOnlyCache,
        bte[bti++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    // MB split surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_mbSplitSurface,
        llcOnlyCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    // Intra distortion surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_brcBuffers.sBrcIntraDistortionBuffer,
        meDistortionCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    // CU split surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_cuSplitSurface,
        meDistortionCache,
        bte[bti++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, lcuUpdateKernel));

    return eStatus;
}
//!
//! \brief    Program the surface states used by the MBENC I kernel
//! \details  The input picture is m_formatConvertedSurface[0] when m_is10BitHevc is set,
//!           otherwise m_rawSurfaceToEnc. Binding-table entries are consumed in this order:
//!           current VME surface, eight dummy VME reference surfaces, source Y,
//!           source UV, intermediate CU record, PAK object commands, CU packet for PAK,
//!           scratch, MB QP, I-kernel constant table, LCU level input, concurrent
//!           thread group data, BRC combined ENC parameters, CU split, kernel debug.
//!           The order must not be changed because each surface takes the next
//!           binding-table entry.
//! \param    [in] cmdBuffer
//!           Command buffer forwarded to CodecHalSetRcsSurfaceState
//! \return   MOS_STATUS
//!           eStatus (MOS_STATUS_SUCCESS) when the end of the function is reached;
//!           failing calls are handled by CODECHAL_ENCODE_CHK_STATUS_RETURN
//!
MOS_STATUS CodechalEncHevcStateG10::SendMbEncSurfacesIKernel(PMOS_COMMAND_BUFFER cmdBuffer)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    PMHW_KERNEL_STATE                      iKernel   = &m_mbEncKernelStates[MBENC_I_KRNIDX];
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC iBindings = &m_mbEncKernelBindingTable[MBENC_I_KRNIDX];
    const auto                            &bte       = iBindings->dwBindingTableEntries;

    // 10-bit content is encoded from the format-converted surface.
    PMOS_SURFACE srcSurface = m_rawSurfaceToEnc;
    if (m_is10BitHevc)
    {
        srcSurface = &m_formatConvertedSurface[0];
    }

    // Cacheability controls needed below, looked up once.
    const auto currEncodeCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value;
    const auto refEncodeCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value;
    const auto pakObjectCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value;
    const auto llcOnlyCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value;
    const auto meDistortionCache =
        m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_BRC_ME_DISTORTION_ENCODE].Value;

    CODECHAL_SURFACE_CODEC_PARAMS surfaceParams;
    uint32_t                      bti = 0;  // running index into the binding-table entries

    // Current picture as VME surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
        &surfaceParams,
        srcSurface,
        currEncodeCache,
        bte[bti++]));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // Eight dummy VME surfaces: programmed even though unused (VME requirement);
    // each one currently points at the input surface.
    for (uint32_t dummySlot = 0; dummySlot < 8; ++dummySlot)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
            &surfaceParams,
            srcSurface,
            refEncodeCache,
            bte[bti++]));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
            m_hwInterface, cmdBuffer, &surfaceParams, iKernel));
    }

    // Source Y and UV: one surface state call covering two binding-table entries.
    // The UV entry is taken after the 2D params are initialized.
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        srcSurface,
        currEncodeCache,
        bte[bti++],
        0,
        false));
    surfaceParams.bUseUVPlane            = true;
    surfaceParams.dwUVBindingTableOffset = bte[bti++];
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // Intermediate CU record surface: bound through a local copy whose width and
    // height are overridden with the LCU32-aligned dimensions.
    MOS_SURFACE cuRecordLcu32 = m_intermediateCuRecordSurfaceLcu32;
    cuRecordLcu32.dwWidth     = m_widthAlignedLcu32;
    cuRecordLcu32.dwHeight    = m_heightAlignedLcu32 >> 1;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &cuRecordLcu32,
        pakObjectCache,
        bte[bti++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // PAK object command surface: first m_mvOffset bytes of the MB code surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_resMbCodeSurface,
        MOS_BYTES_TO_DWORDS(m_mvOffset),
        0,
        pakObjectCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // CU packet for PAK: remainder of the MB code surface, starting at m_mvOffset
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_resMbCodeSurface,
        MOS_BYTES_TO_DWORDS(m_mbCodeSize - m_mvOffset),
        m_mvOffset,
        pakObjectCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // Scratch surface for internal use only
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_scratchSurface,
        llcOnlyCache,
        bte[bti++],
        0,
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // MB QP data input (output of LCU BRC)
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_brcBuffers.sBrcMbQpBuffer,
        llcOnlyCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // ENC I constant table surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_encConstantTableForI.sResource,
        MOS_BYTES_TO_DWORDS(m_encConstantTableForI.dwSize),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // LCU level data input
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_lcuLevelInputData.sResource,
        MOS_BYTES_TO_DWORDS(m_lcuLevelInputData.dwSize),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // Concurrent thread group data surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_concurrentThreadGroupData.sResource,
        MOS_BYTES_TO_DWORDS(m_concurrentThreadGroupData.dwSize),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // BRC combined ENC parameter surface (resource obtained from the allocator)
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        (MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
        MOS_BYTES_TO_DWORDS(m_allocator->GetResourceSize(m_standard, brcInputForEncKernel)),
        0,
        llcOnlyCache,
        bte[bti++],
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // CU split surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
        &surfaceParams,
        &m_cuSplitSurface,
        meDistortionCache,
        bte[bti++],
        0,
        false));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    // Kernel debug surface
    CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
        &surfaceParams,
        &m_kernelDebug.sResource,
        MOS_BYTES_TO_DWORDS(m_kernelDebug.dwSize),
        0,
        llcOnlyCache,
        bte[bti++],
        true));
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
        m_hwInterface, cmdBuffer, &surfaceParams, iKernel));

    return eStatus;
}
MOS_STATUS CodechalEncHevcStateG10::SendMbEncSurfacesBKernel(PMOS_COMMAND_BUFFER cmdBuffer)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);
CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);
uint32_t startBti = 0;
CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
PMOS_SURFACE inputSurface = m_is10BitHevc ? &m_formatConvertedSurface[0] : m_rawSurfaceToEnc;
CODECHAL_ENC_HEVC_MBENC_KRNIDX_G10 krnIdx = m_isMaxLcu64 ? MBENC_B_LCU64_KRNIDX : MBENC_B_LCU32_KRNIDX;
PMHW_KERNEL_STATE kernelState = &m_mbEncKernelStates[krnIdx];
PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable = &m_mbEncKernelBindingTable[krnIdx];
//Source Y and UV
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
inputSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
false));
surfaceCodecParams.bUseUVPlane = true;
surfaceCodecParams.dwUVBindingTableOffset = bindingTable->dwBindingTableEntries[startBti++];
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
MOS_SURFACE tempSurface = m_intermediateCuRecordSurfaceLcu32;
if (!m_isMaxLcu64)
{
// Intermediate cu record surface -- changing Width and height to LCU32 aligned
tempSurface.dwWidth = m_widthAlignedLcu32;
tempSurface.dwHeight = m_heightAlignedLcu32 >> 1;
// Intermediate/Enc cu record surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&tempSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
else
{
// Intermediate/Enc cu record surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&tempSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Second Intermediate/Enc cu record surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_secondIntermediateCuRecordSurfaceLcu32,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
// PAK object command surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_resMbCodeSurface,
m_mvOffset,
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
surfaceCodecParams.bRawSurface = m_isMaxLcu64;
surfaceCodecParams.dwSize = surfaceCodecParams.bRawSurface ? surfaceCodecParams.dwSize : MOS_BYTES_TO_DWORDS(surfaceCodecParams.dwSize);
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// CU packet for PAK surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_resMbCodeSurface,
MOS_BYTES_TO_DWORDS(m_mbCodeSize - m_mvOffset),
m_mvOffset,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// VME surfaces
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
inputSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
for(auto surface_idx = 0; surface_idx < 4; surface_idx++)
{
CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surface_idx];
if (!CodecHal_PictureIsInvalid(refPic) &&
!CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
{
uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
// Picture Y VME
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
&m_refList[idx]->sRefBuffer,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
CODECHAL_DEBUG_TOOL(
m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
&m_refList[idx]->sRefBuffer,
CodechalDbgAttr::attrReferenceSurfaces,
refSurfName.data())));
}
else
{
// Providing Dummy surface as per VME requirement.
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
inputSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
refPic = m_hevcSliceParams->RefPicList[LIST_1][surface_idx];
if (!CodecHal_PictureIsInvalid(refPic) &&
!CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
{
uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
// Picture Y VME
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
&m_refList[idx]->sRefBuffer,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
CODECHAL_DEBUG_TOOL(
m_debugInterface->m_refIndex = (uint16_t)refPic.FrameIdx;
std::string refSurfName = "RefSurf" + std::to_string(static_cast<uint32_t>(m_debugInterface->m_refIndex));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
&m_refList[idx]->sRefBuffer,
CodechalDbgAttr::attrReferenceSurfaces,
refSurfName.data())));
}
else
{
// Providing Dummy surface as per VME requirement.
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
inputSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
}
// MB QP data input surface from Output of LCU BRC
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_brcBuffers.sBrcMbQpBuffer,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Enc B 32x32 Constant Table Surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_encConstantTableForB.sResource,
MOS_BYTES_TO_DWORDS(m_encConstantTableForB.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Colocated CU Motion Vector Data Surface
uint8_t mbCodeIdxForTempMVP = 0xFF;
if (m_hevcPicParams->CollocatedRefPicIndex != 0xFF && m_hevcPicParams->CollocatedRefPicIndex < CODEC_MAX_NUM_REF_FRAME_HEVC)
{
uint8_t frameIdx = m_hevcPicParams->RefFrameList[m_hevcPicParams->CollocatedRefPicIndex].FrameIdx;
mbCodeIdxForTempMVP = m_refList[frameIdx]->ucScalingIdx;
}
if (mbCodeIdxForTempMVP == 0xFF && m_hevcSliceParams->slice_temporal_mvp_enable_flag)
{
// Temporal reference MV index is invalid and so disable the temporal MVP
CODECHAL_ENCODE_ASSERT(false);
m_hevcSliceParams->slice_temporal_mvp_enable_flag = false;
}
if(mbCodeIdxForTempMVP == 0xFF)
{
startBti++;
}
else
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
m_trackedBuf->GetMvTemporalBuffer(mbCodeIdxForTempMVP),
MOS_BYTES_TO_DWORDS(m_sizeOfMvTemporalBuffer),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
// HME motion predictor data
if(m_hmeSupported)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_s4XMeMvDataBuffer,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_MV_DATA_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
else
{
startBti++;
}
// Lcu level data input
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_lcuLevelInputData.sResource,
MOS_BYTES_TO_DWORDS(m_lcuLevelInputData.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Lcu encoding scratch surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_lcuEncodingScratchSurface.sResource,
MOS_BYTES_TO_DWORDS(m_lcuEncodingScratchSurface.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
if (m_isMaxLcu64)
{
// 64x64 Distortion Surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_64x64DistortionSurface.sResource,
m_64x64DistortionSurface.dwSize,
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
surfaceCodecParams.bRawSurface = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
// Concurrent Thread Group Data Surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_concurrentThreadGroupData.sResource,
MOS_BYTES_TO_DWORDS(m_concurrentThreadGroupData.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Brc Combined Enc parameter Surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
(MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
MOS_BYTES_TO_DWORDS(m_allocator->GetResourceSize(m_standard, brcInputForEncKernel)),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Job Queue Header buffer surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_jobQueueHeaderSurfaceForB.sResource,
m_jobQueueHeaderSurfaceForB.dwSize,
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
surfaceCodecParams.bRawSurface = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
if (m_isMaxLcu64)
{
// Job Queue Data buffer surface for CU32
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_jobQueueDataSurfaceForBLcu64Cu32,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Residual Data Scratch Surface LCU32
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_residualDataScratchSurfaceForBLcu32,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// CU Split Surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_cuSplitSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
else
{
// CU Split Surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_cuSplitSurface,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Residual Data Scratch Surface LCU32
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_residualDataScratchSurfaceForBLcu32,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
if (m_isMaxLcu64)
{
//Source Y 2xDS
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER),
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_HME_DOWNSAMPLED_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Intermediate/Enc cu record surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_intermediateCuRecordSurfaceLcu64B,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_PAK_OBJECT_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Lcu64 constant data surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_encConstantTableForLcu64B.sResource,
MOS_BYTES_TO_DWORDS(m_encConstantTableForLcu64B.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
false));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Lcu storage surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_lcuEncodingScratchSurfaceLcu64B.sResource,
MOS_BYTES_TO_DWORDS(m_lcuEncodingScratchSurfaceLcu64B.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// VME surfaces
// 2xDS Source VME surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER),
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_CURR_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
for(auto surface_idx = 0; surface_idx < 4; surface_idx++)
{
CODEC_PICTURE refPic = m_hevcSliceParams->RefPicList[LIST_0][surface_idx];
if (!CodecHal_PictureIsInvalid(refPic) &&
!CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
{
uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
uint8_t scaledIdx = m_refList[idx]->ucScalingIdx;
// Picture Y VME
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
m_trackedBuf->Get2xDsSurface(scaledIdx),
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
else
{
// Providing Dummy surface as per VME requirement.
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER),
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
refPic = m_hevcSliceParams->RefPicList[LIST_1][surface_idx];
if (!CodecHal_PictureIsInvalid(refPic) &&
!CodecHal_PictureIsInvalid(m_hevcPicParams->RefFrameList[refPic.FrameIdx]))
{
uint8_t idx = m_hevcPicParams->RefFrameList[refPic.FrameIdx].FrameIdx;
uint8_t scaledIdx = m_refList[idx]->ucScalingIdx;
// Picture Y VME
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
m_trackedBuf->Get2xDsSurface(scaledIdx),
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
else
{
// Providing Dummy surface as per VME requirement.
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParamsVME(
&surfaceCodecParams,
m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER),
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_REF_ENCODE].Value,
bindingTable->dwBindingTableEntries[startBti++]));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
}
// Job Queue Header buffer surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_jobQueueHeaderSurfaceForBLcu64.sResource,
m_jobQueueHeaderSurfaceForBLcu64.dwSize,
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
surfaceCodecParams.bRawSurface = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Job Queue Data buffer surface LCU64
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_jobQueueDataSurfaceForBLcu64,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
// Residual Data Scratch Surface LCU64
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams2D(
&surfaceCodecParams,
&m_residualDataScratchSurfaceForBLcu64,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
0,
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
}
// Kernel debug surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitSurfaceCodecParams1D(
&surfaceCodecParams,
&m_kernelDebug.sResource,
MOS_BYTES_TO_DWORDS(m_kernelDebug.dwSize),
0,
m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_ELLC_LLC_ONLY].Value,
bindingTable->dwBindingTableEntries[startBti++],
true));
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
m_hwInterface,
cmdBuffer,
&surfaceCodecParams,
kernelState));
return eStatus;
}
//!
//! \brief    Fill the LCU level input surface with per-slice information
//! \details  Locks m_lcuLevelInputData for writing and writes one record per LCU of
//!           every slice. Each record carries the index of the first LCU of its slice,
//!           the index at which the next slice starts, a 1-based slice id and the
//!           slice QP (picture QpY plus slice_qp_delta).
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, MOS_STATUS_NULL_POINTER if the surface
//!           resource is null
//!
MOS_STATUS CodechalEncHevcStateG10::GenerateLcuLevelData()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // Nothing can be written when the surface has not been allocated
    if (Mos_ResourceIsNull(&m_lcuLevelInputData.sResource))
    {
        eStatus = MOS_STATUS_NULL_POINTER;
        CODECHAL_ENCODE_ASSERTMESSAGE("Null pointer exception\n");
        return eStatus;
    }

    MOS_LOCK_PARAMS lockFlags;
    MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
    lockFlags.WriteOnly = 1;

    auto lcuRecord = (PCODECHAL_ENC_HEVC_LCU_LEVEL_DATA_G10)m_osInterface->pfnLockResource(
        m_osInterface,
        &m_lcuLevelInputData.sResource,
        &lockFlags);
    CODECHAL_ENCODE_CHK_NULL_RETURN(lcuRecord);

    // Running index of the first LCU belonging to the slice being processed
    uint32_t sliceFirstLcu = 0;
    for (uint32_t sliceIdx = 0; sliceIdx < m_numSlices; sliceIdx++)
    {
        PCODEC_HEVC_ENCODE_SLICE_PARAMS slice = m_hevcSliceParams + sliceIdx;

        // Every LCU of the slice receives the same slice-level values
        for (uint32_t lcu = 0; lcu < slice->NumLCUsInSlice; lcu++)
        {
            lcuRecord->SliceStartLcuIndex = static_cast<uint16_t>(sliceFirstLcu);
            // End index is the start index of the following slice
            lcuRecord->SliceEndLcuIndex   = static_cast<uint16_t>(sliceFirstLcu + slice->NumLCUsInSlice);
            lcuRecord->SliceId            = static_cast<uint16_t>(sliceIdx) + 1;
            lcuRecord->SliceLevelQP       = static_cast<uint16_t>(m_hevcPicParams->QpY + slice->slice_qp_delta);
            lcuRecord++;
        }

        sliceFirstLcu += slice->NumLCUsInSlice;
    }

    m_osInterface->pfnUnlockResource(
        m_osInterface,
        &m_lcuLevelInputData.sResource);

    return eStatus;
}
//!
//! \brief    Build the CURBE of the scaling and conversion kernel and add it to the DSH
//! \param    [in] params
//!           Curbe parameters; both params and params->pKernelState are null-checked
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::SetCurbeScalingAndConversion(
    CodechalEncodeCscDs::CurbeParams* params)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    CODECHAL_ENCODE_CHK_NULL_RETURN(params);
    CODECHAL_ENCODE_CHK_NULL_RETURN(params->pKernelState);

    auto kernelState = params->pKernelState;

    // Start from the default curbe contents
    DsConvCurbeDataG10 cmd = DsConvCurbeDataG10();

    cmd.DW1_ConvertFlag    = params->bConvertFlag;
    cmd.DW1_DownscaleStage = params->downscaleStage;

    // MB statistics are dumped only with BRC enabled and only for the 4x and 2x+4x stages
    const bool statsStage =
        (params->downscaleStage == dsStage4x) || (params->downscaleStage == dsStage2x4x);
    cmd.DW1_MbStatisticsDumpFlag = (m_brcEnabled && statsStage) ? 1 : 0;

    if (params->bUseLCU32)
    {
        cmd.DW1_LcuSize = 1;  // LCU32
    }
    else
    {
        cmd.DW1_LcuSize      = 0;  // LCU64
        cmd.DW1_JobQueueSize = 32;
    }

    cmd.DW2_OriginalPicWidthInSamples  = params->dwInputPictureWidth;
    cmd.DW2_OriginalPicHeightInSamples = params->dwInputPictureHeight;

    // Binding table indices of the surfaces used by the kernel
    cmd.DW3_BTI_InputConversionSurface                   = SCALING_CONVERSION_10BIT_Y;
    cmd.DW4_BTI_Value                                    = SCALING_CONVERSION_8BIT_Y;
    cmd.DW5_BTI_4xDsSurface                              = SCALING_CONVERSION_4xDS;  // 4xDS for both 4x and 16x
    cmd.DW6_BTI_MBStatsSurface                           = SCALING_CONVERSION_MB_STATS;
    cmd.DW7_BTI_2xDsSurface                              = SCALING_CONVERSION_2xDS;
    cmd.DW8_BTI_MB_Split_Surface                         = SCALING_CONVERSION_MB_SPLIT_SURFACE;
    cmd.DW9_BTI_LCU32_JobQueueScratchBufferSurface       = SCALING_CONVERSION_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
    cmd.DW10_BTI_LCU64_CU32_JobQueueScratchBufferSurface = SCALING_CONVERSION_LCU64_JOB_QUEUE_SCRATCH_SURFACE;
    cmd.DW11_BTI_LCU64_CU32_64x64_DistortionSurface      = SCALING_CONVERSION_LCU64_64x64_DISTORTION_SURFACE;

    // Copy the curbe into the kernel's DSH region at its curbe offset
    CODECHAL_ENCODE_CHK_STATUS_RETURN(kernelState->m_dshRegion.AddData(
        &cmd,
        kernelState->dwCurbeOffset,
        sizeof(cmd)));

    return eStatus;
}
//!
//! \brief    Program and (when appropriate) submit one pass of the scaling and conversion kernel
//! \details  params->stageDsConversion selects which input/output surfaces are bound and which
//!           downscale stage is written to the curbe. The function assigns DSH/SSH space,
//!           sets the interface descriptor, curbe and surfaces, adds a media object walker
//!           command and submits the command buffer unless single task phase is active and
//!           this is not the last task in the phase.
//! \param    [in] params
//!           Kernel parameters; stageDsConversion, psFormatConvertedSurface and
//!           psFormatConversionOnlyInputSurface are read here
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success (including the early exit when this is not the first field),
//!           MOS_STATUS_INVALID_PARAMETER for an unhandled stageDsConversion value,
//!           MOS_STATUS_UNKNOWN if the HW walker is disabled, else the fail reason of a failing call
//!
MOS_STATUS CodechalEncHevcStateG10::EncodeScalingAndConversionKernel(
CodechalEncodeCscDs::KernelParams* params)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
CODECHAL_ENCODE_CHK_NULL_RETURN(params);
CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalingAndConversionKernelState);
CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalingAndConversionKernelBindingTable);
CODECHAL_ENCODE_CHK_NULL_RETURN(m_refList);
if (!m_firstField)
{
// Both fields are scaled when the first field comes in, no need to scale again
return eStatus;
}
// Make sure the downscaled tracked-buffer surfaces exist before they are bound below
if (m_scalingEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_trackedBuf->AllocateSurfaceDS());
}
if (m_2xScalingEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_trackedBuf->AllocateSurface2xDS());
}
PerfTagSetting perfTag;
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_SCALING_KERNEL);
PMHW_KERNEL_STATE kernelState = m_scalingAndConversionKernelState;
// If Single Task Phase is not enabled, use BT count for the kernel state.
// With single task phase the request is made once, for the first task, using m_maxBtCount.
if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
{
uint32_t maxBtCount = m_singleTaskPhaseSupported ?
m_maxBtCount : kernelState->KernelParams.iBTCount;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
m_stateHeapInterface,
maxBtCount));
m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
// Per-stage settings; encFunctionType and downscaleStage are assigned by every
// non-default case of the switch below, the default case returns.
CODECHAL_MEDIA_STATE_TYPE encFunctionType;
// NOTE(review): the entry for the current reconstructed picture is dereferenced in the
// switch without a null check - confirm it is always populated by the caller.
PCODEC_REF_LIST currRefList = m_refList[m_currReconstructedPic.FrameIdx];
DsStage downscaleStage;
bool convertFlag = false;
uint32_t inputFrameWidth = 0, inputFrameHeight = 0;
uint32_t outputConvertedFrameWidth = 0, outputConvertedFrameHeight = 0;
uint32_t outputScaledFrameWidth2x = 0, outputScaledFrameHeight2x = 0;
uint32_t outputScaledFrameWidth4x = 0, outputScaledFrameHeight4x = 0;
PMOS_SURFACE inputSurface = nullptr, outputConvertedSurface = nullptr, outputScaledSurface4x = nullptr, outputScaledSurface2x = nullptr;
// Each "convDs*" case only adds the format-converted output and then falls through
// into the matching pure downscale case.
switch(params->stageDsConversion)
{
case convDs2xFromOrig:
convertFlag = true;
outputConvertedSurface = params->psFormatConvertedSurface;
outputConvertedFrameWidth = m_oriFrameWidth;
outputConvertedFrameHeight = m_oriFrameHeight;
// break omitted on purpose
case ds2xFromOrig:
encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
downscaleStage = dsStage2x;
inputSurface = m_rawSurfaceToEnc;
inputFrameWidth = m_oriFrameWidth;
inputFrameHeight = m_oriFrameHeight;
outputScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
outputScaledFrameWidth2x = m_downscaledWidth2x;
outputScaledFrameHeight2x = m_downscaledHeight2x;
currRefList->b2xScalingUsed = true;
break;
case convDs2x4xFromOrig:
convertFlag = true;
outputConvertedSurface = params->psFormatConvertedSurface;
outputConvertedFrameWidth = m_oriFrameWidth;
outputConvertedFrameHeight = m_oriFrameHeight;
// break omitted on purpose
case ds2x4xFromOrig:
// Combined pass: both the 2x and the 4x downscaled outputs are produced
encFunctionType = CODECHAL_MEDIA_STATE_2X_4X_SCALING;
downscaleStage = dsStage2x4x;
inputSurface = m_rawSurfaceToEnc;
inputFrameWidth = m_oriFrameWidth;
inputFrameHeight = m_oriFrameHeight;
outputScaledSurface4x = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
outputScaledFrameWidth4x = m_downscaledWidth4x;
outputScaledFrameHeight4x = m_downscaledHeight4x;
outputScaledSurface2x = m_trackedBuf->Get2xDsSurface(CODEC_CURR_TRACKED_BUFFER);
outputScaledFrameWidth2x = m_downscaledWidth2x;
outputScaledFrameHeight2x = m_downscaledHeight2x;
currRefList->b4xScalingUsed = true;
currRefList->b2xScalingUsed = true;
break;
case convDs4xFromOrig:
convertFlag = true;
outputConvertedSurface = params->psFormatConvertedSurface;
outputConvertedFrameWidth = m_oriFrameWidth;
outputConvertedFrameHeight = m_oriFrameHeight;
// break omitted on purpose
case ds4xFromOrig:
encFunctionType = CODECHAL_MEDIA_STATE_4X_SCALING;
downscaleStage = dsStage4x;
inputSurface = m_rawSurfaceToEnc;
inputFrameWidth = m_oriFrameWidth;
inputFrameHeight = m_oriFrameHeight;
outputScaledSurface4x = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
outputScaledFrameWidth4x = m_downscaledWidth4x;
outputScaledFrameHeight4x = m_downscaledHeight4x;
currRefList->b4xScalingUsed = true;
break;
case ds16xFromOrig:
// The 16x stage reads the current 4x downscaled surface as input and passes the
// 16x surface through the "4x" output variables.
encFunctionType = CODECHAL_MEDIA_STATE_16X_SCALING;
downscaleStage = dsStage16x;
inputSurface = m_trackedBuf->Get4xDsSurface(CODEC_CURR_TRACKED_BUFFER);
inputFrameWidth = m_downscaledWidth4x;
inputFrameHeight = m_downscaledHeight4x;
outputScaledSurface4x = m_trackedBuf->Get16xDsSurface(CODEC_CURR_TRACKED_BUFFER);
outputScaledFrameWidth4x = m_downscaledWidth16x;
outputScaledFrameHeight4x = m_downscaledHeight16x;
currRefList->b16xScalingUsed = true;
break;
case convFromOrig:
// Conversion only: downscaling is disabled and the input comes from
// params->psFormatConversionOnlyInputSurface instead of m_rawSurfaceToEnc.
encFunctionType = CODECHAL_MEDIA_STATE_2X_SCALING;
downscaleStage = dsDisabled;
convertFlag = true;
inputSurface = params->psFormatConversionOnlyInputSurface;
inputFrameWidth = m_oriFrameWidth;
inputFrameHeight = m_oriFrameHeight;
outputConvertedSurface = params->psFormatConvertedSurface;
outputConvertedFrameWidth = m_oriFrameWidth;
outputConvertedFrameHeight = m_oriFrameHeight;
break;
default:
CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported DownScale or Conversion type requested");
eStatus = MOS_STATUS_INVALID_PARAMETER;
return eStatus;
}
// Query the resource info of the converted output surface before it is bound
if(convertFlag)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(
m_osInterface,
params->psFormatConvertedSurface));
}
//Setup Scaling DSH
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
m_stateHeapInterface,
kernelState,
false,
0,
false,
m_storeData));
MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
MOS_ZeroMemory(&idParams, sizeof(idParams));
idParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
m_stateHeapInterface,
1,
&idParams));
// Call Scaling-Conversion curbe
// The curbe always carries the original picture size, also for the 16x stage
CodechalEncodeCscDs::CurbeParams scalingConversionCurbeParams;
MOS_ZeroMemory(&scalingConversionCurbeParams, sizeof(scalingConversionCurbeParams));
scalingConversionCurbeParams.pKernelState = kernelState;
scalingConversionCurbeParams.bConvertFlag = convertFlag;
scalingConversionCurbeParams.bUseLCU32 = !m_isMaxLcu64;
scalingConversionCurbeParams.downscaleStage = downscaleStage;
scalingConversionCurbeParams.dwInputPictureWidth = m_oriFrameWidth;
scalingConversionCurbeParams.dwInputPictureHeight = m_oriFrameHeight;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeScalingAndConversion(&scalingConversionCurbeParams));
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_DSH_TYPE,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
encFunctionType,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_ISH_TYPE,
kernelState));
)
// NOTE(review): the status-check macros used from here on appear to return early on
// failure; those paths leave without calling pfnReturnCommandBuffer - confirm this is acceptable.
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
sendKernelCmdsParams.EncFunctionType = encFunctionType;
sendKernelCmdsParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
// Add binding table
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
m_stateHeapInterface,
kernelState));
// set binding table surfaces
SurfaceParamsDsConv scalingConversionSurfaceParams;
MOS_ZeroMemory(&scalingConversionSurfaceParams, sizeof(scalingConversionSurfaceParams));
scalingConversionSurfaceParams.psInputSurface = inputSurface;
scalingConversionSurfaceParams.dwInputFrameWidth = inputFrameWidth;
scalingConversionSurfaceParams.dwInputFrameHeight = inputFrameHeight;
scalingConversionSurfaceParams.psOutputScaledSurface2x = outputScaledSurface2x;
scalingConversionSurfaceParams.dwOutputScaledFrameWidth2x = outputScaledFrameWidth2x;
scalingConversionSurfaceParams.dwOutputScaledFrameHeight2x = outputScaledFrameHeight2x;
scalingConversionSurfaceParams.psOutputScaledSurface4x = outputScaledSurface4x;
scalingConversionSurfaceParams.dwOutputScaledFrameWidth4x = outputScaledFrameWidth4x;
scalingConversionSurfaceParams.dwOutputScaledFrameHeight4x = outputScaledFrameHeight4x;
scalingConversionSurfaceParams.pKernelState = kernelState;
scalingConversionSurfaceParams.pBindingTable = m_scalingAndConversionKernelBindingTable;
scalingConversionSurfaceParams.downScaleConversionType = params->stageDsConversion;
// Converted-output fields stay zeroed when no format conversion is requested
if(convertFlag)
{
scalingConversionSurfaceParams.psOutputConvertedSurface = outputConvertedSurface;
scalingConversionSurfaceParams.dwOutputConvertedFrameWidth = outputConvertedFrameWidth;
scalingConversionSurfaceParams.dwOutputConvertedFrameHeight = outputConvertedFrameHeight;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendScalingAndConversionSurfaces(
&cmdBuffer,
&scalingConversionSurfaceParams));
// Add dump for scaling surface state heap here
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_SSH_TYPE,
kernelState));
)
//Walker programming goes here
if (!m_hwWalker)
{
eStatus = MOS_STATUS_UNKNOWN;
CODECHAL_ENCODE_ASSERTMESSAGE("HW walker should be enabled.");
return eStatus;
}
// The walker covers the 16x downscaled size for the 16x stage and the 4x downscaled
// size for every other stage.
uint32_t resolutionX = (params->stageDsConversion == ds16xFromOrig) ? m_downscaledWidth16x : m_downscaledWidth4x;
uint32_t resolutionY = (params->stageDsConversion == ds16xFromOrig) ? m_downscaledHeight16x : m_downscaledHeight4x;
CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
walkerCodecParams.WalkerMode = m_walkerMode;
walkerCodecParams.dwResolutionX = resolutionX >> 3; /* looping for Walker is needed at 8x8 block level */
walkerCodecParams.dwResolutionY = resolutionY >> 3;
walkerCodecParams.bNoDependency = true; /* Enforce no dependency dispatch order for Scaling kernel, */
walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
walkerCodecParams.ucGroupId = m_groupId;
MHW_WALKER_PARAMS walkerParams;
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
m_hwInterface,
&walkerParams,
&walkerCodecParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
&cmdBuffer,
&walkerParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
// Second dump of the scaling surface state heap, taken after the walker command was added
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_SSH_TYPE,
kernelState));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
m_stateHeapInterface,
kernelState));
// The batch buffer is terminated only when it is about to be submitted: always without
// single task phase, otherwise only for the last task in the phase.
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
m_stateHeapInterface));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
}
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
encFunctionType,
nullptr)));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
// Submit under the same condition as above and clear the last-task flag afterwards
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
m_lastTaskInPhase = false;
}
return eStatus;
}
//!
//! \brief    Run the scaling / format conversion passes needed for the current frame
//! \details  Selects the downscale stage from m_hmeSupported and m_isMaxLcu64, ORs in the
//!           format conversion request when the luma bit depth is above 8, runs the
//!           kernel once for that stage and once more for the 16x stage when 16x ME is
//!           supported.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::PerformScalingAndConversion()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // Both the scaling and the HME kernels rely on the HW walker
    CODECHAL_ENCODE_ASSERT(m_hwWalker);

    // Scaling is performed whether or not HME is used for the current frame
    CodechalEncodeCscDs::KernelParams params;
    MOS_ZeroMemory(&params, sizeof(params));

    if (m_hmeSupported)
    {
        if (m_isMaxLcu64)
        {
            params.stageDsConversion = ds2x4xFromOrig;
        }
        else
        {
            params.stageDsConversion = ds4xFromOrig;
        }
    }
    else
    {
        if (m_isMaxLcu64)
        {
            params.stageDsConversion = ds2xFromOrig;
        }
        else
        {
            params.stageDsConversion = dsConvUnknown;
        }
    }

    if (m_hevcSeqParams->bit_depth_luma_minus8)
    {
        // Bit depth above 8: add the conversion request on top of the downscale stage
        params.psFormatConvertedSurface = &m_formatConvertedSurface[0];
        params.stageDsConversion        = DsStage(params.stageDsConversion | convFromOrig);

        if (convFromOrig == params.stageDsConversion)
        {
            // Conversion without any downscaling reads the raw surface directly
            params.psFormatConversionOnlyInputSurface = m_rawSurfaceToEnc;
        }
    }

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeScalingAndConversionKernel(&params));

    if (m_16xMeSupported)
    {
        // Second pass producing the 16x downscaled surface
        params.stageDsConversion = ds16xFromOrig;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeScalingAndConversionKernel(&params));
    }

    return eStatus;
}
//!
//! \brief    Check whether the color format of an input surface is supported
//! \param    [in] surface
//!           Surface to check; a null pointer is reported and treated as unsupported
//! \return   bool
//!           true for P010, the result of IS_Y_MAJOR_TILE_FORMAT on the tile type for NV12,
//!           false for every other format
//!
bool CodechalEncHevcStateG10::CheckSupportedFormat(PMOS_SURFACE surface)
{
    CODECHAL_ENCODE_FUNCTION_ENTER;

    bool isColorFormatSupported = false;

    if (nullptr == surface)
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
        return isColorFormatSupported;
    }

    switch (surface->Format)
    {
    case Format_NV12:
        isColorFormatSupported = IS_Y_MAJOR_TILE_FORMAT(surface->TileType);
        break;
    case Format_P010:
        isColorFormatSupported = true;
        // Explicit break: this case used to fall through silently into the labels below
        break;
    case Format_YUY2:
    case Format_YUYV:
    case Format_A8R8G8B8:
        // Known formats that are reported as unsupported without raising an assert message
        break;
    default:
        CODECHAL_ENCODE_ASSERTMESSAGE("Input surface color format = %d not supported!", surface->Format);
        break;
    }

    return isColorFormatSupported;
}
//!
//! \brief    Program and (when appropriate) submit the HME kernel for one level
//! \param    [in] hmeLevel
//!           HME level; HME_LEVEL_4x selects the 4x media state and 4x downscaled
//!           resolution, any other value selects the 16x ones
//! \param    [in] distType
//!           Distortion type, forwarded to SetCurbeMe and SendMeSurfaces
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::EncodeMeKernel(
    HmeLevel          hmeLevel,
    HEVC_ME_DIST_TYPE distType)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    PerfTagSetting perfTag;
    CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_ME_KERNEL);

    const bool                is4xLevel  = (hmeLevel == HME_LEVEL_4x);
    CODECHAL_MEDIA_STATE_TYPE mediaState = is4xLevel ? CODECHAL_MEDIA_STATE_4X_ME : CODECHAL_MEDIA_STATE_16X_ME;

    // Kernel state used for every HME level
    PMHW_KERNEL_STATE meKernel = m_meKernelState;

    // SSH space is requested per kernel without single task phase, otherwise once for
    // the first task of the phase using the maximum binding table count.
    if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
    {
        uint32_t btCount = m_maxBtCount;
        if (!m_singleTaskPhaseSupported)
        {
            btCount = meKernel->KernelParams.iBTCount;
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
            m_stateHeapInterface,
            btCount));
        m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(btCount);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
    }

    // Regular DSH/SSH assignment for the kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
        m_stateHeapInterface,
        meKernel,
        false,
        0,
        false,
        m_storeData));

    MHW_INTERFACE_DESCRIPTOR_PARAMS interfaceDesc;
    MOS_ZeroMemory(&interfaceDesc, sizeof(interfaceDesc));
    interfaceDesc.pKernelState = meKernel;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
        m_stateHeapInterface,
        1,
        &interfaceDesc));

    // Curbe of the ME kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMe(
        hmeLevel,
        distType));

    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            mediaState,
            MHW_DSH_TYPE,
            meKernel));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
            mediaState,
            meKernel));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            mediaState,
            MHW_ISH_TYPE,
            meKernel));
    )

    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

    SendKernelCmdsParams genericCmdParams = SendKernelCmdsParams();
    genericCmdParams.EncFunctionType      = mediaState;
    genericCmdParams.pKernelState         = meKernel;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &genericCmdParams));

    // Binding table followed by the ME surfaces
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
        m_stateHeapInterface,
        meKernel));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMeSurfaces(&cmdBuffer, hmeLevel, distType));

    // The walker iterates over 32x32 blocks of the downscaled picture of this level
    const uint32_t widthInBlocks  = (is4xLevel ? m_downscaledWidth4x : m_downscaledWidth16x) >> 5;
    const uint32_t heightInBlocks = (is4xLevel ? m_downscaledHeight4x : m_downscaledHeight16x) >> 5;

    CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
    MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
    walkerCodecParams.WalkerMode              = m_walkerMode;
    walkerCodecParams.dwResolutionX           = widthInBlocks;
    walkerCodecParams.dwResolutionY           = heightInBlocks;
    walkerCodecParams.bNoDependency           = true;
    walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
    walkerCodecParams.ucGroupId               = m_groupId;

    MHW_WALKER_PARAMS walkerParams;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
        m_hwInterface,
        &walkerParams,
        &walkerCodecParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
        &cmdBuffer,
        &walkerParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, mediaState));

    // Surface state heap dump for the ME kernel
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            mediaState,
            MHW_SSH_TYPE,
            meKernel));
    )

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
        m_stateHeapInterface,
        meKernel));

    // Close the batch buffer only when it is going to be submitted below
    if (m_lastTaskInPhase || !m_singleTaskPhaseSupported)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
            m_stateHeapInterface));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
            &cmdBuffer,
            nullptr));
    }

    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
        &cmdBuffer,
        mediaState,
        nullptr)));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));

    m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    // Submit now unless more tasks of the same phase will follow
    if (m_lastTaskInPhase || !m_singleTaskPhaseSupported)
    {
        m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
        m_lastTaskInPhase = false;
    }

    return eStatus;
}
//!
//! \brief    Program and (when appropriate) submit the BRC init or BRC reset kernel
//! \details  The init kernel is used when m_brcInit is set, the reset kernel otherwise.
//!           The kernel is launched with a single media object command.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::EncodeBrcInitResetKernel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    PerfTagSetting perfTag;
    CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_INIT_RESET);

    // Pick the init or the reset flavour of the BRC kernel
    CODECHAL_HEVC_BRC_KRNIDX krnIdx = CODECHAL_HEVC_BRC_RESET;
    if (m_brcInit)
    {
        krnIdx = CODECHAL_HEVC_BRC_INIT;
    }
    PMHW_KERNEL_STATE brcKernel = &m_brcKernelStates[krnIdx];

    // SSH space is requested per kernel without single task phase, otherwise once for
    // the first task of the phase using the maximum binding table count.
    if (!m_singleTaskPhaseSupported || m_firstTaskInPhase)
    {
        uint32_t btCount = m_maxBtCount;
        if (!m_singleTaskPhaseSupported)
        {
            btCount = brcKernel->KernelParams.iBTCount;
        }

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
            m_stateHeapInterface,
            btCount));
        m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(btCount);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
    }

    // Regular DSH/SSH assignment for the kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
        m_stateHeapInterface,
        brcKernel,
        false,
        0,
        false,
        m_storeData));

    MHW_INTERFACE_DESCRIPTOR_PARAMS interfaceDesc;
    MOS_ZeroMemory(&interfaceDesc, sizeof(interfaceDesc));
    interfaceDesc.pKernelState = brcKernel;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
        m_stateHeapInterface,
        1,
        &interfaceDesc));

    // Curbe of the BRC init/reset kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcInitReset(krnIdx));

    CODECHAL_MEDIA_STATE_TYPE mediaState = CODECHAL_MEDIA_STATE_BRC_INIT_RESET;

    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            mediaState,
            MHW_DSH_TYPE,
            brcKernel));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
            mediaState,
            brcKernel));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            mediaState,
            MHW_ISH_TYPE,
            brcKernel));
    )

    // Software BRC debug path, currently commented out and kept for reference:
    //#if (_DEBUG || _RELEASE_INTERNAL)
    //    if (m_swBrcMode != nullptr)
    //    {
    //        CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcSwBrcImpl(
    //            m_debugInterface,
    //            encFunctionType,
    //            this,
    //            bBrcReset,
    //            kernelState,
    //            kernelState));
    //
    //        return eStatus;
    //    }
    //#endif // (_DEBUG || _RELEASE_INTERNAL)

    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

    SendKernelCmdsParams genericCmdParams = SendKernelCmdsParams();
    genericCmdParams.EncFunctionType      = mediaState;
    genericCmdParams.pKernelState         = brcKernel;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &genericCmdParams));

    // Binding table followed by the BRC init/reset surfaces
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
        m_stateHeapInterface,
        brcKernel));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcInitResetSurfaces(&cmdBuffer, krnIdx));

    // Single media object carrying zeroed inline data
    MediaObjectInlineData inlineData;
    MOS_ZeroMemory(&inlineData, sizeof(inlineData));

    MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
    MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
    mediaObjectParams.pInlineData      = &inlineData;
    mediaObjectParams.dwInlineDataSize = sizeof(inlineData);
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObject(
        &cmdBuffer,
        nullptr,
        &mediaObjectParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, mediaState));

    // Surface state heap dump for the BRC init/reset kernel
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            mediaState,
            MHW_SSH_TYPE,
            brcKernel));
    )

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
        m_stateHeapInterface,
        brcKernel));

    // Close the batch buffer only when it is going to be submitted below
    if (m_lastTaskInPhase || !m_singleTaskPhaseSupported)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
            m_stateHeapInterface));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
            &cmdBuffer,
            nullptr));
    }

    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
        &cmdBuffer,
        mediaState,
        nullptr)));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));

    m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    // Submit now unless more tasks of the same phase will follow
    if (m_lastTaskInPhase || !m_singleTaskPhaseSupported)
    {
        m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
        m_lastTaskInPhase = false;
    }

    return eStatus;
}
MOS_STATUS CodechalEncHevcStateG10::EncodeBrcFrameUpdateKernel()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
PerfTagSetting perfTag;
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE);
// Initialize DSH kernel state
CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx = CODECHAL_HEVC_BRC_FRAME_UPDATE;
PMHW_KERNEL_STATE kernelState = &m_brcKernelStates[brcKrnIdx];
// If Single Task Phase is not enabled, use BT count for the kernel state.
if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
{
uint32_t maxBtCount = m_singleTaskPhaseSupported ?
m_maxBtCount : kernelState->KernelParams.iBTCount;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
m_stateHeapInterface,
maxBtCount));
m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
// Fill HCP_IMG_STATE so that BRC kernel can use it to generate the write buffer for PAK
MHW_VDBOX_HEVC_PIC_STATE mhwHevcPicState;
MOS_ZeroMemory(&mhwHevcPicState, sizeof(mhwHevcPicState));
mhwHevcPicState.pHevcEncSeqParams = m_hevcSeqParams;
mhwHevcPicState.pHevcEncPicParams = m_hevcPicParams;
mhwHevcPicState.bUseVDEnc = false;
mhwHevcPicState.brcNumPakPasses = m_mfxInterface->GetBrcNumPakPasses();
mhwHevcPicState.bSAOEnable = m_hevcSeqParams->SAO_enabled_flag ? (m_hevcSliceParams->slice_sao_luma_flag || m_hevcSliceParams->slice_sao_chroma_flag) : 0;
PMOS_RESOURCE brcHcpStateReadBuffer = &m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx];
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hcpInterface->AddHcpHevcPicBrcBuffer(brcHcpStateReadBuffer, &mhwHevcPicState));
PMOS_SURFACE brcConstantData = &m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx];
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetupBrcConstantTable(brcConstantData));
// Set up the DSH/SSH as normal
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
m_stateHeapInterface,
kernelState,
false,
0,
false,
m_storeData));
MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
MOS_ZeroMemory(&idParams, sizeof(idParams));
idParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
m_stateHeapInterface,
1,
&idParams));
// Setup curbe for BrcFrameUpdate kernel
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(brcKrnIdx));
CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_BRC_UPDATE;
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_DSH_TYPE,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
encFunctionType,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_ISH_TYPE,
kernelState));
)
//#if (_DEBUG || _RELEASE_INTERNAL)
// if (m_swBrcMode != nullptr)
// {
// CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHal_DbgCallHevcSwBrcImpl(
// m_debugInterface,
// encFunctionType,
// this,
// false,
// kernelState,
// kernelState));
// return eStatus;
// }
//#endif // (_DEBUG || _RELEASE_INTERNAL)
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
sendKernelCmdsParams.EncFunctionType = encFunctionType;
sendKernelCmdsParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
// Add binding table
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
m_stateHeapInterface,
kernelState));
// Send surfaces for BrcFrameUpdate Kernel
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcFrameUpdateSurfaces(&cmdBuffer));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcHistoryBuffer,
CodechalDbgAttr::attrOutput,
"Input_HistoryBuffer",
m_brcHistoryBufferSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
CodechalDbgAttr::attrOutput,
"Input_PakStats",
m_brcPakStatisticsSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
CodechalDbgAttr::attrOutput,
"Input_ImgStateRead",
m_brcBuffers.dwBrcHcpPicStateSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
CodechalDbgAttr::attrOutput,
"Input_ImgStateWrite",
m_brcBuffers.dwBrcHcpPicStateSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
(MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
CodechalDbgAttr::attrOutput,
"Output_CombinedEnc",
m_brcCombinedEncBufferSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
CodechalDbgAttr::attrOutput,
"Input_ConstData",
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_brcBuffers.sMeBrcDistortionBuffer,
CodechalDbgAttr::attrOutput,
"Input_Distortion",
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
MHW_MEDIA_OBJECT_PARAMS mediaObjectParams;
MediaObjectInlineData mediaObjectInlineData;
MOS_ZeroMemory(&mediaObjectParams, sizeof(mediaObjectParams));
MOS_ZeroMemory(&mediaObjectInlineData, sizeof(mediaObjectInlineData));
mediaObjectParams.pInlineData = &mediaObjectInlineData;
mediaObjectParams.dwInlineDataSize = sizeof(mediaObjectInlineData);
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObject(
&cmdBuffer,
nullptr,
&mediaObjectParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
// Add dump for BrcFrameUpdate surface state heap here
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_SSH_TYPE,
kernelState));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
m_stateHeapInterface,
kernelState));
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
m_stateHeapInterface));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
&cmdBuffer,
nullptr));
}
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
encFunctionType,
nullptr)));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
m_lastTaskInPhase = false;
}
return eStatus;
}
//!
//! \brief    Build and submit the render command buffer for the BRC LCU-update kernel.
//! \details  Reserves SSH/DSH space for the CODECHAL_HEVC_BRC_LCU_UPDATE kernel state, programs its
//!           interface descriptor, CURBE, binding table and surfaces, adds a media-object-walker
//!           command, and submits the command buffer when single-task-phase is unsupported or this
//!           is the last task in the phase.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success; otherwise the status of the first failing step,
//!           including a failed command-buffer submission.
//!
MOS_STATUS CodechalEncHevcStateG10::EncodeBrcLcuUpdateKernel()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    PerfTagSetting perfTag;
    CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_BRC_UPDATE_LCU);

    CODECHAL_MEDIA_STATE_TYPE encFunctionType = CODECHAL_MEDIA_STATE_HEVC_BRC_LCU_UPDATE;

    // Initialize DSH kernel state
    CODECHAL_HEVC_BRC_KRNIDX brcKrnIdx   = CODECHAL_HEVC_BRC_LCU_UPDATE;
    PMHW_KERNEL_STATE        kernelState = &m_brcKernelStates[brcKrnIdx];

    // If Single Task Phase is not enabled, use BT count for the kernel state.
    // With Single Task Phase the request is made once, on the first task, for m_maxBtCount.
    if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
    {
        uint32_t maxBtCount = m_singleTaskPhaseSupported ?
            m_maxBtCount : kernelState->KernelParams.iBTCount;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
            m_stateHeapInterface,
            maxBtCount));
        m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
        CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
    }

    // Set up the DSH/SSH as normal
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
        m_stateHeapInterface,
        kernelState,
        false,
        0,
        false,
        m_storeData));

    MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
    MOS_ZeroMemory(&idParams, sizeof(idParams));
    idParams.pKernelState = kernelState;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
        m_stateHeapInterface,
        1,
        &idParams));

    // Setup curbe for BrcLcuUpdate kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeBrcUpdate(brcKrnIdx));

    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            encFunctionType,
            MHW_DSH_TYPE,
            kernelState));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
            encFunctionType,
            kernelState));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            encFunctionType,
            MHW_ISH_TYPE,
            kernelState));
    )

    MOS_COMMAND_BUFFER cmdBuffer;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));

    SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
    sendKernelCmdsParams.EncFunctionType      = encFunctionType;
    sendKernelCmdsParams.pKernelState         = kernelState;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));

    // Add binding table
    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
        m_stateHeapInterface,
        kernelState));

    // Send surfaces for BrcLcuUpdate kernel
    CODECHAL_ENCODE_CHK_STATUS_RETURN(SendBrcLcuUpdateSurfaces(&cmdBuffer));

    // For CNL thread space is 16x8 MB (regardless of LCU32 or LCU64):
    // the walker resolution is the frame size in macroblocks divided (rounding up) by 16 and 8.
    uint32_t resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
    resolutionX          = MOS_ROUNDUP_SHIFT(resolutionX, 4);
    uint32_t resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameHeight);
    resolutionY          = MOS_ROUNDUP_SHIFT(resolutionY, 3);

    CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
    MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
    walkerCodecParams.WalkerMode              = m_walkerMode;
    walkerCodecParams.dwResolutionX           = resolutionX;
    walkerCodecParams.dwResolutionY           = resolutionY;
    walkerCodecParams.bNoDependency           = true;
    walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
    walkerCodecParams.ucGroupId               = m_groupId;
    walkerCodecParams.wPictureCodingType      = m_pictureCodingType;
    walkerCodecParams.bUseScoreboard          = m_useHwScoreboard;

    MHW_WALKER_PARAMS walkerParams;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
        m_hwInterface,
        &walkerParams,
        &walkerCodecParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
        &cmdBuffer,
        &walkerParams));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));

    // Add dump for BrcLcuUpdate surface state heap here
    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
            encFunctionType,
            MHW_SSH_TYPE,
            kernelState));
    )

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
        m_stateHeapInterface,
        kernelState));

    // The batch is only terminated when this kernel closes the command buffer.
    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
            m_stateHeapInterface));
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
            &cmdBuffer,
            nullptr));
    }

    CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
        &cmdBuffer,
        encFunctionType,
        nullptr)));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));

    m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);

    if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
    {
        // Report a failed submission to the caller instead of silently dropping it.
        // The phase flag is cleared regardless, exactly as before.
        eStatus           = m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
        m_lastTaskInPhase = false;
    }

    return eStatus;
}
//!
//! \brief    Fill the concurrent-thread-group data surface with per-region walking control data.
//! \details  Splits the frame (measured in 32x32 or 64x64 LCUs, depending on m_isMaxLcu64) into
//!           regions: m_numRegionsInSlice regions per (possibly merged) slice, at most
//!           CODECHAL_MEDIA_WALKER_MAX_COLORS regions in total. One record per region is written
//!           to m_concurrentThreadGroupData, and the region count, maximum region height and
//!           units per region are stored in m_walkingPatternParam.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success,
//!           MOS_STATUS_INVALID_PARAMETER if the frame width, slice count or region count
//!           cannot be handled, otherwise the status of the failing step.
//!
MOS_STATUS CodechalEncHevcStateG10::GenerateWalkingControlRegion()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    uint32_t frameWidthInUnits = 0, frameHeightInUnits = 0;
    int32_t  copyBlockSize = 0, log2LCUSize = 0;

    // copyBlockSize is the number of bytes of each record copied to the surface:
    // 18 bytes = 9 uint16_t fields for LCU32, 22 bytes = 11 uint16_t fields for LCU64.
    if (!m_isMaxLcu64)
    {
        frameWidthInUnits  = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 32);
        frameHeightInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameHeight, 32);
        log2LCUSize        = 5;
        copyBlockSize      = 18;
    }
    else
    {
        frameWidthInUnits  = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameWidth, 64);
        frameHeightInUnits = CODECHAL_ENCODE_HEVC_GET_SIZE_IN_LCU(m_frameHeight, 64);
        log2LCUSize        = 6;
        copyBlockSize      = 22;
    }

    // Reject inputs the code below cannot process safely:
    //  - frameWidthInUnits and m_numRegionsInSlice are used as divisors;
    //  - more regions per slice than walker colors can never satisfy the merge loop's exit
    //    condition (it would spin forever) and would overrun the fixed-size tables;
    //  - sliceStartY only has room for CODECHAL_HEVC_MAX_NUM_SLICES_LVL_5 + 1 entries.
    if (frameWidthInUnits == 0 ||
        m_numRegionsInSlice == 0 ||
        m_numRegionsInSlice > CODECHAL_MEDIA_WALKER_MAX_COLORS ||
        m_numSlices > CODECHAL_HEVC_MAX_NUM_SLICES_LVL_5)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    // Row (in LCU units) at which each slice starts; entry [m_numSlices] is the frame end.
    int32_t sliceStartY[CODECHAL_HEVC_MAX_NUM_SLICES_LVL_5 + 1] = { 0 };  // Allocate +1 of max num slices
    bool    isArbitrarySlices = false;
    for (uint32_t slice = 0; slice < m_numSlices; slice++)
    {
        // A slice that does not begin at the start of an LCU row makes the layout "arbitrary".
        if (m_hevcSliceParams[slice].slice_segment_address % frameWidthInUnits)
        {
            isArbitrarySlices = true;
        }
        else
        {
            sliceStartY[slice] = m_hevcSliceParams[slice].slice_segment_address / frameWidthInUnits;
        }
    }
    sliceStartY[m_numSlices] = frameHeightInUnits;

    // regionsStartTable[0..31] holds each region's start X,
    // regionsStartTable[regionStartYOffset..63] holds each region's start Y.
    const uint32_t regionStartYOffset = 32;
    uint16_t       regionsStartTable[64] = { 0 };
    uint32_t       numRegions = 1;
    int32_t        maxHeight = 0;
    uint32_t       numUnitInRegion = 0, height = 0, numSlices = 0;

    if (isArbitrarySlices)
    {
        // Treat the whole frame as a single slice.
        height    = frameHeightInUnits;
        numSlices = 1;
        maxHeight = height;
        if (m_numRegionsInSlice > 1)
        {
            numUnitInRegion =
                (frameWidthInUnits + 2 * (frameHeightInUnits - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
            numRegions = m_numRegionsInSlice;

            for (uint32_t i = 1; i < m_numRegionsInSlice; i++)
            {
                uint32_t front = i*numUnitInRegion;

                // Clamp the start X to the frame width; beyond it, the parity of the overshoot
                // selects the last or second-to-last column.
                if (front < frameWidthInUnits)
                {
                    regionsStartTable[i] = (uint16_t)front;
                }
                else if (((front - frameWidthInUnits + 1) & 1) == 0)
                {
                    regionsStartTable[i] = (uint16_t)frameWidthInUnits - 1;
                }
                else
                {
                    regionsStartTable[i] = (uint16_t)frameWidthInUnits - 2;
                }

                regionsStartTable[regionStartYOffset + i] = (uint16_t)((front - regionsStartTable[i]) >> 1);
            }
        }
    }
    else
    {
        // Start from the tallest slice as the target region height.
        maxHeight = 0;
        numSlices = m_numSlices;

        for (uint32_t slice = 0; slice < numSlices; slice++)
        {
            int32_t sliceHeight = sliceStartY[slice + 1] - sliceStartY[slice];
            if (sliceHeight > maxHeight)
            {
                maxHeight = sliceHeight;
            }
        }

        // Merge neighbouring slices until numSlices * m_numRegionsInSlice fits the walker colors.
        bool sliceIsMerged = false;
        while (!sliceIsMerged)
        {
            int32_t newNumSlices = 1;
            int32_t startY = 0;

            // Mark (with -1) every slice that still fits, together with the current group,
            // within maxHeight rows; otherwise start a new group at that slice.
            for (uint32_t slice = 1; slice < numSlices; slice++)
            {
                if ((sliceStartY[slice + 1] - startY) <= maxHeight)
                {
                    sliceStartY[slice] = -1;
                }
                else
                {
                    startY = sliceStartY[slice];
                }
            }

            // Compact the table, dropping the marked entries.
            for (uint32_t slice = 1; slice < numSlices; slice++)
            {
                if (sliceStartY[slice] > 0)
                {
                    sliceStartY[newNumSlices] = sliceStartY[slice];
                    newNumSlices++;
                }
            }

            numSlices = newNumSlices;
            sliceStartY[numSlices] = frameHeightInUnits;

            /* very rough estimation */
            if (numSlices * m_numRegionsInSlice <= CODECHAL_MEDIA_WALKER_MAX_COLORS)
            {
                sliceIsMerged = true;
            }
            else
            {
                // Still too many: find the adjacent pair with the smallest combined height,
                // adopt that height as the new maxHeight and remove the boundary between them.
                int32_t num = 1;

                maxHeight = frameHeightInUnits;

                for (uint32_t slice = 0; slice < numSlices - 1; slice++)
                {
                    if ((sliceStartY[slice + 2] - sliceStartY[slice]) <= maxHeight)
                    {
                        maxHeight = sliceStartY[slice + 2] - sliceStartY[slice];
                        num = slice + 1;
                    }
                }

                for (uint32_t slice = num; slice < numSlices; slice++)
                {
                    sliceStartY[slice] = sliceStartY[slice + 1];
                }

                numSlices--;
            }
        }

        numUnitInRegion =
            (frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
        numRegions = numSlices * m_numRegionsInSlice;

        CODECHAL_ENCODE_ASSERT(numRegions != 0 && numRegions <= CODECHAL_MEDIA_WALKER_MAX_COLORS); // Making sure that the number of regions is at least 1

        for (uint32_t slice = 0; slice < numSlices; slice++)
        {
            // The first region of every slice starts at column 0 of the slice's first row.
            regionsStartTable[slice * m_numRegionsInSlice] = 0;
            regionsStartTable[regionStartYOffset + (slice * m_numRegionsInSlice)] = (uint16_t)sliceStartY[slice];

            for (uint32_t i = 1; i < m_numRegionsInSlice; i++)
            {
                uint32_t front = i*numUnitInRegion;

                if (front < frameWidthInUnits)
                {
                    regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)front;
                }
                else if (((front - frameWidthInUnits + 1) & 1) == 0)
                {
                    regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)frameWidthInUnits - 1;
                }
                else
                {
                    regionsStartTable[slice * m_numRegionsInSlice + i] = (uint16_t)frameWidthInUnits - 2;
                }

                // regionsStartTable[i] is the slice-0 start X of region i; the start X computed
                // above depends only on i, so it equals this slice's value.
                regionsStartTable[regionStartYOffset + (slice * m_numRegionsInSlice + i)] = (uint16_t)sliceStartY[slice] +
                    ((front - regionsStartTable[i]) >> 1);
            }
        }
        height = maxHeight;
    }

    // One row of up to 32 uint16_t fields per region; only the first copyBlockSize bytes of the
    // first CODECHAL_MEDIA_WALKER_MAX_COLORS rows are copied to the surface below.
    uint16_t datatmp[32][32] = { {0} };

    for (uint32_t k = 0; k < numSlices; k++)
    {
        int32_t nearestReg = 0;
        int32_t minDelta = m_frameHeight;
        int32_t curLcuPelY = regionsStartTable[regionStartYOffset + (k * m_numRegionsInSlice)] << log2LCUSize;
        int32_t tsWidth = frameWidthInUnits;
        int32_t tsHeight = height;
        int32_t offsetY = -((tsWidth + 1) >> 1);
        int32_t offsetDelta = ((tsWidth + ((tsHeight - 1) << 1)) + (m_numRegionsInSlice - 1)) / (m_numRegionsInSlice);

        // Among regions starting at column 0, pick the one whose start row is closest to,
        // without being below, this slice's start row.
        for (int32_t i = 0; i < (int32_t)numRegions; i++)
        {
            if (regionsStartTable[i] == 0)
            {
                int32_t delta = curLcuPelY - (regionsStartTable[regionStartYOffset + i] << log2LCUSize);

                if (delta >= 0)
                {
                    if (delta < minDelta)
                    {
                        minDelta = delta;
                        nearestReg = i;
                    }
                }
            }
        }

        for (uint32_t i = 0; i < m_numRegionsInSlice; i++)
        {
            // Fields 0-2: slice start address, slice end address (frame end for the last slice)
            // and the region index.
            datatmp[k * m_numRegionsInSlice + i][0] = (uint16_t)(sliceStartY[k] * frameWidthInUnits);
            datatmp[k * m_numRegionsInSlice + i][1] = (uint16_t)((k == (numSlices - 1)) ? (frameWidthInUnits * frameHeightInUnits) : sliceStartY[k + 1] * frameWidthInUnits); //m_info.SliceStartAddr[k+1]
            datatmp[k * m_numRegionsInSlice + i][2] = (uint16_t)(k * m_numRegionsInSlice + i);

            // LCU32 with a single region per slice leaves the remaining fields at zero.
            if (!m_isMaxLcu64 && m_numRegionsInSlice == 1)
            {
                continue;
            }

            // Fields 3-8: region height, region start X/Y, start Y of the nearest region,
            // start Y of the following slice's first region (frame height if that entry is 0),
            // and a Y offset derived from the region index.
            datatmp[k * m_numRegionsInSlice + i][3] = (uint16_t)height;
            datatmp[k * m_numRegionsInSlice + i][4] = regionsStartTable[nearestReg + i];
            datatmp[k * m_numRegionsInSlice + i][5] = regionsStartTable[regionStartYOffset + (nearestReg + i)];
            datatmp[k * m_numRegionsInSlice + i][6] = regionsStartTable[regionStartYOffset + nearestReg];
            int32_t tmpY = regionsStartTable[regionStartYOffset + (nearestReg + m_numRegionsInSlice)];
            datatmp[k * m_numRegionsInSlice + i][7] = (uint16_t)((tmpY != 0) ? tmpY : (frameHeightInUnits));
            datatmp[k * m_numRegionsInSlice + i][8] = (uint16_t)(offsetY + regionsStartTable[regionStartYOffset + nearestReg] + ((i * offsetDelta) >> 1));

            // Fields 9-10 (LCU64 only): units per region and total region count.
            if (m_isMaxLcu64)
            {
                datatmp[k * m_numRegionsInSlice + i][9] = (uint16_t)((frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice);
                datatmp[k * m_numRegionsInSlice + i][10] = (uint16_t)numRegions;
            }
        }
    }

    MOS_LOCK_PARAMS lockFlags;
    MOS_ZeroMemory(&lockFlags, sizeof(lockFlags));
    lockFlags.WriteOnly = true;
    PCODECHAL_ENC_HEVC_CONCURRENT_THREAD_GROUP_DATA_G10 region = (PCODECHAL_ENC_HEVC_CONCURRENT_THREAD_GROUP_DATA_G10)m_osInterface->pfnLockResource(
        m_osInterface,
        &m_concurrentThreadGroupData.sResource,
        &lockFlags);
    CODECHAL_ENCODE_CHK_NULL_RETURN(region);
    MOS_ZeroMemory(region, m_concurrentThreadGroupData.dwSize);

    //Concurrent Thread Group Surface size = 16*64
    // Copy the leading copyBlockSize bytes of each record into consecutive surface entries.
    for (auto i = 0; i < CODECHAL_MEDIA_WALKER_MAX_COLORS; i++)
    {
        MOS_SecureMemcpy((uint8_t*)region, copyBlockSize, (uint8_t*)datatmp[i], copyBlockSize);
        region++;
    }

    // Publish the walking pattern parameters for later use by this class.
    MOS_ZeroMemory(&m_walkingPatternParam, sizeof(m_walkingPatternParam));
    m_walkingPatternParam.dwMaxHeightInRegion = maxHeight;
    m_walkingPatternParam.dwNumRegion         = numRegions;
    m_walkingPatternParam.dwNumUnitsInRegion  = (frameWidthInUnits + 2 * (maxHeight - 1) + m_numRegionsInSlice - 1) / m_numRegionsInSlice;

    m_osInterface->pfnUnlockResource(
        m_osInterface,
        &m_concurrentThreadGroupData.sResource);

    CODECHAL_DEBUG_TOOL(
        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
            &m_concurrentThreadGroupData.sResource,
            CodechalDbgAttr::attrInput,
            "ConcurrentThreadGroupData_In",
            m_concurrentThreadGroupData.dwSize,
            0,
            m_pictureCodingType == I_TYPE ? CODECHAL_MEDIA_STATE_HEVC_I_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
    )

    return eStatus;
}
MOS_STATUS CodechalEncHevcStateG10::GetCustomDispatchPattern(
PMHW_WALKER_PARAMS walkerParams,
PMHW_VFE_SCOREBOARD scoreBoard,
PCODECHAL_WALKER_CODEC_PARAMS walkerCodecParams)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_CHK_NULL_RETURN(walkerParams);
CODECHAL_ENCODE_CHK_NULL_RETURN(scoreBoard);
CODECHAL_ENCODE_CHK_NULL_RETURN(walkerCodecParams);
MOS_ZeroMemory(walkerParams, sizeof(*walkerParams));
walkerParams->ColorCountMinusOne = m_walkingPatternParam.dwNumRegion - 1;
CODECHAL_ENCODE_ASSERT(walkerParams->ColorCountMinusOne <= CODECHAL_MEDIA_WALKER_MAX_COLORS);
walkerParams->WalkerMode =
(MHW_WALKER_MODE)walkerCodecParams->WalkerMode;
walkerParams->UseScoreboard = walkerCodecParams->bUseScoreboard;
walkerParams->dwLocalLoopExecCount = 0xFFF; //MAX VALUE
walkerParams->dwGlobalLoopExecCount = 0xFFF; //MAX VALUE
MOS_ZeroMemory(scoreBoard, sizeof(*scoreBoard));
switch(walkerCodecParams->WalkerDegree)
{
case CODECHAL_26_DEGREE:
// Walker Params
if (m_numRegionsInSlice > 1)
{
int32_t threadSpaceWidth = walkerCodecParams->dwResolutionX;
int32_t threadSpaceHeight = m_walkingPatternParam.dwMaxHeightInRegion;
int32_t tsWidth = threadSpaceWidth;
int32_t tsHeight = threadSpaceHeight;
int32_t tmpHeight = (tsHeight + 1) & 0xfffe;
tsHeight = tmpHeight;
tmpHeight = ((tsWidth + 1) >> 1) + ((tsWidth + ((tmpHeight - 1) << 1)) + (2 * m_numRegionsInSlice - 1)) / (2 * m_numRegionsInSlice);
walkerParams->BlockResolution.x = tsWidth;
walkerParams->BlockResolution.y = tmpHeight;
walkerParams->GlobalStart.x = 0;
walkerParams->GlobalStart.y = 0;
walkerParams->GlobalResolution.x = tsWidth;
walkerParams->GlobalResolution.y = tmpHeight;
walkerParams->LocalStart.x = (tsWidth + 1) & 0xfffe;
walkerParams->LocalStart.y = 0;
walkerParams->LocalEnd.x = 0;
walkerParams->LocalEnd.y = 0;
walkerParams->GlobalOutlerLoopStride.x = tsWidth;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0;
walkerParams->GlobalInnerLoopUnit.y = tmpHeight;
// 26 degree walking pattern
walkerParams->ScoreboardMask = 0x7F;
walkerParams->LocalOutLoopStride.x = 1;
walkerParams->LocalOutLoopStride.y = 0;
walkerParams->LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16); // Gen9: 0xFFE Gen6,8: 0x3FE
walkerParams->LocalInnerLoopUnit.y = 1;
walkerParams->dwGlobalLoopExecCount = 0;
walkerParams->dwLocalLoopExecCount = (threadSpaceWidth + (tsHeight - 1) * 2 + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
}
else
{
walkerParams->BlockResolution.x = walkerCodecParams->dwResolutionX;
walkerParams->BlockResolution.y = walkerCodecParams->dwResolutionY;
walkerParams->GlobalResolution.x = walkerParams->BlockResolution.x;
walkerParams->GlobalResolution.y = walkerParams->BlockResolution.y;
walkerParams->GlobalOutlerLoopStride.x = walkerParams->BlockResolution.x;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0;
walkerParams->GlobalInnerLoopUnit.y = walkerParams->BlockResolution.y;
// 26 degree walking pattern
walkerParams->ScoreboardMask = 0x7F;
walkerParams->LocalOutLoopStride.x = 1;
walkerParams->LocalOutLoopStride.y = 0;
walkerParams->LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16); // Gen9: 0xFFE Gen6,8: 0x3FE
walkerParams->LocalInnerLoopUnit.y = 1;
}
// Scoreboard Settings
scoreBoard->ScoreboardMask = 0x7F;
scoreBoard->ScoreboardEnable = true;
// Scoreboard 0
scoreBoard->ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[0].y = 0;
// Scoreboard 1
scoreBoard->ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[1].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 2
scoreBoard->ScoreboardDelta[2].x = 0;
scoreBoard->ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 3
scoreBoard->ScoreboardDelta[3].x = 1;
scoreBoard->ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 4
scoreBoard->ScoreboardDelta[4].x = 0;
scoreBoard->ScoreboardDelta[4].y = 0;
// Scoreboard 5
scoreBoard->ScoreboardDelta[5].x = 0;
scoreBoard->ScoreboardDelta[5].y = 0;
// Scoreboard 6
scoreBoard->ScoreboardDelta[6].x = 0;
scoreBoard->ScoreboardDelta[6].y = 0;
// Scoreboard 7
scoreBoard->ScoreboardDelta[7].x = 0;
scoreBoard->ScoreboardDelta[7].y = 0;
break;
case CODECHAL_26Z_DEGREE:
// Walker Params
// 26z degree walking pattern used for HEVC
walkerParams->ScoreboardMask = 0x7f;
walkerParams->GlobalResolution.x = walkerCodecParams->dwResolutionX;
walkerParams->GlobalResolution.y = walkerCodecParams->dwResolutionY;
// 26 degree in the global loop
walkerParams->GlobalOutlerLoopStride.x = 2;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0xFFF -4 + 1; // -4 in 2's compliment format
walkerParams->GlobalInnerLoopUnit.y = 2;
// z-order in the local loop
walkerParams->LocalOutLoopStride.x = 0;
walkerParams->LocalOutLoopStride.y = 1;
walkerParams->LocalInnerLoopUnit.x = 1;
walkerParams->LocalInnerLoopUnit.y = 0;
// dispatch 4 threads together in one LCU
walkerParams->BlockResolution.x = 2;
walkerParams->BlockResolution.y = 2;
// Scoreboard Settings
scoreBoard->ScoreboardType = m_hwScoreboardType;
scoreBoard->ScoreboardMask = 0x7F;
scoreBoard->ScoreboardEnable = true;
// Scoreboard 0
scoreBoard->ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[0].y = 1;
// Scoreboard 1
scoreBoard->ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[1].y = 0;
// Scoreboard 2
scoreBoard->ScoreboardDelta[2].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 3
scoreBoard->ScoreboardDelta[3].x = 0;
scoreBoard->ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 4
scoreBoard->ScoreboardDelta[4].x = 1;
scoreBoard->ScoreboardDelta[4].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
break;
case CODECHAL_26X_DEGREE:
// Walker Params
if (m_numRegionsInSlice > 1)
{
int32_t threadSpaceWidth = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, 32);
int32_t tsWidth = threadSpaceWidth;
int32_t tsHeight = m_walkingPatternParam.dwMaxHeightInRegion;
int32_t tmpHeight = (tsHeight + 1) & 0xfffe;
tsHeight = tmpHeight;
tmpHeight = ((tsWidth + 1) >> 1) + ((tsWidth + ((tmpHeight - 1) << 1)) + (2 * m_numRegionsInSlice - 1)) / (2 * m_numRegionsInSlice);
tmpHeight *= (walkerCodecParams->usTotalThreadNumPerLcu);
walkerParams->ScoreboardMask = 0xff;
walkerParams->GlobalResolution.x = tsWidth;
walkerParams->GlobalResolution.y = tmpHeight;
walkerParams->GlobalStart.x = 0;
walkerParams->GlobalStart.y = 0;
walkerParams->LocalStart.x = (tsWidth + 1) & 0xfffe;
walkerParams->LocalStart.y = 0;
walkerParams->LocalEnd.x = 0;
walkerParams->LocalEnd.y = 0;
walkerParams->GlobalOutlerLoopStride.x = tsWidth;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0;
walkerParams->GlobalInnerLoopUnit.y = tmpHeight;
walkerParams->LocalOutLoopStride.x = 1;
walkerParams->LocalOutLoopStride.y = 0;
walkerParams->LocalInnerLoopUnit.x = MOS_BITFIELD_VALUE((uint32_t)-2, 16);
walkerParams->LocalInnerLoopUnit.y = walkerCodecParams->usTotalThreadNumPerLcu;
walkerParams->MiddleLoopExtraSteps = walkerCodecParams->usTotalThreadNumPerLcu - 1;
walkerParams->MidLoopUnitX = 0;
walkerParams->MidLoopUnitY = 1;
walkerParams->BlockResolution.x = walkerParams->GlobalResolution.x;
walkerParams->BlockResolution.y = walkerParams->GlobalResolution.y;
walkerParams->dwGlobalLoopExecCount = 0;
walkerParams->dwLocalLoopExecCount = (threadSpaceWidth + (tsHeight - 1) * 2 + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
}
else
{
walkerParams->ScoreboardMask = 0xff;
walkerParams->GlobalResolution.x = walkerCodecParams->dwResolutionX;
walkerParams->GlobalResolution.y = walkerCodecParams->dwResolutionY * walkerCodecParams->usTotalThreadNumPerLcu;
walkerParams->GlobalOutlerLoopStride.x = walkerParams->GlobalResolution.x;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0;
walkerParams->GlobalInnerLoopUnit.y = walkerParams->GlobalResolution.y;
walkerParams->LocalOutLoopStride.x = 1;
walkerParams->LocalOutLoopStride.y = 0;
walkerParams->LocalInnerLoopUnit.x = 0xFFF - 2 + 1; // -2 in 2's compliment format;
walkerParams->LocalInnerLoopUnit.y = walkerCodecParams->usTotalThreadNumPerLcu;
walkerParams->MiddleLoopExtraSteps = walkerCodecParams->usTotalThreadNumPerLcu - 1;
walkerParams->MidLoopUnitX = 0;
walkerParams->MidLoopUnitY = 1;
walkerParams->BlockResolution.x = walkerParams->GlobalResolution.x;
walkerParams->BlockResolution.y = walkerParams->GlobalResolution.y;
}
// Scoreboard Settings
scoreBoard->ScoreboardType = m_hwScoreboardType;
scoreBoard->ScoreboardMask = 0xff;
scoreBoard->ScoreboardEnable = true;
// Scoreboard 0
scoreBoard->ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[0].y = walkerCodecParams->usTotalThreadNumPerLcu - 1;
// Scoreboard 1
scoreBoard->ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[1].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 2
scoreBoard->ScoreboardDelta[2].x = 0;
scoreBoard->ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 3
scoreBoard->ScoreboardDelta[3].x = 1;
scoreBoard->ScoreboardDelta[3].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 4
scoreBoard->ScoreboardDelta[4].x = 0;
scoreBoard->ScoreboardDelta[4].y = - walkerCodecParams->usTotalThreadNumPerLcu;
// Scoreboard 5
scoreBoard->ScoreboardDelta[5].x = 0;
scoreBoard->ScoreboardDelta[5].y = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
// Scoreboard 6
scoreBoard->ScoreboardDelta[6].x = 0;
scoreBoard->ScoreboardDelta[6].y = MOS_BITFIELD_VALUE((uint32_t)-3, 4);
// Scoreboard 7
scoreBoard->ScoreboardDelta[7].x = 0;
scoreBoard->ScoreboardDelta[7].y = MOS_BITFIELD_VALUE((uint32_t)-4, 4);
break;
case CODECHAL_26ZX_DEGREE:
{
const int32_t Mw_26zx_H_Factor = 5;
// Walker Params
if (m_numRegionsInSlice > 1)
{
int32_t threadSpaceWidth = CODECHAL_GET_WIDTH_IN_BLOCKS(m_frameWidth, 64);
int32_t threadSpaceHeight = (m_walkingPatternParam.dwMaxHeightInRegion);
int32_t spWidth = (threadSpaceWidth + 1) & 0xfffe;
int32_t spHeight = (threadSpaceHeight + 1) & 0xfffe;
int32_t numUnitInRegion = (spWidth + (spHeight - 1) * 2 + m_numRegionsInSlice - 1) / m_numRegionsInSlice;
spHeight = ((spWidth + 1) >> 1) + ((spWidth + ((spHeight - 1) << 1)) + (2 * m_numRegionsInSlice - 1)) / (2 * m_numRegionsInSlice);
int32_t tsWidth = spWidth * Mw_26zx_H_Factor;
int32_t tsHeight = spHeight * (walkerCodecParams->usTotalThreadNumPerLcu);
walkerParams->ScoreboardMask = 0xff;
walkerParams->GlobalResolution.x = tsWidth;
walkerParams->GlobalResolution.y = tsHeight;
walkerParams->GlobalStart.x = 0;
walkerParams->GlobalStart.y = 0;
walkerParams->LocalStart.x = walkerParams->GlobalResolution.x;
walkerParams->LocalStart.y = 0;
walkerParams->LocalEnd.x = 0;
walkerParams->LocalEnd.y = 0;
walkerParams->GlobalOutlerLoopStride.x = walkerParams->GlobalResolution.x;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0;
walkerParams->GlobalInnerLoopUnit.y = walkerParams->GlobalResolution.y;
walkerParams->LocalOutLoopStride.x = 1;
walkerParams->LocalOutLoopStride.y = 0;
walkerParams->LocalInnerLoopUnit.x = -Mw_26zx_H_Factor * 2;
walkerParams->LocalInnerLoopUnit.y = walkerCodecParams->usTotalThreadNumPerLcu;
walkerParams->MiddleLoopExtraSteps = walkerCodecParams->usTotalThreadNumPerLcu - 1;
walkerParams->MidLoopUnitX = 0;
walkerParams->MidLoopUnitY = 1;
walkerParams->BlockResolution.x = walkerParams->GlobalResolution.x;
walkerParams->BlockResolution.y = walkerParams->GlobalResolution.y;
walkerParams->dwGlobalLoopExecCount = 0;
walkerParams->dwLocalLoopExecCount = (numUnitInRegion + 1) * Mw_26zx_H_Factor;
}
else
{
walkerParams->ScoreboardMask = 0xff;
walkerParams->GlobalResolution.x = walkerCodecParams->dwResolutionX * Mw_26zx_H_Factor;
walkerParams->GlobalResolution.y = walkerCodecParams->dwResolutionY * walkerCodecParams->usTotalThreadNumPerLcu;
walkerParams->GlobalOutlerLoopStride.x = walkerParams->GlobalResolution.x;
walkerParams->GlobalOutlerLoopStride.y = 0;
walkerParams->GlobalInnerLoopUnit.x = 0;
walkerParams->GlobalInnerLoopUnit.y = walkerParams->GlobalResolution.y;
walkerParams->LocalOutLoopStride.x = 1;
walkerParams->LocalOutLoopStride.y = 0;
walkerParams->LocalInnerLoopUnit.x = 0xFFF - 10 + 1; // -10 in 2's compliment format;
walkerParams->LocalInnerLoopUnit.y = walkerCodecParams->usTotalThreadNumPerLcu;
walkerParams->MiddleLoopExtraSteps = walkerCodecParams->usTotalThreadNumPerLcu - 1;
walkerParams->MidLoopUnitX = 0;
walkerParams->MidLoopUnitY = 1;
walkerParams->BlockResolution.x = walkerParams->GlobalResolution.x;
walkerParams->BlockResolution.y = walkerParams->GlobalResolution.y;
}
// Scoreboard Settings
scoreBoard->ScoreboardType = m_hwScoreboardType;
scoreBoard->ScoreboardMask = 0xff;
scoreBoard->ScoreboardEnable = true;
// Scoreboard 0
scoreBoard->ScoreboardDelta[0].x = MOS_BITFIELD_VALUE((uint32_t)-5, 4);
scoreBoard->ScoreboardDelta[0].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 1
scoreBoard->ScoreboardDelta[1].x = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
scoreBoard->ScoreboardDelta[1].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 2
scoreBoard->ScoreboardDelta[2].x = 3;
scoreBoard->ScoreboardDelta[2].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 3
scoreBoard->ScoreboardDelta[3].x = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
scoreBoard->ScoreboardDelta[3].y = 0;
// Scoreboard 4
scoreBoard->ScoreboardDelta[4].x = MOS_BITFIELD_VALUE((uint32_t)-2, 4);
scoreBoard->ScoreboardDelta[4].y = walkerCodecParams->usTotalThreadNumPerLcu - 1;
// Scoreboard 5
scoreBoard->ScoreboardDelta[5].x = MOS_BITFIELD_VALUE((uint32_t)-5, 4);
scoreBoard->ScoreboardDelta[5].y = walkerCodecParams->usTotalThreadNumPerLcu - 1;
// Scoreboard 6
scoreBoard->ScoreboardDelta[6].x = 0;
scoreBoard->ScoreboardDelta[6].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
// Scoreboard 7
scoreBoard->ScoreboardDelta[7].x = 5;
scoreBoard->ScoreboardDelta[7].y = MOS_BITFIELD_VALUE((uint32_t)-1, 4);
break;
}
default:
eStatus = MOS_STATUS_INVALID_PARAMETER;
}
#if 0
// leave this for future debug
CODECHAL_ENCODE_NORMALMESSAGE("Regions= = %d", dwNumRegionsInSlice);
CODECHAL_ENCODE_NORMALMESSAGE("threads = %d", walkerCodecParams->usTotalThreadNumPerLcu);
CODECHAL_ENCODE_NORMALMESSAGE("width,height = %d,%d", walkerCodecParams->dwResolutionX, walkerCodecParams->dwResolutionY);
CODECHAL_ENCODE_NORMALMESSAGE("InterfaceDescriptorOffset = %d", walkerParams->InterfaceDescriptorOffset);
CODECHAL_ENCODE_NORMALMESSAGE("UseScoreboard = %d", walkerParams->UseScoreboard);
CODECHAL_ENCODE_NORMALMESSAGE("ScoreboardMask = %d", walkerParams->ScoreboardMask);
CODECHAL_ENCODE_NORMALMESSAGE("ColorCountMinusOne = %d", walkerParams->ColorCountMinusOne);
CODECHAL_ENCODE_NORMALMESSAGE("GroupIdLoopSelect = %d", walkerParams->GroupIdLoopSelect);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalResolution.x = %d", walkerParams->GlobalResolution.x);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalResolution.y = %d", walkerParams->GlobalResolution.y);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalStart.x = %d", walkerParams->GlobalStart.x);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalStart.y = %d", walkerParams->GlobalStart.y);
CODECHAL_ENCODE_NORMALMESSAGE("LocalStart.x = %d", walkerParams->LocalStart.x);
CODECHAL_ENCODE_NORMALMESSAGE("LocalStart.y = %d", walkerParams->LocalStart.y);
CODECHAL_ENCODE_NORMALMESSAGE("LocalEnd.x = %d", walkerParams->LocalEnd.x);
CODECHAL_ENCODE_NORMALMESSAGE("LocalEnd.y = %d", walkerParams->LocalEnd.y);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalOutlerLoopStride.x = %d", walkerParams->GlobalOutlerLoopStride.x);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalOutlerLoopStride.y = %d", walkerParams->GlobalOutlerLoopStride.y);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalInnerLoopUnit.x = %d", walkerParams->GlobalInnerLoopUnit.x);
CODECHAL_ENCODE_NORMALMESSAGE("GlobalInnerLoopUnit.y = %d", walkerParams->GlobalInnerLoopUnit.y);
CODECHAL_ENCODE_NORMALMESSAGE("LocalOutLoopStride.x = %d", walkerParams->LocalOutLoopStride.x);
CODECHAL_ENCODE_NORMALMESSAGE("LocalOutLoopStride.y = %d", walkerParams->LocalOutLoopStride.y);
CODECHAL_ENCODE_NORMALMESSAGE("LocalInnerLoopUnit.x = %d", walkerParams->LocalInnerLoopUnit.x);
CODECHAL_ENCODE_NORMALMESSAGE("LocalInnerLoopUnit.y = %d", walkerParams->LocalInnerLoopUnit.y);
CODECHAL_ENCODE_NORMALMESSAGE("MiddleLoopExtraSteps = %d", walkerParams->MiddleLoopExtraSteps);
CODECHAL_ENCODE_NORMALMESSAGE("MidLoopUnitX = %d", walkerParams->MidLoopUnitX);
CODECHAL_ENCODE_NORMALMESSAGE("MidLoopUnitY = %d", walkerParams->MidLoopUnitY);
CODECHAL_ENCODE_NORMALMESSAGE("BlockResolution.x = %d", walkerParams->BlockResolution.x);
CODECHAL_ENCODE_NORMALMESSAGE("BlockResolution.y = %d", walkerParams->BlockResolution.y);
CODECHAL_ENCODE_NORMALMESSAGE("dwGlobalLoopExecCount = %d", walkerParams->dwGlobalLoopExecCount);
CODECHAL_ENCODE_NORMALMESSAGE("dwLocalLoopExecCount = %d", walkerParams->dwLocalLoopExecCount);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[0].x = %d", scoreBoard->ScoreboardDelta[0].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[0].y = %d", scoreBoard->ScoreboardDelta[0].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[1].x = %d", scoreBoard->ScoreboardDelta[1].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[1].y = %d", scoreBoard->ScoreboardDelta[1].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[2].x = %d", scoreBoard->ScoreboardDelta[2].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[2].y = %d", scoreBoard->ScoreboardDelta[2].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[3].x = %d", scoreBoard->ScoreboardDelta[3].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[3].y = %d", scoreBoard->ScoreboardDelta[3].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[4].x = %d", scoreBoard->ScoreboardDelta[4].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[4].y = %d", scoreBoard->ScoreboardDelta[4].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[5].x = %d", scoreBoard->ScoreboardDelta[5].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[5].y = %d", scoreBoard->ScoreboardDelta[5].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[6].x = %d", scoreBoard->ScoreboardDelta[6].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[6].y = %d", scoreBoard->ScoreboardDelta[6].y);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[7].x = %d", scoreBoard->ScoreboardDelta[7].x);
CODECHAL_ENCODE_NORMALMESSAGE("scoreBoard->ScoreboardDelta[7].y = %d", scoreBoard->ScoreboardDelta[7].y);
#endif
return eStatus;
}
//!
//! \brief    Program and dispatch one HEVC MBENC kernel
//! \details  Selects the kernel state, walker pattern and walker resolution from
//!           encFunctionType, requests SSH space, assigns DSH/SSH space, sets the
//!           interface descriptor, generates the LCU level data, sets the CURBE,
//!           sends the generic kernel commands, the binding table and the surfaces,
//!           and finally adds the media object walker command. When single task
//!           phase is not supported, or this is the last task in the phase, the
//!           batch buffer is ended and the command buffer is submitted.
//! \param    [in] encFunctionType
//!           Media state to run; CODECHAL_MEDIA_STATE_HEVC_I_MBENC,
//!           CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC and
//!           CODECHAL_MEDIA_STATE_HEVC_B_MBENC are handled
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if every step succeeds,
//!           MOS_STATUS_INVALID_PARAMETER for any other media state type
//!
MOS_STATUS CodechalEncHevcStateG10::EncodeMbEncKernel(CODECHAL_MEDIA_STATE_TYPE encFunctionType)
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
PerfTagSetting perfTag;
CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_MBENC_KERNEL);
// Initialize DSH kernel state
PMHW_KERNEL_STATE kernelState = nullptr;
CODECHAL_WALKER_DEGREE walkerDegree = CODECHAL_NO_DEGREE;
uint32_t totalThreadNumPerLcu = 1;
bool customDispatchPattern = false, verticalDispatch = false;
uint32_t walkerResolutionX = 0, walkerResolutionY = 0;
uint32_t numRegionsInSliceSave = m_numRegionsInSlice; // Save the original value in HEVC state
// Pick the kernel state, walker degree and walker resolution for the requested kernel
switch(encFunctionType)
{
case CODECHAL_MEDIA_STATE_HEVC_I_MBENC:
kernelState = &m_mbEncKernelStates[MBENC_I_KRNIDX];
if (m_isMaxLcu64)
{
m_numRegionsInSlice = 1;
walkerDegree = CODECHAL_26_DEGREE;
customDispatchPattern = false;
}
else
{
verticalDispatch = true;
}
// Frame size in 32x32 units, rounded up
walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
break;
case CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC:
kernelState = &m_mbEncKernelStates[MBENC_B_LCU64_KRNIDX];
walkerDegree = CODECHAL_26ZX_DEGREE;
// Frame size aligned up to MAX_LCU_SIZE, then divided by 64 (presumably MAX_LCU_SIZE == 64 - confirm)
walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, MAX_LCU_SIZE) >> 6;
walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, MAX_LCU_SIZE) >> 6;
totalThreadNumPerLcu = m_totalNumThreadsPerLcu;
customDispatchPattern = true;
break;
case CODECHAL_MEDIA_STATE_HEVC_B_MBENC:
kernelState = &m_mbEncKernelStates[MBENC_B_LCU32_KRNIDX];
// Target usage 7 uses the 26-degree walker, every other target usage the 26X-degree walker
walkerDegree = (m_hevcSeqParams->TargetUsage == 7) ? CODECHAL_26_DEGREE : CODECHAL_26X_DEGREE;
// Frame size in 32x32 units, rounded up
walkerResolutionX = MOS_ALIGN_CEIL(m_frameWidth, 32) >> 5;
walkerResolutionY = MOS_ALIGN_CEIL(m_frameHeight, 32) >> 5;
totalThreadNumPerLcu = m_totalNumThreadsPerLcu;
customDispatchPattern = true;
break;
default:
CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MB Enc Media State type");
eStatus = MOS_STATUS_INVALID_PARAMETER;
return eStatus;
}
// Gather the walker inputs; the structure is zeroed first so fields not set below stay 0
CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
walkerCodecParams.WalkerMode = m_walkerMode;
walkerCodecParams.bUseScoreboard = m_useHwScoreboard;
walkerCodecParams.dwResolutionX = walkerResolutionX;
walkerCodecParams.dwResolutionY = walkerResolutionY;
walkerCodecParams.dwNumSlices = m_numSlices;
walkerCodecParams.WalkerDegree = walkerDegree;
walkerCodecParams.bUseVerticalRasterScan = verticalDispatch;
walkerCodecParams.usTotalThreadNumPerLcu = (uint16_t)totalThreadNumPerLcu;
MHW_WALKER_PARAMS walkerParams;
MHW_VFE_SCOREBOARD scoreBoard;
// NOTE(review): no statement between the "m_numRegionsInSlice = 1" override above and this
// restore reads m_numRegionsInSlice, so within this function the override looks like it has
// no effect - confirm whether the restore was meant to happen after the kernel is programmed.
m_numRegionsInSlice = numRegionsInSliceSave; // Restore the original value to HEVC state
// Both B kernels use the custom dispatch pattern, which also receives scoreBoard;
// the I kernel goes through the generic walker parameter initialization instead.
if(customDispatchPattern)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(GetCustomDispatchPattern(&walkerParams, &scoreBoard, &walkerCodecParams));
}
else
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(
m_hwInterface,
&walkerParams,
&walkerCodecParams));
}
// If Single Task Phase is not enabled, use BT count for the kernel state.
if (m_firstTaskInPhase || !m_singleTaskPhaseSupported)
{
uint32_t maxBtCount = m_singleTaskPhaseSupported ?
m_maxBtCount : kernelState->KernelParams.iBTCount;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
m_stateHeapInterface,
maxBtCount));
m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
CODECHAL_ENCODE_CHK_STATUS_RETURN(VerifySpaceAvailable());
}
// Set up the DSH/SSH as normal
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
m_stateHeapInterface,
kernelState,
false,
0,
false,
m_storeData));
MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
MOS_ZeroMemory(&idParams, sizeof(idParams));
idParams.pKernelState = kernelState;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
m_stateHeapInterface,
1,
&idParams));
// Generate Lcu Level Data
CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateLcuLevelData());
// setup curbe
// (both B kernel variants share SetCurbeMbEncBKernel)
switch(encFunctionType)
{
case CODECHAL_MEDIA_STATE_HEVC_I_MBENC:
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncIKernel());
break;
case CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC:
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncBKernel());
break;
case CODECHAL_MEDIA_STATE_HEVC_B_MBENC:
CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbeMbEncBKernel());
break;
default:
CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC type requested");
eStatus = MOS_STATUS_INVALID_PARAMETER;
return eStatus;
}
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_DSH_TYPE,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
encFunctionType,
kernelState));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_ISH_TYPE,
kernelState));
)
MOS_COMMAND_BUFFER cmdBuffer;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
// The custom scoreboard is only handed over when the custom dispatch pattern was used above
SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
sendKernelCmdsParams.EncFunctionType = encFunctionType;
sendKernelCmdsParams.pKernelState = kernelState;
sendKernelCmdsParams.bEnableCustomScoreBoard = customDispatchPattern;
sendKernelCmdsParams.pCustomScoreBoard = customDispatchPattern ? &scoreBoard : nullptr;
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
// Add binding table
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
m_stateHeapInterface,
kernelState));
// send surfaces
// (both B kernel variants share SendMbEncSurfacesBKernel)
switch(encFunctionType)
{
case CODECHAL_MEDIA_STATE_HEVC_I_MBENC:
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesIKernel(&cmdBuffer));
break;
case CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC:
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesBKernel(&cmdBuffer));
break;
case CODECHAL_MEDIA_STATE_HEVC_B_MBENC:
CODECHAL_ENCODE_CHK_STATUS_RETURN(SendMbEncSurfacesBKernel(&cmdBuffer));
break;
default:
CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported MBENC type requested");
eStatus = MOS_STATUS_INVALID_PARAMETER;
return eStatus;
}
CODECHAL_DEBUG_TOOL(
// Dump the two kernel input buffers; the name prefix and media state follow m_pictureCodingType
const uint8_t dumpBufNum = 2;
PCODECHAL_ENCODE_BUFFER outBufs[dumpBufNum];
outBufs[0] = &m_lcuLevelInputData;
outBufs[1] = &m_concurrentThreadGroupData;
if (m_pictureCodingType == I_TYPE)
{
const char * bufNames[dumpBufNum];
bufNames[0] = "HEVC_I_MBENC_LcuLevelData_In";
bufNames[1] = "HEVC_I_MBENC_ConcurrentThreadGroupData_In";
for (uint8_t i = 0; i < dumpBufNum; i++)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&outBufs[i]->sResource,
CodechalDbgAttr::attrInput,
bufNames[i],
outBufs[i]->dwSize,
0,
CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
}
}
else
{
const char * bufNames[dumpBufNum];
bufNames[0] = "HEVC_B_MBENC_LcuLevelData_In";
bufNames[1] = "HEVC_B_MBENC_ConcurrentThreadGroupData_In";
for (uint8_t i = 0; i < dumpBufNum; i++)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&outBufs[i]->sResource,
CodechalDbgAttr::attrInput,
bufNames[i],
outBufs[i]->dwSize,
0,
CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
}
}
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderEngineInterface->AddMediaObjectWalkerCmd(
&cmdBuffer,
&walkerParams));
CODECHAL_ENCODE_CHK_STATUS_RETURN(EndStatusReport(&cmdBuffer, encFunctionType));
// Add dump for MBEnc surface state heap here
CODECHAL_DEBUG_TOOL(
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
encFunctionType,
MHW_SSH_TYPE,
kernelState));
)
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
m_stateHeapInterface,
kernelState));
// End the batch buffer only when this kernel closes the task phase (or task phases are not used)
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
m_stateHeapInterface));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
&cmdBuffer,
nullptr));
}
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
&cmdBuffer,
encFunctionType,
nullptr)));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(&cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
// Submit under the same condition that ended the batch buffer above.
// NOTE(review): the result of pfnSubmitCommandBuffer is not checked here, so a failed
// submission would still return eStatus unchanged - confirm this is intentional.
if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
{
m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
m_lastTaskInPhase = false;
}
return eStatus;
}
//!
//! \brief    Run the ENC kernel sequence for the current picture
//! \details  In order: the CSC kernel when m_cscDsState requires it, BRC init/reset
//!           (when m_brcInit, m_brcReset or m_firstFrame is set), scaling and
//!           conversion, the 16x and 4x HME kernels when HME is enabled, the ME
//!           kernel in its intra distortion modes, BRC frame update, BRC LCU
//!           update and finally the MBENC kernel matching the picture coding type.
//!           After MBENC the render context sync object is signalled if it is not
//!           null. P-type pictures are rejected, and in PAK-only test mode the
//!           function returns right after the optional CSC step.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success,
//!           MOS_STATUS_INVALID_PARAMETER when m_pictureCodingType is P_TYPE
//!
MOS_STATUS CodechalEncHevcStateG10::EncodeKernelFunctions()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
if (m_pictureCodingType == P_TYPE)
{
CODECHAL_ENCODE_ASSERTMESSAGE("GEN10 HEVC VME does not support P slice");
eStatus = MOS_STATUS_INVALID_PARAMETER;
return eStatus;
}
if (m_cscDsState->RequireCsc())
{
m_firstTaskInPhase = true;
CodechalEncodeCscDs::KernelParams cscScalingKernelParams;
// Csc ARGB linear to NV12 Tile Y studio range
MOS_ZeroMemory(&cscScalingKernelParams, sizeof(cscScalingKernelParams));
cscScalingKernelParams.cscOrCopyOnly = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_cscDsState->CscKernel(&cscScalingKernelParams));
}
CODECHAL_DEBUG_TOOL(
if (!m_is10BitHevc){
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
m_rawSurfaceToEnc,
CodechalDbgAttr::attrEncodeRawInputSurface,
"SrcSurf"))})
if (m_pakOnlyTest)
{
// Skip all ENC kernel operations for now it is in the PAK only test mode.
// PAK and CU records will be passed via the app
return eStatus;
}
// BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface
// BRC init is called even for CQP mode hence also checking for first frame flag
if (m_brcInit || m_brcReset || m_firstFrame)
{
// Without a preceding CSC kernel, BRC init/reset is both the first and the last task of its phase
if (!m_cscDsState->RequireCsc())
{
m_firstTaskInPhase = m_lastTaskInPhase=true;
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcInitResetKernel());
m_brcInit = m_brcReset = false;
}
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcHistoryBuffer,
CodechalDbgAttr::attrOutput,
"Output_History",
m_brcHistoryBufferSize,
0,
CODECHAL_MEDIA_STATE_BRC_INIT_RESET)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_brcBuffers.sMeBrcDistortionBuffer,
CodechalDbgAttr::attrOutput,
"Output_Distortion",
CODECHAL_MEDIA_STATE_BRC_INIT_RESET)));
// Scaled surfaces are required to run both HME and IFrameDist
bool scalingEnabled = (m_scalingEnabled || m_isMaxLcu64);
if (scalingEnabled || m_is10BitHevc)
{
//Use a different performance tag ID for scaling and HME
m_osInterface->pfnResetPerfBufferID(m_osInterface);
m_firstTaskInPhase = true;
m_lastTaskInPhase = false;
CODECHAL_ENCODE_CHK_STATUS_RETURN(PerformScalingAndConversion());
}
// HME runs the 16x level (when enabled) before the 4x level
if (m_hmeEnabled)
{
if (m_b16XMeEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_16x, HEVC_ME_DIST_TYPE_INTER_BRC_DIST));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x, HEVC_ME_DIST_TYPE_INTER_BRC_DIST));
}
// Getting Intra distortion for I-frame
if(m_pictureCodingType == I_TYPE)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x, HEVC_ME_DIST_TYPE_INTRA_BRC_DIST));
}
// Calling the Me Kernel for both Intra and Inter Frames to get Intra distortion
// This ME call is flagged as the last task of the scaling/HME phase
m_lastTaskInPhase = true;
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMeKernel(HME_LEVEL_4x, HEVC_ME_DIST_TYPE_INTRA));
CODECHAL_DEBUG_TOOL(
if (m_is10BitHevc) {
//Dump format converted surface
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpYUVSurface(
&m_formatConvertedSurface[0],
CodechalDbgAttr::attrEncodeRawInputSurface,
"SrcSurf"))
}
if (m_hmeEnabled) {
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_s4XMeMvDataBuffer.OsResource,
CodechalDbgAttr::attrOutput,
"MvData",
m_s4XMeMvDataBuffer.dwHeight * m_s4XMeMvDataBuffer.dwPitch,
CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 32), 64) * (m_downscaledFrameFieldHeightInMb4x * 4) : 0,
CODECHAL_MEDIA_STATE_4X_ME));
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_s4XMeDistortionBuffer.OsResource,
CodechalDbgAttr::attrOutput,
"MeDist",
m_s4XMeDistortionBuffer.dwHeight * m_s4XMeDistortionBuffer.dwPitch,
CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb4x * 8), 64) * MOS_ALIGN_CEIL((m_downscaledFrameFieldHeightInMb4x * 4 * 10), 8) : 0,
CODECHAL_MEDIA_STATE_4X_ME));
if (m_b16XMeEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_s16XMeMvDataBuffer.OsResource,
CodechalDbgAttr::attrOutput,
"MvData",
m_s16XMeMvDataBuffer.dwHeight * m_s16XMeMvDataBuffer.dwPitch,
CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb16x * 32), 64) * (m_downscaledFrameFieldHeightInMb16x * 4) : 0,
CODECHAL_MEDIA_STATE_16X_ME));
if (m_b32XMeEnabled)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_s32XMeMvDataBuffer.OsResource,
CodechalDbgAttr::attrOutput,
"MvData",
m_s32XMeMvDataBuffer.dwHeight * m_s32XMeMvDataBuffer.dwPitch,
CodecHal_PictureIsBottomField(m_currOriginalPic) ? MOS_ALIGN_CEIL((m_downscaledWidthInMb32x * 32), 64) * (m_downscaledFrameFieldHeightInMb32x * 4) : 0,
CODECHAL_MEDIA_STATE_32X_ME));
}
}
})
//Reset to use a different performance tag ID
m_osInterface->pfnResetPerfBufferID(m_osInterface);
m_firstTaskInPhase = true;
m_lastTaskInPhase = false;
// Wait for PAK if necessary
CODECHAL_ENCODE_CHK_STATUS_RETURN(WaitForPak());
// BrcFrameEncUpdate and BrcLcuUpdate kernels are called even if CQP
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcFrameUpdateKernel());
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeBrcLcuUpdateKernel());
// Reset to use a different performance tag ID
m_osInterface->pfnResetPerfBufferID(m_osInterface);
// MBENC is the last task of this phase
m_lastTaskInPhase = true;
if (m_hevcPicParams->CodingType == I_TYPE)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
}
else
{
// Non-I pictures generate the walking control region first, then run the B kernel for the LCU size in use
CODECHAL_ENCODE_CHK_STATUS_RETURN(GenerateWalkingControlRegion());
CODECHAL_ENCODE_CHK_STATUS_RETURN(EncodeMbEncKernel(
m_isMaxLcu64 ? CODECHAL_MEDIA_STATE_HEVC_LCU64_B_MBENC : CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
}
// Notify PAK engine once ENC is done
// (m_pakOnlyTest is always false here because of the early return above)
if (!m_pakOnlyTest && !Mos_ResourceIsNull(&m_resSyncObjectRenderContextInUse))
{
MOS_SYNC_PARAMS syncParams = g_cInitSyncParams;
syncParams.GpuContext = m_renderContext;
syncParams.presSyncResource = &m_resSyncObjectRenderContextInUse;
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnEngineSignal(m_osInterface, &syncParams));
}
// Everything below only dumps BRC and MBENC buffers/surfaces through the debug interface
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcHistoryBuffer,
CodechalDbgAttr::attrOutput,
"Output_HistoryBuffer",
m_brcHistoryBufferSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcPakStatisticBuffer[m_brcBuffers.uiCurrBrcPakStasIdxForRead],
CodechalDbgAttr::attrOutput,
"Output_PakStats",
m_brcPakStatisticsSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcImageStatesReadBuffer[m_currRecycledBufIdx],
CodechalDbgAttr::attrOutput,
"Output_ImgStateRead",
m_brcBuffers.dwBrcHcpPicStateSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_brcBuffers.resBrcImageStatesWriteBuffer[m_currRecycledBufIdx],
CodechalDbgAttr::attrOutput,
"Output_ImgStateWrite",
m_brcBuffers.dwBrcHcpPicStateSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
(MOS_RESOURCE*)m_allocator->GetResource(m_standard, brcInputForEncKernel),
CodechalDbgAttr::attrOutput,
"Output_CombinedEnc",
m_brcCombinedEncBufferSize,
0,
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_brcBuffers.sBrcConstantDataBuffer[m_currRecycledBufIdx],
CodechalDbgAttr::attrOutput,
"Output_ConstData",
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_brcBuffers.sMeBrcDistortionBuffer,
CodechalDbgAttr::attrOutput,
"Output_Distortion",
CODECHAL_MEDIA_STATE_BRC_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_cuSplitSurface,
CodechalDbgAttr::attrOutput,
"CUSplitSurface",
CODECHAL_MEDIA_STATE_HEVC_B_MBENC)));
CODECHAL_DEBUG_TOOL(
// The same five buffers are dumped for I and non-I pictures; only the media state tag differs,
// and the non-I branch additionally dumps the LCU64/CU32 job queue surface when m_isMaxLcu64 is set
const uint8_t dumpBufNum = 5;
PCODECHAL_ENCODE_BUFFER outBufs[dumpBufNum];
outBufs[0] = &m_64x64DistortionSurface;
outBufs[1] = &m_encConstantTableForB;
outBufs[2] = &m_jobQueueHeaderSurfaceForBLcu64;
outBufs[3] = &m_jobQueueHeaderSurfaceForB;
outBufs[4] = &m_encConstantTableForLcu64B;
if (m_pictureCodingType == I_TYPE)
{
const char * bufNames[dumpBufNum];
bufNames[0] = "DIST_64x64";
bufNames[1] = "LUT_LCU32";
bufNames[2] = "JobQueueSurfaceHeader_BLcu64";
bufNames[3] = "JobQueueSurfaceHeader_B";
bufNames[4] = "LUT_LCU64";
for (uint8_t i = 0; i < dumpBufNum; i++)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&outBufs[i]->sResource,
CodechalDbgAttr::attrOutput,
bufNames[i],
outBufs[i]->dwSize,
0,
CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_jobQueueDataSurfaceForBLcu64,
CodechalDbgAttr::attrOutput,
"JobQueueDataSurface_BLcu64",
CODECHAL_MEDIA_STATE_HEVC_I_MBENC));
}
else
{
const char * bufNames[dumpBufNum];
bufNames[0] = "DIST_64x64";
bufNames[1] = "LUT_LCU32";
bufNames[2] = "JobQueueSurfaceHeader_BLcu64";
bufNames[3] = "JobQueueSurfaceHeader_B";
bufNames[4] = "LUT_LCU64";
for (uint8_t i = 0; i < dumpBufNum; i++)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&outBufs[i]->sResource,
CodechalDbgAttr::attrOutput,
bufNames[i],
outBufs[i]->dwSize,
0,
CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
}
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_jobQueueDataSurfaceForBLcu64,
CodechalDbgAttr::attrOutput,
"JobQueueDataSurface_BLcu64",
CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
if (m_isMaxLcu64)
{
CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_jobQueueDataSurfaceForBLcu64Cu32,
CodechalDbgAttr::attrOutput,
"JobQueueDataSurface_Lcu64_Cu32",
CODECHAL_MEDIA_STATE_HEVC_B_MBENC));
}
}
)
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_mbStatisticsSurface,
CodechalDbgAttr::attrOutput,
"Output_mbstats",
CODECHAL_MEDIA_STATE_HEVC_BRC_LCU_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpSurface(
&m_mbSplitSurface,
CodechalDbgAttr::attrOutput,
"Output_mbsplit",
CODECHAL_MEDIA_STATE_HEVC_BRC_LCU_UPDATE)));
CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpBuffer(
&m_mvAndDistortionSumSurface.sResource,
CodechalDbgAttr::attrOutput,
"Output_hmemv",
m_mvAndDistortionSumSurface.dwSize,
0,
CODECHAL_MEDIA_STATE_HEVC_BRC_LCU_UPDATE)));
return eStatus;
}
//!
//! \brief    Fetch one kernel's header from the combined GEN10 HEVC kernel binary
//!           and derive that kernel's size
//! \details  The binary is read as a CODECHAL_ENC_HEVC_KERNEL_HEADER_G10 header
//!           table. The size is the distance between the selected kernel's start
//!           offset and the start offset of the table entry that follows it. If
//!           no entry follows (the selected entry is the one after which the table
//!           ends, Gen10_HEVC_brc_blockcopy), the incoming *krnSize is used as the
//!           end offset instead.
//! \param    [in] binary
//!           Pointer to the combined kernel binary
//! \param    [in] operation
//!           Encode operation that selects the kernel family
//! \param    [in] krnStateIdx
//!           Kernel index inside the family; only read for ENC_MBENC and ENC_BRC
//! \param    [out] krnHeader
//!           Receives a copy of the selected CODECHAL_KERNEL_HEADER
//! \param    [in,out] krnSize
//!           In: end offset used when no table entry follows the selected one.
//!           Out: size of the selected kernel
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success,
//!           MOS_STATUS_INVALID_PARAMETER for an unsupported operation or index
//!
MOS_STATUS CodechalEncHevcStateG10::GetKernelHeaderAndSize(
    void         *binary,
    EncOperation operation,
    uint32_t     krnStateIdx,
    void         *krnHeader,
    uint32_t     *krnSize)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(binary);
    CODECHAL_ENCODE_CHK_NULL_RETURN(krnHeader);
    CODECHAL_ENCODE_CHK_NULL_RETURN(krnSize);

    PCODECHAL_ENC_HEVC_KERNEL_HEADER_G10 headerTable = (PCODECHAL_ENC_HEVC_KERNEL_HEADER_G10)binary;
    PCODECHAL_KERNEL_HEADER              selected    = nullptr;

    // Step 1: locate the table entry for the requested kernel
    if (operation == ENC_SCALING_CONVERSION)
    {
        selected = &headerTable->Gen10_HEVC_DS_Convert;
    }
    else if (operation == ENC_ME)
    {
        selected = &headerTable->Gen10_HEVC_HME;
    }
    else if (operation == ENC_MBENC)
    {
        switch (krnStateIdx)
        {
        case MBENC_I_KRNIDX:
            selected = &headerTable->Gen10_HEVC_Intra;
            break;
        case MBENC_B_LCU32_KRNIDX:
            selected = &headerTable->Gen10_HEVC_Enc_B;
            break;
        case MBENC_B_LCU64_KRNIDX:
            selected = &headerTable->Gen10_HEVC_Enc_LCU64_B;
            break;
        default:
            CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
            return MOS_STATUS_INVALID_PARAMETER;
        }
    }
    else if (operation == ENC_BRC)
    {
        switch (krnStateIdx)
        {
        case CODECHAL_HEVC_BRC_INIT:
            selected = &headerTable->Gen10_HEVC_brc_init;
            break;
        case CODECHAL_HEVC_BRC_RESET:
            selected = &headerTable->Gen10_HEVC_brc_reset;
            break;
        case CODECHAL_HEVC_BRC_FRAME_UPDATE:
            selected = &headerTable->Gen10_HEVC_brc_update;
            break;
        case CODECHAL_HEVC_BRC_LCU_UPDATE:
            selected = &headerTable->Gen10_HEVC_brc_lcuqp;
            break;
        default:
            CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported BRC mode requested");
            return MOS_STATUS_INVALID_PARAMETER;
        }
    }
    else
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
        return MOS_STATUS_INVALID_PARAMETER;
    }

    // Step 2: hand a copy of the header back to the caller
    *((PCODECHAL_KERNEL_HEADER)krnHeader) = *selected;

    // Step 3: the kernel ends where the next table entry starts; past the last
    // entry the caller-provided *krnSize stands in for the end offset
    PCODECHAL_KERNEL_HEADER following  = selected + 1;
    PCODECHAL_KERNEL_HEADER pastTheEnd = &(headerTable->Gen10_HEVC_brc_blockcopy) + 1;

    uint32_t endOffset = *krnSize;
    if (following < pastTheEnd)
    {
        endOffset = following->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
    }

    *krnSize = endOffset - (selected->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);

    return eStatus;
}
//!
//! \brief    Fill the kernel parameters for one GEN10 HEVC encode kernel
//! \details  The thread count (taken from the render engine HW caps) and an ID
//!           count of 1 are always written. The binding table count, the CURBE
//!           length (CURBE structure size aligned up to the state heap's CURBE
//!           alignment) and the block width/height depend on the kernel selected
//!           by encOperation and idx.
//! \param    [in] encOperation
//!           Encode operation that selects the kernel family
//! \param    [out] kernelParams
//!           Kernel parameter structure to fill
//! \param    [in] idx
//!           Kernel index inside the family; only read for ENC_MBENC and ENC_BRC
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success,
//!           MOS_STATUS_INVALID_PARAMETER for an unsupported operation or index
//!
MOS_STATUS CodechalEncHevcStateG10::SetKernelParams(
    EncOperation      encOperation,
    PMHW_KERNEL_PARAM kernelParams,
    uint32_t          idx)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(kernelParams);

    // Settings shared by every kernel
    kernelParams->iThreadCount = m_renderEngineInterface->GetHwCaps()->dwMaxThreads;
    kernelParams->iIdCount     = 1;

    auto curbeAlignment = m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment();

    // Writes the four per-kernel fields; curbeSize is the unaligned CURBE structure size
    auto applySettings = [kernelParams, curbeAlignment](int btCount, size_t curbeSize, int blockWidth, int blockHeight)
    {
        kernelParams->iBTCount     = btCount;
        kernelParams->iCurbeLength = MOS_ALIGN_CEIL(curbeSize, (size_t)curbeAlignment);
        kernelParams->iBlockWidth  = blockWidth;
        kernelParams->iBlockHeight = blockHeight;
    };

    if (encOperation == ENC_SCALING_CONVERSION)
    {
        applySettings(SCALING_CONVERSION_END - SCALING_CONVERSION_BEGIN, sizeof(DsConvCurbeDataG10), 8, 8);
    }
    else if (encOperation == ENC_ME)
    {
        applySettings(HME_END - HME_BEGIN, sizeof(CODECHAL_ENC_HEVC_ME_CURBE_G10), 32, 32);
    }
    else if (encOperation == ENC_MBENC)
    {
        switch (idx)
        {
        case MBENC_I_KRNIDX:
            applySettings(MBENC_I_FRAME_END - MBENC_I_FRAME_BEGIN, sizeof(CODECHAL_ENC_HEVC_MBENC_I_CURBE_G10), 32, 32);
            break;
        case MBENC_B_LCU32_KRNIDX:
            applySettings(MBENC_B_FRAME_LCU32_END - MBENC_B_FRAME_LCU32_BEGIN, sizeof(CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10), 32, 32);
            break;
        case MBENC_B_LCU64_KRNIDX:
            // Same CURBE layout as the LCU32 B kernel, but 64x64 blocks
            applySettings(MBENC_B_FRAME_LCU64_END - MBENC_B_FRAME_LCU64_BEGIN, sizeof(CODECHAL_ENC_HEVC_MBENC_B_CURBE_G10), 64, 64);
            break;
        default:
            CODECHAL_ENCODE_ASSERT(false);
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            break;
        }
    }
    else if (encOperation == ENC_BRC)
    {
        switch (idx)
        {
        case CODECHAL_HEVC_BRC_INIT:
        case CODECHAL_HEVC_BRC_RESET:
            applySettings(
                BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN,
                sizeof(CODECHAL_ENC_HEVC_BRC_INITRESET_CURBE_G10),
                CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE,
                CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE);
            break;
        case CODECHAL_HEVC_BRC_FRAME_UPDATE:
            applySettings(
                BRC_UPDATE_END - BRC_UPDATE_BEGIN,
                sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10),
                CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE,
                CODECHAL_HEVC_FRAME_BRC_BLOCK_SIZE);
            break;
        case CODECHAL_HEVC_BRC_LCU_UPDATE:
            // Sized with the frame-update CURBE structure, like the frame update kernel
            applySettings(
                BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN,
                sizeof(CODECHAL_ENC_HEVC_BRC_UPDATE_CURBE_G10),
                CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE,
                CODECHAL_HEVC_LCU_BRC_BLOCK_SIZE);
            break;
        default:
            CODECHAL_ENCODE_ASSERT(false);
            eStatus = MOS_STATUS_INVALID_PARAMETER;
            break;
        }
    }
    else
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
        eStatus = MOS_STATUS_INVALID_PARAMETER;
    }

    return eStatus;
}
//!
//! \brief    Initialize the generic binding table for one GEN10 HEVC encode kernel
//! \details  The table is zeroed, then the entry count and start offset for the
//!           kernel selected by encOperation and idx are written, and entry i is
//!           set to i for every entry. On an unsupported operation or index the
//!           table is left zeroed.
//! \param    [in] encOperation
//!           Encode operation that selects the kernel family
//! \param    [out] bindingTable
//!           Binding table to initialize
//! \param    [in] idx
//!           Kernel index inside the family; only read for ENC_MBENC and ENC_BRC
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success,
//!           MOS_STATUS_INVALID_PARAMETER for an unsupported operation or index
//!
MOS_STATUS CodechalEncHevcStateG10::SetBindingTable(
    EncOperation                           encOperation,
    PCODECHAL_ENCODE_BINDING_TABLE_GENERIC bindingTable,
    uint32_t                               idx)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_CHK_NULL_RETURN(bindingTable);
    MOS_ZeroMemory(bindingTable, sizeof(*bindingTable));

    // Resolved into locals first so that every failure path leaves the table zeroed
    uint32_t entryCount  = 0;
    uint32_t startOffset = 0;

    if (encOperation == ENC_SCALING_CONVERSION)
    {
        entryCount  = SCALING_CONVERSION_END - SCALING_CONVERSION_BEGIN;
        startOffset = SCALING_CONVERSION_BEGIN;
    }
    else if (encOperation == ENC_ME)
    {
        entryCount  = HME_END - HME_BEGIN;
        startOffset = HME_BEGIN;
    }
    else if (encOperation == ENC_MBENC)
    {
        switch (idx)
        {
        case MBENC_I_KRNIDX:
            entryCount  = MBENC_I_FRAME_END - MBENC_I_FRAME_BEGIN;
            startOffset = MBENC_I_FRAME_BEGIN;
            break;
        case MBENC_B_LCU32_KRNIDX:
            entryCount  = MBENC_B_FRAME_LCU32_END - MBENC_B_FRAME_LCU32_BEGIN;
            startOffset = MBENC_B_FRAME_LCU32_BEGIN;
            break;
        case MBENC_B_LCU64_KRNIDX:
            entryCount  = MBENC_B_FRAME_LCU64_END - MBENC_B_FRAME_LCU64_BEGIN;
            startOffset = MBENC_B_FRAME_LCU64_BEGIN;
            break;
        default:
            CODECHAL_ENCODE_ASSERT(false);
            return MOS_STATUS_INVALID_PARAMETER;
        }
    }
    else if (encOperation == ENC_BRC)
    {
        switch (idx)
        {
        // BRC init and reset share one binding table layout
        case CODECHAL_HEVC_BRC_INIT:
        case CODECHAL_HEVC_BRC_RESET:
            entryCount  = BRC_INIT_RESET_END - BRC_INIT_RESET_BEGIN;
            startOffset = BRC_INIT_RESET_BEGIN;
            break;
        case CODECHAL_HEVC_BRC_FRAME_UPDATE:
            entryCount  = BRC_UPDATE_END - BRC_UPDATE_BEGIN;
            startOffset = BRC_UPDATE_BEGIN;
            break;
        case CODECHAL_HEVC_BRC_LCU_UPDATE:
            entryCount  = BRC_LCU_UPDATE_END - BRC_LCU_UPDATE_BEGIN;
            startOffset = BRC_LCU_UPDATE_BEGIN;
            break;
        default:
            CODECHAL_ENCODE_ASSERT(false);
            return MOS_STATUS_INVALID_PARAMETER;
        }
    }
    else
    {
        CODECHAL_ENCODE_ASSERTMESSAGE("Unsupported ENC mode requested");
        return MOS_STATUS_INVALID_PARAMETER;
    }

    bindingTable->dwNumBindingTableEntries  = entryCount;
    bindingTable->dwBindingTableStartOffset = startOffset;

    // Identity mapping: entry n holds n
    for (uint32_t entry = 0; entry < bindingTable->dwNumBindingTableEntries; ++entry)
    {
        bindingTable->dwBindingTableEntries[entry] = entry;
    }

    return eStatus;
}
//!
//! \brief    Get the largest binding table count needed by any kernel phase
//! \details  Every kernel's binding table count is aligned up to the state heap's
//!           binding table index alignment, the aligned counts of the kernels
//!           grouped into each phase are summed, and the largest phase total is
//!           returned.
//! \return   uint32_t
//!           Maximum of the four per-phase binding table totals
//!
uint32_t CodechalEncHevcStateG10::GetMaxBtCount()
{
    auto btIdxAlignment = m_stateHeapInterface->pStateHeapInterface->GetBtIdxAlignment();

    // Aligned binding table counts of the individual kernels
    auto brcInitBt   = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_INIT].KernelParams.iBTCount, btIdxAlignment);
    auto scalingBt   = MOS_ALIGN_CEIL(m_scalingAndConversionKernelState->KernelParams.iBTCount, btIdxAlignment);
    auto meBt        = MOS_ALIGN_CEIL(m_meKernelState->KernelParams.iBTCount, btIdxAlignment);
    auto brcFrameBt  = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_FRAME_UPDATE].KernelParams.iBTCount, btIdxAlignment);
    auto brcLcuBt    = MOS_ALIGN_CEIL(m_brcKernelStates[CODECHAL_HEVC_BRC_LCU_UPDATE].KernelParams.iBTCount, btIdxAlignment);
    auto mbEncIBt    = MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_I_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
    auto mbEncBL64Bt = MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_B_LCU64_KRNIDX].KernelParams.iBTCount, btIdxAlignment);
    auto mbEncBL32Bt = MOS_ALIGN_CEIL(m_mbEncKernelStates[MBENC_B_LCU32_KRNIDX].KernelParams.iBTCount, btIdxAlignment);

    // Phase 1: BRC init
    uint32_t phaseBrcInit = brcInitBt;

    // Phase 2: 4x, 16x DS, 4x ME, 16x ME. Me in Intra Mode
    uint32_t phaseScalingMe = 2 * (scalingBt + meBt) + meBt;
    if (m_is10BitHevc)
    {
        // If 10 bit HEVC is supported, it might require 10 bit to 8 bit conversion of reference surface
        phaseScalingMe += scalingBt;
    }

    // Phase 3: BRC frame update + BRC LCU update + I kernel
    uint32_t phaseIKernel = brcFrameBt + brcLcuBt + mbEncIBt;

    // Phase 4: BRC frame update + BRC LCU update + the larger of the two B kernels
    uint32_t phaseBKernel = brcFrameBt + brcLcuBt + MOS_MAX(mbEncBL64Bt, mbEncBL32Bt);

    uint32_t largest = MOS_MAX(phaseBrcInit, phaseScalingMe);
    if (phaseIKernel > largest)
    {
        largest = phaseIKernel;
    }
    if (phaseBKernel > largest)
    {
        largest = phaseBKernel;
    }

    return largest;
}
//!
//! \brief    Creates and initializes the scaling and conversion kernel state
//! \details  Allocates the kernel state and its binding table, looks up the kernel
//!           header inside the combined kernel binary, fills in the kernel parameters
//!           and binding table, computes the SSH / binding-table sizes and finally
//!           calls MhwInitISH for the kernel state.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::InitKernelStateScalingAndConversion()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    m_scalingAndConversionKernelState = MOS_New(MHW_KERNEL_STATE);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalingAndConversionKernelState);

    m_scalingAndConversionKernelBindingTable = static_cast<PCODECHAL_ENCODE_BINDING_TABLE_GENERIC>(
        MOS_AllocAndZeroMemory(sizeof(GenericBindingTable)));
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_scalingAndConversionKernelBindingTable);

    MHW_KERNEL_STATE &kernelState = *m_scalingAndConversionKernelState;

    // krnSize goes in as the combined binary size and is updated by the lookup
    CODECHAL_KERNEL_HEADER krnHeader;
    uint32_t               krnSize = m_combinedKernelSize;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
        m_kernelBinary,
        ENC_SCALING_CONVERSION,
        0,
        &krnHeader,
        &krnSize));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
        ENC_SCALING_CONVERSION,
        &kernelState.KernelParams,
        0));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
        ENC_SCALING_CONVERSION,
        m_scalingAndConversionKernelBindingTable,
        0));

    // Binary location/size are filled in only after SetKernelParams has run
    kernelState.KernelParams.iSize   = krnSize;
    kernelState.KernelParams.pBinary = m_kernelBinary + (krnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
    kernelState.dwCurbeOffset        = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
        m_stateHeapInterface,
        kernelState.KernelParams.iBTCount,
        &kernelState.dwSshSize,
        &kernelState.dwBindingTableSize));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, &kernelState));

    return eStatus;
}
//!
//! \brief    Creates and initializes the ME kernel state
//! \details  Allocates the ME kernel state and its binding table, locates the ENC_ME
//!           kernel in the combined kernel binary, sets the kernel parameters and
//!           binding table, computes the SSH / binding-table sizes and calls
//!           MhwInitISH for the kernel state.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::InitKernelStateMe()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    m_meKernelState = MOS_New(MHW_KERNEL_STATE);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_meKernelState);

    m_meKernelBindingTable = static_cast<PCODECHAL_ENCODE_BINDING_TABLE_GENERIC>(
        MOS_AllocAndZeroMemory(sizeof(GenericBindingTable)));
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_meKernelBindingTable);

    // Look up the ME kernel; meKernelSize starts as the combined binary size
    CODECHAL_KERNEL_HEADER meKernelHeader;
    uint32_t               meKernelSize = m_combinedKernelSize;
    CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
        m_kernelBinary,
        ENC_ME,
        0,
        &meKernelHeader,
        &meKernelSize));

    PMHW_KERNEL_STATE meState = m_meKernelState;

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
        ENC_ME,
        &meState->KernelParams,
        0));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
        ENC_ME,
        m_meKernelBindingTable,
        0));

    meState->KernelParams.iSize   = meKernelSize;
    meState->KernelParams.pBinary = m_kernelBinary + (meKernelHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
    meState->dwCurbeOffset        = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
        m_stateHeapInterface,
        meState->KernelParams.iBTCount,
        &meState->dwSshSize,
        &meState->dwBindingTableSize));

    CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, meState));

    return eStatus;
}
//!
//! \brief    Creates and initializes the BRC kernel states
//! \details  Allocates CODECHAL_HEVC_BRC_NUM kernel states and binding tables, then
//!           initializes every entry except index 0 (CODECHAL_HEVC_BRC_COARSE_INTRA,
//!           which is not used on Gen10): kernel header lookup, kernel parameters,
//!           binding table, SSH / binding-table sizes and MhwInitISH.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::InitKernelStateBrc()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    m_numBrcKrnStates = CODECHAL_HEVC_BRC_NUM;

    m_brcKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numBrcKrnStates);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelStates);

    m_brcKernelBindingTable = (PCODECHAL_ENCODE_BINDING_TABLE_GENERIC)MOS_AllocAndZeroMemory(
        sizeof(GenericBindingTable) * m_numBrcKrnStates);
    // SetBindingTable() below writes through this pointer, so a failed allocation
    // must be reported here instead of being dereferenced.
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_brcKernelBindingTable);

    PMHW_KERNEL_STATE kernelStatePtr = m_brcKernelStates;
    kernelStatePtr++;  // Skipping CODECHAL_HEVC_BRC_COARSE_INTRA as it is not in Gen10

    // krnStateIdx initialization starts at 1 as Gen10 does not support CODECHAL_HEVC_BRC_COARSE_INTRA kernel in BRC
    for (uint32_t krnStateIdx = 1; krnStateIdx < m_numBrcKrnStates; krnStateIdx++)
    {
        // kernelSize goes in as the combined binary size and is updated by the lookup
        uint32_t               kernelSize = m_combinedKernelSize;
        CODECHAL_KERNEL_HEADER currKrnHeader;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
            m_kernelBinary,
            ENC_BRC,
            krnStateIdx,
            &currKrnHeader,
            &kernelSize));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
            ENC_BRC,
            &kernelStatePtr->KernelParams,
            krnStateIdx));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
            ENC_BRC,
            &m_brcKernelBindingTable[krnStateIdx],
            krnStateIdx));

        kernelStatePtr->dwCurbeOffset        = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
        kernelStatePtr->KernelParams.pBinary = m_kernelBinary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
        kernelStatePtr->KernelParams.iSize   = kernelSize;

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
            m_stateHeapInterface,
            kernelStatePtr->KernelParams.iBTCount,
            &kernelStatePtr->dwSshSize,
            &kernelStatePtr->dwBindingTableSize));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, kernelStatePtr));

        kernelStatePtr++;
    }

    return eStatus;
}
//!
//! \brief    Creates and initializes the MbEnc kernel states
//! \details  Allocates MBENC_NUM_KRN kernel states and binding tables and, for each
//!           kernel index, looks up the kernel header, sets the kernel parameters and
//!           binding table, computes the SSH / binding-table sizes and calls MhwInitISH.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::InitKernelStateMbEnc()
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    m_numMbEncEncKrnStates = MBENC_NUM_KRN;

    m_mbEncKernelStates = MOS_NewArray(MHW_KERNEL_STATE, m_numMbEncEncKrnStates);
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelStates);

    m_mbEncKernelBindingTable = static_cast<PCODECHAL_ENCODE_BINDING_TABLE_GENERIC>(
        MOS_AllocAndZeroMemory(sizeof(GenericBindingTable) * m_numMbEncEncKrnStates));
    CODECHAL_ENCODE_CHK_NULL_RETURN(m_mbEncKernelBindingTable);

    for (uint32_t krnIdx = 0; krnIdx < m_numMbEncEncKrnStates; krnIdx++)
    {
        // Kernel state and binding table share the same index
        PMHW_KERNEL_STATE mbEncState = &m_mbEncKernelStates[krnIdx];

        CODECHAL_KERNEL_HEADER krnHeader;
        uint32_t               krnSize = m_combinedKernelSize;
        CODECHAL_ENCODE_CHK_STATUS_RETURN(GetKernelHeaderAndSize(
            m_kernelBinary,
            ENC_MBENC,
            krnIdx,
            &krnHeader,
            &krnSize));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetKernelParams(
            ENC_MBENC,
            &mbEncState->KernelParams,
            krnIdx));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(SetBindingTable(
            ENC_MBENC,
            &m_mbEncKernelBindingTable[krnIdx],
            krnIdx));

        mbEncState->KernelParams.iSize   = krnSize;
        mbEncState->KernelParams.pBinary = m_kernelBinary + (krnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
        mbEncState->dwCurbeOffset        = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
            m_stateHeapInterface,
            mbEncState->KernelParams.iBTCount,
            &mbEncState->dwSshSize,
            &mbEncState->dwBindingTableSize));

        CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_stateHeapInterface, mbEncState));
    }

    return eStatus;
}
//!
//! \brief    Reports the Gen10 maximum number of VME reference frames per list
//! \param    [out] maxNumRef0
//!           Receives CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10
//! \param    [out] maxNumRef1
//!           Receives CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10
//!
void CodechalEncHevcStateG10::GetMaxRefFrames(uint8_t& maxNumRef0, uint8_t& maxNumRef1)
{
    maxNumRef0 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L0_REF_G10;
    maxNumRef1 = CODECHAL_ENCODE_HEVC_NUM_MAX_VME_L1_REF_G10;
}
//!
//! \brief    Constructor
//! \details  Sets the Gen10 feature flags, zeroes the Gen10-specific surfaces and
//!           parameter structures, configures the state heap settings and fetches the
//!           combined HEVC encode kernel binary, growing the ISH size by its aligned size.
//! \param    [in] hwInterface
//!           Forwarded to the CodechalEncHevcState constructor
//! \param    [in] debugInterface
//!           Forwarded to the CodechalEncHevcState constructor
//! \param    [in] standardInfo
//!           Forwarded to the CodechalEncHevcState constructor
//!
CodechalEncHevcStateG10::CodechalEncHevcStateG10(
    CodechalHwInterface*    hwInterface,
    CodechalDebugInterface* debugInterface,
    PCODECHAL_STANDARD_INFO standardInfo)
    : CodechalEncHevcState(hwInterface, debugInterface, standardInfo)
{
    // Feature flags and sizes
    m_combinedDownScaleAndDepthConversion = true;
    m_2xMeSupported                       = true;
    m_fieldScalingOutputInterleaved       = false;
    m_brcHistoryBufferSize                = HEVC_BRC_HISTORY_BUFFER_SIZE_G10;

    // Kernel binary base; no binary is available in a full-open-source build
#ifndef _FULL_OPEN_SOURCE
    m_kernelBase = (uint8_t*)IGCODECKRN_G10;
#else
    m_kernelBase = nullptr;
#endif
    pfnGetKernelHeaderAndSize = GetKernelHeaderAndSize;

    MOS_ZeroMemory(&m_kernelDebug, sizeof(m_kernelDebug));

    // Intermediate CU record surfaces
    MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu32, sizeof(m_intermediateCuRecordSurfaceLcu32));
    MOS_ZeroMemory(&m_secondIntermediateCuRecordSurfaceLcu32, sizeof(m_secondIntermediateCuRecordSurfaceLcu32));
    MOS_ZeroMemory(&m_intermediateCuRecordSurfaceLcu64B, sizeof(m_intermediateCuRecordSurfaceLcu64B));

    // ENC constant tables
    MOS_ZeroMemory(&m_encConstantTableForI, sizeof(m_encConstantTableForI));
    MOS_ZeroMemory(&m_encConstantTableForB, sizeof(m_encConstantTableForB));
    MOS_ZeroMemory(&m_encConstantTableForLcu64B, sizeof(m_encConstantTableForLcu64B));

    // LCU level data and scratch surfaces
    MOS_ZeroMemory(&m_lcuLevelInputData, sizeof(m_lcuLevelInputData));
    MOS_ZeroMemory(&m_lcuEncodingScratchSurface, sizeof(m_lcuEncodingScratchSurface));
    MOS_ZeroMemory(&m_lcuEncodingScratchSurfaceLcu64B, sizeof(m_lcuEncodingScratchSurfaceLcu64B));
    MOS_ZeroMemory(&m_64x64DistortionSurface, sizeof(m_64x64DistortionSurface));
    MOS_ZeroMemory(&m_scratchSurface, sizeof(m_scratchSurface));
    MOS_ZeroMemory(&m_concurrentThreadGroupData, sizeof(m_concurrentThreadGroupData));

    // Job queue surfaces
    MOS_ZeroMemory(&m_jobQueueHeaderSurfaceForB, sizeof(m_jobQueueHeaderSurfaceForB));  // when used by LCU64 kernel, it is the 1D header surface with smaller size
    MOS_ZeroMemory(&m_jobQueueHeaderSurfaceForBLcu64, sizeof(m_jobQueueHeaderSurfaceForBLcu64));
    MOS_ZeroMemory(&m_jobQueueDataSurfaceForBLcu64Cu32, sizeof(m_jobQueueDataSurfaceForBLcu64Cu32));
    MOS_ZeroMemory(&m_jobQueueDataSurfaceForBLcu64, sizeof(m_jobQueueDataSurfaceForBLcu64));

    // Split, statistics and residual surfaces
    MOS_ZeroMemory(&m_cuSplitSurface, sizeof(m_cuSplitSurface));
    MOS_ZeroMemory(&m_mbStatisticsSurface, sizeof(m_mbStatisticsSurface));
    MOS_ZeroMemory(&m_mbSplitSurface, sizeof(m_mbSplitSurface));
    MOS_ZeroMemory(&m_residualDataScratchSurfaceForBLcu32, sizeof(m_residualDataScratchSurfaceForBLcu32));
    MOS_ZeroMemory(&m_residualDataScratchSurfaceForBLcu64, sizeof(m_residualDataScratchSurfaceForBLcu64));
    MOS_ZeroMemory(&m_mvAndDistortionSumSurface, sizeof(m_mvAndDistortionSumSurface));

    MOS_ZeroMemory(&m_walkingPatternParam, sizeof(m_walkingPatternParam));

    // State heap configuration
    auto stateHeapSettings           = m_hwInterface->GetStateHeapSettings();
    stateHeapSettings->dwNumSyncTags = CODECHAL_ENCODE_HEVC_NUM_SYNC_TAGS;
    stateHeapSettings->dwDshSize     = CODECHAL_INIT_DSH_SIZE_HEVC_ENC;

    // Fetch the combined HEVC encode kernel binary and reserve ISH space for it
    m_kuid = IDR_CODEC_AllHEVCEnc;
    MOS_STATUS kernelLoadStatus = CodecHalGetKernelBinaryAndSize(
        m_kernelBase,
        m_kuid,
        &m_kernelBinary,
        &m_combinedKernelSize);
    CODECHAL_ENCODE_ASSERT(kernelLoadStatus == MOS_STATUS_SUCCESS);

    stateHeapSettings->dwIshSize +=
        MOS_ALIGN_CEIL(m_combinedKernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
}
//!
//! \brief    Initializes the Gen10 HEVC encoder state
//! \details  Runs the common CodechalEncHevcState initialization, sets the Gen10 BRC
//!           buffer parameters, reads the encoder user-feature keys, recomputes the
//!           downscaled surface dimensions with 32-aligned sizes and decides which HME
//!           levels are enabled for the current frame size.
//! \param    [in] settings
//!           Codechal settings forwarded to CodechalEncHevcState::Initialize
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, MOS_STATUS_INVALID_PARAMETER when the
//!           threads-per-LCU user-feature value is outside
//!           [m_minThreadsPerLcuB, m_maxThreadsPerLcuB], else the fail reason of the
//!           common initialization
//!
MOS_STATUS CodechalEncHevcStateG10::Initialize(CodechalSetting * settings)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    CODECHAL_ENCODE_FUNCTION_ENTER;

    // Common initialization
    CODECHAL_ENCODE_CHK_STATUS_RETURN(CodechalEncHevcState::Initialize(settings));

    m_b2NdSaoPassNeeded     = true;
    m_maxNumSlicesSupported = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_5;

    // BRC buffer parameters
    m_brcBuffers.dwBrcConstantSurfaceWidth  = HEVC_BRC_CONSTANT_SURFACE_WIDTH_G9;
    m_brcBuffers.dwBrcConstantSurfaceHeight = m_brcConstantSurfaceHeight;
    m_brcBuffers.dwBrcHcpPicStateSize       = BRC_IMG_STATE_SIZE_PER_PASS_G10 * CODECHAL_ENCODE_BRC_MAXIMUM_NUM_PASSES;
    m_brcBuffers.pMbStatisticsSurface       = &m_mbStatisticsSurface;
    m_brcBuffers.pMvAndDistortionSumSurface = &m_mvAndDistortionSumSurface;

    MOS_USER_FEATURE_VALUE_DATA userFeatureData;

    // Single task phase
    MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
    MOS_UserFeature_ReadValue_ID(
        nullptr,
        __MEDIA_USER_FEATURE_VALUE_SINGLE_TASK_PHASE_ENABLE_ID,
        &userFeatureData);
    m_singleTaskPhaseSupported = (userFeatureData.i32Data != 0);

    // Number of regions in a slice
    MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
    MOS_UserFeature_ReadValue_ID(
        nullptr,
        __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_REGION_NUMBER_ID,
        &userFeatureData);
    // Region number must be at least 1 ...
    m_numRegionsInSlice = (userFeatureData.i32Data >= 1) ? userFeatureData.i32Data : 1;
    // ... and cannot be larger than 16
    if (m_numRegionsInSlice > 16)
    {
        m_numRegionsInSlice = 16;
    }

    // 26Z: a non-zero key value turns the 26 walking pattern off
    MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
    MOS_UserFeature_ReadValue_ID(
        nullptr,
        __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_26Z_ENABLE_ID,
        &userFeatureData);
    m_enable26WalkingPattern = (userFeatureData.i32Data == 0);

    // RDOQ
    MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
    MOS_UserFeature_ReadValue_ID(
        nullptr,
        __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_RDOQ_ENABLE_ID,
        &userFeatureData);
    m_hevcRdoqEnabled = (userFeatureData.i32Data != 0);

    m_hwScoreboardType = 1;

    if (m_codecFunction != CODECHAL_FUNCTION_PAK)
    {
        // HME, enabled unless the key says otherwise
        MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
        userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
        userFeatureData.i32Data     = 1;
        MOS_UserFeature_ReadValue_ID(
            nullptr,
            __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_ME_ENABLE_ID,
            &userFeatureData);
        m_hmeSupported = (userFeatureData.i32Data != 0);

        // 16x HME, enabled unless the key says otherwise
        MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
        userFeatureData.i32DataFlag = MOS_USER_FEATURE_VALUE_DATA_FLAG_CUSTOM_DEFAULT_VALUE_TYPE;
        userFeatureData.i32Data     = 1;
        MOS_UserFeature_ReadValue_ID(
            nullptr,
            __MEDIA_USER_FEATURE_VALUE_HEVC_ENCODE_16xME_ENABLE_ID,
            &userFeatureData);
        m_16xMeSupported = (userFeatureData.i32Data != 0);

        // Threads per LCU, rejected when outside the supported range
        MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
        MOS_UserFeature_ReadValue_ID(
            nullptr,
            __MEDIA_USER_FEATURE_VALUE_HEVC_NUM_THREADS_PER_LCU_ID,
            &userFeatureData);
        m_totalNumThreadsPerLcu = userFeatureData.i32Data;

        if (!(m_totalNumThreadsPerLcu >= m_minThreadsPerLcuB && m_totalNumThreadsPerLcu <= m_maxThreadsPerLcuB))
        {
            return MOS_STATUS_INVALID_PARAMETER;
        }
    }

    // Overriding the defaults here with 32 aligned dimensions
    // 2x Scaling WxH
    m_downscaledWidth2x  = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameWidth);
    m_downscaledHeight2x = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_frameHeight);

    // HME Scaling WxH
    m_downscaledWidth4x      = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameWidth);
    m_downscaledHeight4x     = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_frameHeight);
    m_downscaledWidthInMb4x  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth4x);
    m_downscaledHeightInMb4x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight4x);

    // SuperHME Scaling WxH (derived from the 4x dimensions)
    m_downscaledWidth16x      = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledWidth4x);
    m_downscaledHeight16x     = CODECHAL_GET_4xDS_SIZE_32ALIGNED(m_downscaledHeight4x);
    m_downscaledWidthInMb16x  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth16x);
    m_downscaledHeightInMb16x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight16x);

    // UltraHME Scaling WxH (derived from the 16x dimensions)
    m_downscaledWidth32x      = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledWidth16x);
    m_downscaledHeight32x     = CODECHAL_GET_2xDS_SIZE_32ALIGNED(m_downscaledHeight16x);
    m_downscaledWidthInMb32x  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledWidth32x);
    m_downscaledHeightInMb32x = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_downscaledHeight32x);

    // Overriding default minimum scaled dimension allowed with VME restriction
    m_minScaledDimension     = m_minScaledSurfaceSize;
    m_minScaledDimensionInMb = (m_minScaledSurfaceSize + 15) >> 4;

    // 16x HME is on for every frame that is at least 128x128 and off otherwise.
    // 32x HME is always off since uhme is not supported on CNL.
    // NOTE(review): this assignment replaces the 16xME user-feature value read above,
    // so the key cannot disable 16x HME for frames >= 128x128 - confirm this is intended.
    const bool frameBelow128 = (m_frameWidth < 128) || (m_frameHeight < 128);
    m_16xMeSupported         = !frameBelow128;
    m_32xMeSupported         = false;

    // disable MMCD if we enable Codechal dump. Because dump code changes the surface state from compressed to uncompressed,
    // this causes mis-match issue between dump is enabled or disabled.
    CODECHAL_DEBUG_TOOL(
        if (CodecHalMmcState::IsMmcEnabled() && m_debugInterface && m_debugInterface->m_dbgCfgHead && m_mmcState) {
            m_mmcState->SetMmcDisabled();
        })

    return eStatus;
}
//!
//! \brief    Initializes all kernel states used by this encoder
//! \details  Calls the scaling/conversion, ME, MbEnc and BRC kernel-state initializers
//!           in that order, each wrapped in CODECHAL_ENCODE_CHK_STATUS_RETURN.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MOS_STATUS CodechalEncHevcStateG10::InitKernelState()
{
MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
CODECHAL_ENCODE_FUNCTION_ENTER;
// Init kernel state
// Down-scaling / depth-conversion kernel
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateScalingAndConversion());
// ME kernel
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMe());
// MbEnc kernels (MBENC_NUM_KRN entries)
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateMbEnc());
// BRC kernels
CODECHAL_ENCODE_CHK_STATUS_RETURN(InitKernelStateBrc());
return eStatus;
}