blob: b763eaf50d5912b840df17fa0dc3afedfa176b0b [file] [log] [blame]
/*
* Copyright (c) 2008-2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file hal_kerneldll.c
//! \brief Kernel Dynamic Linking / Loading routines
//!
#ifndef VPHAL_LIB
#if IMOLA
#include <stdlib.h>
#endif // IMOLA
#include <math.h> //for sin & cos
#endif // VPHAL_LIB
#if EMUL || VPHAL_LIB
#include <math.h>
#include "support.h"
#elif LINUX
#else // !(EMUL | VPHAL_LIB) && !LINUX
#endif // EMUL | VPHAL_LIB
#include "hal_kerneldll.h"
#include "vphal.h"
// Define _DEBUG symbol for KDLL Release build before loading the "vpkrnheader.h" file
// This is necessary for full kernels names in both Release/Debug versions of KDLL app
#if EMUL || VPHAL_LIB
#ifndef _DEBUG
#define _DEBUG 2
#endif // _DEBUG
#endif // EMUL || VPHAL_LIB
// Kernel IDs and Kernel Names
#include "vpkrnheader.h" // IDR_VP_TOTAL_NUM_KERNELS
// Undefine _DEBUG symbol for the remaining of the KDLL Release build
#if _DEBUG == 2
#undef _DEBUG
#endif // _DEBUG
#ifndef PI
#define PI 3.1415926535897932f
#endif // PI
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
// Lookup table indexed by MOS_FORMAT value, covering Format_Any up to (but not
// including) Format_Count: true when the format is treated as YUV, false otherwise.
// KernelDll_IsYUVFormat() indexes this table directly with the format value, so the
// entry order has to follow the MOS_FORMAT enum order (the enum is declared outside
// this file); the trailing comment on each entry names the format the slot is meant for.
const bool g_cIsFormatYUV[Format_Count] =
{
false, // Format_Any
false, // Format_A8R8G8B8
false, // Format_X8R8G8B8
false, // Format_A8B8G8R8
false, // Format_X8B8G8R8
false, // Format_A16B16G16R16
false, // Format_A16R16G16B16
false, // Format_R5G6B5
false, // Format_R32U
false, // Format_R32F
false, // Format_R8G8B8
false, // Format_RGBP
false, // Format_BGRP
true, // Format_YUY2
true, // Format_YUYV
true, // Format_YVYU
true, // Format_UYVY
true, // Format_VYUY
true, // Format_Y216
true, // Format_Y210
true, // Format_Y416
true, // Format_AYUV
true, // Format_AUYV
true, // Format_Y410
true, // Format_400P
true, // Format_NV12
true, // Format_NV12_UnAligned
true, // Format_NV21
true, // Format_NV11
true, // Format_NV11_UnAligned
true, // Format_P208
true, // Format_P208_UnAligned
true, // Format_IMC1
true, // Format_IMC2
true, // Format_IMC3
true, // Format_IMC4
true, // Format_422H
true, // Format_422V
true, // Format_444P
true, // Format_411P
true, // Format_411R
true, // Format_I420
true, // Format_IYUV
true, // Format_YV12
true, // Format_YVU9
true, // Format_AI44 (YUV originally, palette may be converted to RGB)
true, // Format_IA44 (same as above)
false, // Format_P8 (using RGB since P8 is uncommon in FC)
false, // Format_A8P8 (same as above)
false, // Format_A8
false, // Format_L8
false, // Format_A4L4
false, // Format_A8L8
true, // Format_IRW0
true, // Format_IRW1
true, // Format_IRW2
true, // Format_IRW3
true, // Format_IRW4
true, // Format_IRW5
true, // Format_IRW6
true, // Format_IRW7
false, // Format_STMM
false, // Format_Buffer
false, // Format_Buffer_2D
false, // Format_V8U8
false, // Format_R32S
false, // Format_R8U
false, // Format_R8G8UN
false, // Format_R8G8SN
false, // Format_G8R8_G8B8
false, // Format_R16U
false, // Format_R16S
false, // Format_R16UN
false, // Format_RAW
false, // Format_Y8
false, // Format_Y1
false, // Format_Y16U
false, // Format_Y16S
false, // Format_L16
false, // Format_D16
false, // Format_R10G10B10A2
false, // Format_B10G10R10A2
true, // Format_P016
true, // Format_P010
true // Format_YV12_Planar
};
// Fixed 3x4 conversion matrices (12 floats each). As the per-row formulas below show,
// every row holds three coefficients followed by one offset:
//     out = C0 * in0 + C1 * in1 + C2 * in2 + C3

// sRGB -> stRGB: each channel is scaled by 0.858824 (~219/255) and offset by +16.
const float g_cCSC_sRGB_stRGB[12] =
{
0.858824f, 0.000000f, 0.000000f, 16.000000f, // stR = C0 * sR + C1 * sG + C2 * sB + C3
0.000000f, 0.858824f, 0.000000f, 16.000000f, // stG = C4 * sR + C5 * sG + C6 * sB + C7
0.000000f, 0.000000f, 0.858824f, 16.000000f // stB = C8 * sR + C9 * sG + C10 * sB + C11
};
// stRGB -> sRGB: each channel is scaled by 1.164384 (~255/219) and offset by -18.630137,
// i.e. the inverse of g_cCSC_sRGB_stRGB.
const float g_cCSC_stRGB_sRGB[12] =
{
1.164384f, 0.000000f, 0.000000f, -18.630137f, // sR = C0 * stR + C1 * stG + C2 * stB + C3
0.000000f, 1.164384f, 0.000000f, -18.630137f, // sG = C4 * stR + C5 * stG + C6 * stB + C7
0.000000f, 0.000000f, 1.164384f, -18.630137f // sB = C8 * stR + C9 * stG + C10 * stB + C11
};
// Identity: unit coefficients on the diagonal and zero offsets, so every channel
// passes through unchanged.
const float g_cCSC_Identity[12] =
{
1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f
};
// Gray identity: the first row passes the first input channel through unchanged;
// the other two rows have all-zero coefficients and a constant offset of 128.
const float g_cCSC_GrayIdentity[12] =
{
1.0f, 0.0f, 0.0f, 0.000f,
0.0f, 0.0f, 0.0f, 128.0f,
0.0f, 0.0f, 0.0f, 128.0f
};
// 3x3 color transfer matrices (9 floats each, one output component per row).
// None of them carries an offset column.
// NOTE(review): the "generic" matrices look like the pTransferMatrix inputs taken by
// KernelDll_CalcRgbToYuvMatrix / KernelDll_CalcYuvToRgbMatrix - confirm against the callers.

// Generic RGB to YUV conversion matrix from BT.601 standard
const float g_cCSC_BT601_RGB_YUV[9] =
{
0.299000f, 0.587000f, 0.114000f,
-0.168736f, -0.331264f, 0.500000f,
0.500000f, -0.418688f, -0.081312f
};
// Generic RGB to YUV conversion matrix from BT.709 standard
const float g_cCSC_BT709_RGB_YUV[9] =
{
0.212600f, 0.715200f, 0.072200f,
-0.114572f, -0.385428f, 0.500000f,
0.500000f, -0.454153f, -0.045847f
};
// Generic YUV to RGB conversion matrix from BT.601 standard
const float g_cCSC_BT601_YUV_RGB[9] =
{
1.000000f, 0.000000f, 1.402000f,
1.000000f, -0.344136f, -0.714136f,
1.000000f, 1.772000f, 0.000000f
};
// Generic YUV to RGB conversion matrix from BT.709 standard
const float g_cCSC_BT709_YUV_RGB[9] =
{
1.000000f, 0.000000f, 1.574800f,
1.000000f, -0.187324f, -0.468124f,
1.000000f, 1.855600f, 0.000000f
};
// BT2020 RGB to Non-constant YUV conversion matrix from R-REC-BT.2020-1-201406-I!!PDF-E.pdf
const float g_cCSC_BT2020_RGB_YUV[9] =
{
0.262700f, 0.678000f, 0.059300f, // Y
-0.139630f, -0.360370f, 0.500000f, // U
0.500000f, -0.459786f, -0.040214f // V
};
// BT2020 Non-constant YUV to RGB conversion matrix from R-REC-BT.2020-1-201406-I!!PDF-E.pdf
const float g_cCSC_BT2020_YUV_RGB[9] =
{
1.000000f, 0.000000f, 1.474600f, //R
1.000000f, -0.164553f, -0.571353f, //G
1.000000f, 1.881400f, 0.000000f //B
};
// BT2020 YUV Limited Range to BT2020 RGB full range conversion matrix
// (coefficients only; no offset column is stored here)
const float g_cCSC_BT2020_LimitedYUV_RGB[9] =
{
1.164383f, 0.000000f, 1.678680f, // R
1.164383f, -0.187332f, -0.650421f, // G
1.164383f, 2.141769f, 0.000000f // B
};
// BT2020 RGB full range to BT2020 YUV Limited Range conversion matrix
// (coefficients only; no offset column is stored here)
const float g_cCSC_BT2020_RGB_LimitedYUV[9] =
{
0.225617f, 0.582275f, 0.050934f, // Y
-0.122650f, -0.316559f, 0.439209f, // U
0.439209f, -0.403885f, -0.035324f // V
};
// Table of component name strings; its contents come entirely from the
// IDR_VP_KERNEL_NAMES macro.
// NOTE(review): the macro is not defined in this file - presumably it is provided by
// vpkrnheader.h and expands to one string per kernel ID; confirm there.
const char *g_cInit_ComponentNames[] =
{
IDR_VP_KERNEL_NAMES
};
// Folds a hash value down to 8 bits. For a 32-bit unsigned value the result is the
// XOR of its four bytes.
//   folded_hash : lvalue receiving the result (0..255); it is referenced several times
//   hash        : value to fold; it is evaluated twice, so pass an expression
//                 without side effects
// The expansion is a braced block: it may be written with or without a trailing
// semicolon, but not as the unbraced body of an "if" that has an "else".
// The last line deliberately has NO line-continuation backslash, so the macro never
// swallows whatever line happens to follow it.
#define FOLD_HASH(folded_hash, hash)                               \
{                                                                  \
    folded_hash = (((hash) >> 8) ^ (hash)) & 0x00ff00ff;           \
    folded_hash = ((folded_hash >> 16) ^ folded_hash) & 0xff;      \
}
#if _DEBUG || EMUL || VPHAL_LIB
#ifndef VPHAL_LIB
#pragma warning( push )
#pragma warning( disable : 4996 )
#endif // VPHAL_LIB
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetLayerString
| Purpose   : Translate a layer identifier into a printable name (debug helper)
|
| Input     : layer - layer identifier
|
| Return    : Static string for the layer, or nullptr when the value matches
|             none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetLayerString(Kdll_Layer layer)
{
    const char *szName = nullptr;

    switch (layer)
    {
        case Layer_Invalid:      szName = _T("Invalid");       break;
        case Layer_None:         szName = _T("None");          break;
        case Layer_Background:   szName = _T("Background");    break;
        case Layer_MainVideo:    szName = _T("Main Video");    break;
        case Layer_SubVideo:     szName = _T("Sub-Video");     break;
        case Layer_SubPicture1:  szName = _T("Sub-Picture 1"); break;
        case Layer_SubPicture2:  szName = _T("Sub-Picture 2"); break;
        case Layer_SubPicture3:  szName = _T("Sub-Picture 3"); break;
        case Layer_SubPicture4:  szName = _T("Sub-Picture 4"); break;
        case Layer_Graphics:     szName = _T("Graphics");      break;
        case Layer_RenderTarget: szName = _T("Render Target"); break;
        default:                                               break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetFormatString
| Purpose   : Translate a surface format into a printable name (debug helper)
|
| Input     : format - format identifier
|
| Return    : Static string for the format; "Invalid format" for any value
|             that has no case below
|
| Notes     : Format_RGB and Format_X8R8G8B8 both print as "RGB".
\---------------------------------------------------------------------------*/
const char *KernelDll_GetFormatString(MOS_FORMAT format)
{
    switch (format)
    {
        case Format_Invalid         : return _T("Invalid");
        case Format_Source          : return _T("Current layer");
        case Format_RGB             : return _T("RGB");
        case Format_RGB32           : return _T("RGB32");
        case Format_PA              : return _T("PA");
        case Format_PL2             : return _T("PL2");
        case Format_PL3             : return _T("PL3");
        case Format_PL3_RGB         : return _T("PL3_RGB");
        case Format_PAL             : return _T("PAL");
        case Format_None            : return _T("None");
        case Format_Any             : return _T("Any");
        case Format_A8R8G8B8        : return _T("ARGB");
        case Format_X8R8G8B8        : return _T("RGB");
        case Format_A8B8G8R8        : return _T("ABGR");
        case Format_X8B8G8R8        : return _T("BGR");
        case Format_A16B16G16R16    : return _T("A16B16G16R16");
        case Format_A16R16G16B16    : return _T("A16R16G16B16");
        case Format_R5G6B5          : return _T("RGB16");
        case Format_R8G8B8          : return _T("RGB24");
        case Format_R32U            : return _T("R32U");
        case Format_R32F            : return _T("R32F");
        case Format_RGBP            : return _T("RGBP");
        case Format_BGRP            : return _T("BGRP");
        case Format_YUY2            : return _T("YUY2");
        case Format_YUYV            : return _T("YUYV");
        case Format_YVYU            : return _T("YVYU");
        case Format_UYVY            : return _T("UYVY");
        case Format_VYUY            : return _T("VYUY");
        case Format_Y416            : return _T("Y416");
        case Format_AYUV            : return _T("AYUV");
        case Format_AUYV            : return _T("AUYV");
        case Format_400P            : return _T("400P");
        case Format_NV12            : return _T("NV12");
        case Format_NV12_UnAligned  : return _T("NV12_UnAligned");
        case Format_NV21            : return _T("NV21");
        case Format_NV11            : return _T("NV11");
        case Format_NV11_UnAligned  : return _T("NV11_UnAligned");
        case Format_P208            : return _T("P208");
        case Format_P208_UnAligned  : return _T("P208_UnAligned");
        case Format_IMC1            : return _T("IMC1");
        case Format_IMC2            : return _T("IMC2");
        case Format_IMC3            : return _T("IMC3");
        case Format_IMC4            : return _T("IMC4");
        case Format_422H            : return _T("422H");
        case Format_422V            : return _T("422V");
        case Format_444P            : return _T("444P");
        case Format_411P            : return _T("411P");
        case Format_411R            : return _T("411R");
        case Format_I420            : return _T("I420");
        case Format_IYUV            : return _T("IYUV");
        case Format_YV12            : return _T("YV12");
        case Format_YVU9            : return _T("YVU9");
        case Format_AI44            : return _T("AI44");
        case Format_IA44            : return _T("IA44");
        case Format_P8              : return _T("P8");
        case Format_A8P8            : return _T("A8P8");
        case Format_A8              : return _T("A8");
        case Format_L8              : return _T("L8");
        case Format_A4L4            : return _T("A4L4");
        case Format_A8L8            : return _T("A8L8");
        case Format_IRW0            : return _T("IRW0");
        case Format_IRW1            : return _T("IRW1");
        case Format_IRW2            : return _T("IRW2");
        case Format_IRW3            : return _T("IRW3");
        case Format_IRW4            : return _T("IRW4");
        case Format_IRW5            : return _T("IRW5");
        case Format_IRW6            : return _T("IRW6");
        case Format_IRW7            : return _T("IRW7");
        case Format_STMM            : return _T("STMM");
        case Format_Buffer          : return _T("Buffer");
        case Format_Buffer_2D       : return _T("Buffer_2D");
        case Format_V8U8            : return _T("V8U8");
        case Format_R32S            : return _T("R32S");
        // Was "RU8": the printed name now matches the enumerator
        case Format_R8U             : return _T("R8U");
        case Format_R8G8UN          : return _T("R8G8UN");
        // Was "R8S8UN": the printed name now matches the enumerator
        case Format_R8G8SN          : return _T("R8G8SN");
        case Format_G8R8_G8B8       : return _T("G8R8_G8B8");
        case Format_R16U            : return _T("R16U");
        case Format_R16S            : return _T("R16S");
        case Format_R16UN           : return _T("R16UN");
        case Format_RAW             : return _T("RAW");
        case Format_Y8              : return _T("Y8");
        case Format_Y1              : return _T("Y1");
        case Format_Y16U            : return _T("Y16U");
        case Format_Y16S            : return _T("Y16S");
        case Format_L16             : return _T("L16");
        case Format_D16             : return _T("D16");
        case Format_R10G10B10A2     : return _T("R10G10B10A2");
        case Format_B10G10R10A2     : return _T("B10G10R10A2");
        case Format_P016            : return _T("P016");
        case Format_P010            : return _T("P010");
        case Format_YV12_Planar     : return _T("YV12_Planar");
        default                     : return _T("Invalid format");
    }

    // Not reached: every path through the switch returns.
    return nullptr;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetCSpaceString
| Purpose   : Translate a color space into a printable name (debug helper)
|
| Input     : cspace - color space identifier
|
| Return    : Static string for the color space; "Invalid cspace" for any
|             value that has no case below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetCSpaceString(VPHAL_CSPACE cspace)
{
    const char *szName;

    switch (cspace)
    {
        case CSpace_None:                szName = _T("None");                 break;
        case CSpace_Source:              szName = _T("Current layer");        break;
        case CSpace_RGB:                 szName = _T("RGB");                  break;
        case CSpace_YUV:                 szName = _T("YUV");                  break;
        case CSpace_Any:                 szName = _T("Any");                  break;
        case CSpace_sRGB:                szName = _T("sRGB");                 break;
        case CSpace_BT601:               szName = _T("BT.601");               break;
        case CSpace_BT601_FullRange:     szName = _T("BT.601_FullRange");     break;
        case CSpace_BT709:               szName = _T("BT.709");               break;
        case CSpace_BT709_FullRange:     szName = _T("BT.709_FullRange");     break;
        case CSpace_xvYCC601:            szName = _T("xvYCC.601");            break;
        case CSpace_xvYCC709:            szName = _T("xvYCC.709");            break;
        case CSpace_BT601Gray:           szName = _T("BT.601Gray");           break;
        case CSpace_BT601Gray_FullRange: szName = _T("BT.601Gray_FullRange"); break;
        default:                         szName = _T("Invalid cspace");       break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetSamplingString
| Purpose   : Translate a sampling mode into a printable name (debug helper)
|
| Input     : sampling - sampling mode identifier
|
| Return    : Static string for the sampling mode, or nullptr when the value
|             matches none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetSamplingString(Kdll_Sampling sampling)
{
    const char *szName = nullptr;

    switch (sampling)
    {
        case Sample_None:          szName = _T("No Sampling");    break;
        case Sample_Source:        szName = _T("Current layer");  break;
        case Sample_Any:           szName = _T("Any Sampling");   break;
        case Sample_Scaling_Any:   szName = _T("Any Scale");      break;
        case Sample_Scaling:       szName = _T("Scale");          break;
        case Sample_Scaling_034x:  szName = _T("0.34x");          break;
        case Sample_Scaling_AVS:   szName = _T("AVS");            break;
        case Sample_iScaling:      szName = _T("iScale");         break;
        case Sample_iScaling_034x: szName = _T("0.34x iScaling"); break;
        case Sample_iScaling_AVS:  szName = _T("iAVS");           break;
        default:                                                  break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetRotationString
| Purpose   : Translate a rotation/mirror mode into a printable name
|             (debug helper)
|
| Input     : rotation - rotation identifier
|
| Return    : Static string for the rotation, or nullptr when the value
|             matches none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetRotationString(VPHAL_ROTATION rotation)
{
    const char *szName = nullptr;

    switch (rotation)
    {
        case VPHAL_ROTATION_IDENTITY:
            szName = _T("0");
            break;
        case VPHAL_ROTATION_90:
            szName = _T("90");
            break;
        case VPHAL_ROTATION_180:
            szName = _T("180");
            break;
        case VPHAL_ROTATION_270:
            szName = _T("270");
            break;
        case VPHAL_MIRROR_HORIZONTAL:
            szName = _T("Horizontal");
            break;
        case VPHAL_MIRROR_VERTICAL:
            szName = _T("Vertical");
            break;
        case VPHAL_ROTATE_90_MIRROR_VERTICAL:
            szName = _T("90 Mirror Vertical");
            break;
        case VPHAL_ROTATE_90_MIRROR_HORIZONTAL:
            szName = _T("90 Mirror Horizontal");
            break;
        default:
            break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetProcessString
| Purpose   : Translate a processing mode into a printable name (debug helper)
|
| Input     : process - processing mode identifier
|
| Return    : Static string for the processing mode, or nullptr when the
|             value matches none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetProcessString(Kdll_Processing process)
{
    const char *szName = nullptr;

    switch (process)
    {
        case Process_None:         szName = _T("No processing");       break;
        case Process_Source:       szName = _T("Current layer");       break;
        case Process_Any:          szName = _T("Any processing");      break;
        case Process_Composite:    szName = _T("Composite");           break;
        case Process_XORComposite: szName = _T("XOR Mono Composite");  break;
        case Process_CBlend:       szName = _T("Const Blend");         break;
        case Process_SBlend:       szName = _T("Source Blend");        break;
        case Process_SBlend_4bits: szName = _T("Source Blend 4-bits"); break;
        case Process_PBlend:       szName = _T("Part Blend");          break;
        case Process_CSBlend:      szName = _T("ConstSource Blend");   break;
        case Process_CPBlend:      szName = _T("ConstPart Blend");     break;
        case Process_DI:           szName = _T("DI");                  break;
        case Process_DN:           szName = _T("DN");                  break;
        case Process_DNDI:         szName = _T("DNDI");                break;
        default:                                                       break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetParserStateString
| Purpose   : Translate a parser state into a printable name (debug helper)
|
| Input     : state - parser state identifier
|
| Return    : Static string for the state; "Invalid parser state" for any
|             value that has no case below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetParserStateString(Kdll_ParserState state)
{
    const char *szName;

    switch (state)
    {
        case Parser_Invalid:               szName = _T("Invalid");                 break;
        case Parser_Begin:                 szName = _T("Begin");                   break;
        case Parser_SetRenderMethod:       szName = _T("SetRenderMethod");         break;
        case Parser_SetupLayer0:           szName = _T("SetupLayer0");             break;
        case Parser_SetupLayer1:           szName = _T("SetupLayer1");             break;
        case Parser_SetParamsLayer0:       szName = _T("SetParamsLayer0");         break;
        case Parser_SetParamsLayer1:       szName = _T("SetParamsLayer1");         break;
        case Parser_SetParamsTarget:       szName = _T("SetParamsTarget");         break;
        case Parser_SampleLayer0:          szName = _T("SampleLayer0");            break;
        case Parser_SampleLayer0Mix:       szName = _T("SampleLayer0Mix");         break;
        case Parser_SampleLayer0ColorFill: szName = _T("SampleLayer0ColorFill");   break;
        case Parser_RotateLayer0Check:     szName = _T("SampleRotateLayer0Check"); break;
        case Parser_RotateLayer0:          szName = _T("SampleRotateLayer0");      break;
        case Parser_SampleLayer0Done:      szName = _T("SampleLayer0Done");        break;
        case Parser_ShuffleLayer0:         szName = _T("ShuffleLayer0");           break;
        case Parser_SampleLayer1:          szName = _T("SampleLayer1");            break;
        case Parser_SampleLayer1Done:      szName = _T("SampleLayer1Done");        break;
        case Parser_ShuffleLayer1:         szName = _T("ShuffleLayer1");           break;
        case Parser_SampleLayer0SelectCSC: szName = _T("SampleLayer0SelectCSC");   break;
        case Parser_SetupCSC0:             szName = _T("SetupCSC0");               break;
        case Parser_ExecuteCSC0:           szName = _T("ExecuteCSC0");             break;
        case Parser_ExecuteCSC0Done:       szName = _T("ExecuteCSC0Done");         break;
        case Parser_SetupCSC1:             szName = _T("SetupCSC1");               break;
        case Parser_ExecuteCSC1:           szName = _T("ExecuteCSC1");             break;
        case Parser_ExecuteCSC1Done:       szName = _T("ExecuteCSC1Done");         break;
        case Parser_Lumakey:               szName = _T("LumaKey");                 break;
        case Parser_ProcessLayer:          szName = _T("ProcessLayer");            break;
        case Parser_ProcessLayerDone:      szName = _T("ProcessLayerDone");        break;
        case Parser_DualOutput:            szName = _T("DualOutput");              break;
        case Parser_Rotation:              szName = _T("Rotation");                break;
        case Parser_DestSurfIndex:         szName = _T("DestSurfIndex");           break;
        case Parser_Colorfill:             szName = _T("Colorfill");               break;
        case Parser_WriteOutput:           szName = _T("WriteOutput");             break;
        case Parser_End:                   szName = _T("End");                     break;
        default:                           szName = _T("Invalid parser state");    break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetRuleIDString
| Purpose   : Translate a rule ID into a printable name (debug helper)
|
| Input     : RID - rule identifier
|
| Return    : Static string for the rule ID, or nullptr when the value
|             matches none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetRuleIDString(Kdll_RuleID RID)
{
    const char *szName = nullptr;

    switch (RID)
    {
        case RID_Op_EOF:                szName = _T("EOF");                   break;
        case RID_Op_NewEntry:           szName = _T("NewEntry");              break;
        case RID_IsTargetCspace:        szName = _T("IsTargetCspace");        break;
        case RID_IsLayerID:             szName = _T("IsLayerID");             break;
        case RID_IsLayerFormat:         szName = _T("IsLayerFormat");         break;
        case RID_IsParserState:         szName = _T("IsParserState");         break;
        case RID_IsRenderMethod:        szName = _T("IsRenderMethod");        break;
        case RID_IsShuffling:           szName = _T("IsShuffling");           break;
        case RID_IsLayerRotation:       szName = _T("IsLayerRotation");       break;
        case RID_IsSrc0Format:          szName = _T("IsSrc0Format");          break;
        case RID_IsSrc0Sampling:        szName = _T("IsSrc0Sampling");        break;
        case RID_IsSrc0ColorFill:       szName = _T("IsSrc0ColorFill");       break;
        case RID_IsSrc0LumaKey:         szName = _T("IsSrc0LumaKey");         break;
        case RID_IsSrc0Procamp:         szName = _T("IsSrc0Procamp");         break;
        case RID_IsSrc0Rotation:        szName = _T("IsSrc0Rotation");        break;
        case RID_IsSrc0Coeff:           szName = _T("IsSrc0Coeff");           break;
        case RID_IsSrc0Processing:      szName = _T("IsSrc0Processing");      break;
        case RID_IsSrc1Format:          szName = _T("IsSrc1Format");          break;
        case RID_IsSrc1Sampling:        szName = _T("IsSrc1Sampling");        break;
        case RID_IsSrc1LumaKey:         szName = _T("IsSrc1LumaKey");         break;
        case RID_IsSrc1SamplerLumaKey:  szName = _T("IsSrc1SamplerLumaKey");  break;
        case RID_IsSrc1Coeff:           szName = _T("IsSrc1Coeff");           break;
        case RID_IsSrc1Processing:      szName = _T("IsSrc1Processing");      break;
        case RID_IsLayerNumber:         szName = _T("IsLayerNumber");         break;
        case RID_IsQuadrant:            szName = _T("IsQuadrant");            break;
        case RID_IsCSCBeforeMix:        szName = _T("IsCSCBeforeMix");        break;
        case RID_IsDualOutput:          szName = _T("IsDualOutput");          break;
        case RID_IsRTRotate:            szName = _T("IsRTRotate");            break;
        case RID_IsTargetFormat:        szName = _T("IsTargetFormat");        break;
        case RID_Is64BSaveEnabled:      szName = _T("Is64BSaveEnabled");      break;
        case RID_IsTargetTileType:      szName = _T("IsTargetTileType");      break;
        case RID_IsProcampEnabled:      szName = _T("IsProcampEnabled");      break;
        case RID_SetTargetCspace:       szName = _T("SetTargetCspace");       break;
        case RID_SetParserState:        szName = _T("SetParserState");        break;
        case RID_SetSrc0Format:         szName = _T("SetSrc0Format");         break;
        case RID_SetSrc0Sampling:       szName = _T("SetSrc0Sampling");       break;
        case RID_SetSrc0ColorFill:      szName = _T("SetSrc0ColorFill");      break;
        case RID_SetSrc0LumaKey:        szName = _T("SetSrc0LumaKey");        break;
        case RID_SetSrc0Rotation:       szName = _T("SetSrc0Rotation");       break;
        case RID_SetSrc0Coeff:          szName = _T("SetSrc0Coeff");          break;
        case RID_SetSrc0Processing:     szName = _T("SetSrc0Processing");     break;
        case RID_SetSrc1Format:         szName = _T("SetSrc1Format");         break;
        case RID_SetSrc1Sampling:       szName = _T("SetSrc1Sampling");       break;
        case RID_SetSrc1Rotation:       szName = _T("SetSrc1Rotation");       break;
        case RID_SetSrc1LumaKey:        szName = _T("SetSrc1LumaKey");        break;
        case RID_SetSrc1SamplerLumaKey: szName = _T("SetSrc1SamplerLumaKey"); break;
        case RID_SetSrc1Procamp:        szName = _T("SetSrc1Procamp");        break;
        case RID_SetSrc1Coeff:          szName = _T("SetSrc1Coeff");          break;
        case RID_SetSrc1Processing:     szName = _T("SetSrc1Processing");     break;
        case RID_SetKernel:             szName = _T("SetKernel");             break;
        case RID_SetNextLayer:          szName = _T("SetNextLayer");          break;
        case RID_SetPatchData:          szName = _T("SetPatchData");          break;
        case RID_SetQuadrant:           szName = _T("SetQuadrant");           break;
        case RID_SetCSCBeforeMix:       szName = _T("SetCSCBeforeMix");       break;
        case RID_SetPatch:              szName = _T("SetPatch");              break;
        case RID_IsSrc0Chromasiting:    szName = _T("IsSrc0Chromasiting");    break;
        case RID_IsSrc1Procamp:         szName = _T("IsSrc1Procamp");         break;
        case RID_IsSrc1Chromasiting:    szName = _T("IsSrc1Chromasiting");    break;
        case RID_IsSetCoeffMode:        szName = _T("IsSetCoeffMode");        break;
        case RID_IsConstOutAlpha:       szName = _T("IsConstOutAlpha");       break;
        case RID_IsDitherNeeded:        szName = _T("IsDitherNeeded");        break;
        case RID_IsScalingRatio:        szName = _T("IsScalingRatio");        break;
        case RID_SetSrc0Procamp:        szName = _T("SetSrc0Procamp");        break;
        default:                                                              break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetCoeffIDString
| Purpose   : Translate a coefficient ID into a printable name (debug helper)
|
| Input     : CID - coefficient identifier
|
| Return    : Static string for the coefficient ID, or nullptr when the
|             value matches none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetCoeffIDString(Kdll_CoeffID CID)
{
    const char *szName = nullptr;

    switch (CID)
    {
        case CoeffID_Src0:   szName = _T("Src0 coeff");    break;
        case CoeffID_Src1:   szName = _T("Src1 coeff");    break;
        case CoeffID_Source: szName = _T("Current layer"); break;
        case CoeffID_Any:    szName = _T("Any coeff");     break;
        case CoeffID_None:   szName = _T("No coeff");      break;
        case CoeffID_0:      szName = _T("Coeff 0");       break;
        case CoeffID_1:      szName = _T("Coeff 1");       break;
        case CoeffID_2:      szName = _T("Coeff 2");       break;
        case CoeffID_3:      szName = _T("Coeff 3");       break;
        case CoeffID_4:      szName = _T("Coeff 4");       break;
        case CoeffID_5:      szName = _T("Coeff 5");       break;
        default:                                           break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetShuffleString
| Purpose   : Translate a shuffling mode into a printable name (debug helper)
|
| Input     : shuffling - shuffling mode identifier
|
| Return    : Static string for the shuffling mode, or nullptr when the
|             value matches none of the cases below
\---------------------------------------------------------------------------*/
const char *KernelDll_GetShuffleString(Kdll_Shuffling shuffling)
{
    const char *szName = nullptr;

    switch (shuffling)
    {
        case Shuffle_None:
            szName = _T("None");
            break;
        case Shuffle_Any:
            szName = _T("Any");
            break;
        case Shuffle_All_8x8_Layer:
            szName = _T("All 8x8 Layer");
            break;
        case Shuffle_RenderTarget:
            szName = _T("Render Target");
            break;
        default:
            break;
    }

    return szName;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_PrintRule
| Purpose   : Format one rule entry as "<rule id> <decoded value>" into a
|             caller-supplied buffer (debug helper)
|
| Input     : szOut  - destination buffer; nothing is written when it is
|                      nullptr or iSize is not positive
|             iSize  - size of szOut, in characters
|             pEntry - rule entry to print (must not be nullptr)
|             pCache - kernel cache used to turn a RID_SetKernel value into
|                      a kernel name; may be nullptr, in which case the raw
|                      value is printed
|
| Output    : szOut is always NUL-terminated when it is written. It holds the
|             rule name alone when the value has no decoding, and an empty
|             string when the rule ID itself is unknown.
|
| Return    : 1, plus the entry value for RID_SetPatch ("<n> patch entries"),
|             presumably so the caller can step over the patch entries that
|             follow the rule
\---------------------------------------------------------------------------*/
int32_t KernelDll_PrintRule(
    char                 *szOut,
    int32_t              iSize,
    const Kdll_RuleEntry *pEntry,
    Kdll_KernelCache     *pCache)
{
    char        data[32];
    int32_t     increment = 1;
    const char *szRID     = KernelDll_GetRuleIDString(pEntry->id);
    const char *szValue   = nullptr;

    // Numeric values are formatted with a limit of sizeof(data) - 1 characters; the
    // last element is terminated up front so the scratch string is always valid,
    // even with formatters that do not terminate on truncation.
    data[sizeof(data) - 1] = 0;

    switch (pEntry->id)
    {
        case RID_IsParserState:
        case RID_SetParserState:
            szValue = KernelDll_GetParserStateString((Kdll_ParserState) pEntry->value);
            break;

        case RID_IsLayerID:
            szValue = KernelDll_GetLayerString((Kdll_Layer) pEntry->value);
            break;

        // Plain numeric values
        case RID_Op_NewEntry:
        case RID_Op_EOF:
        case RID_IsLayerNumber:
        case RID_IsQuadrant:
        case RID_SetQuadrant:
        case RID_SetPatchData:
            _sntprintf(data, sizeof(data) - 1, _T("%d"), pEntry->value);
            szValue = data;
            break;

        case RID_IsSrc0ColorFill:
        case RID_SetSrc0ColorFill:
            if (pEntry->value == ColorFill_Source)
            {
                szValue = _T("Current Layer");
            }
            else
            {
                szValue = pEntry->value ? _T("TRUE") : _T("FALSE");
            }
            break;

        case RID_IsSrc0LumaKey:
        case RID_IsSrc1LumaKey:
        case RID_SetSrc0LumaKey:
        case RID_SetSrc1LumaKey:
        case RID_IsSrc1SamplerLumaKey:
        case RID_SetSrc1SamplerLumaKey:
            if (pEntry->value == LumaKey_Source)
            {
                szValue = _T("Current Layer");
            }
            else
            {
                szValue = pEntry->value ? _T("TRUE") : _T("FALSE");
            }
            break;

        case RID_IsSrc0Procamp:
        case RID_IsSrc1Procamp:
        case RID_SetSrc0Procamp:
        case RID_SetSrc1Procamp:
            if (pEntry->value == Procamp_Source)
            {
                szValue = _T("Current Layer");
            }
            else if (pEntry->value == DL_PROCAMP_DISABLED)
            {
                szValue = _T("FALSE");
            }
            else
            {
                szValue = _T("TRUE");
            }
            break;

        case RID_IsSrc0Chromasiting:
        case RID_IsSrc1Chromasiting:
            szValue = (pEntry->value == DL_CHROMASITING_DISABLE) ? _T("FALSE") : _T("TRUE");
            break;

        case RID_SetKernel:
            // Only index the cache with a value inside [0, iCacheEntries); anything
            // else (including negative values) is printed as a plain number.
            if (pCache &&
                pCache->pCacheEntries &&
                pEntry->value >= 0 &&
                pEntry->value < pCache->iCacheEntries)
            {
                szValue = pCache->pCacheEntries[pEntry->value].szName;
            }
            else
            {
                _sntprintf(data, sizeof(data) - 1, _T("%d"), pEntry->value);
                szValue = data;
            }
            break;

        case RID_SetNextLayer:
            // Any other value has no decoding; only the rule name is printed.
            if (pEntry->value == 0)
            {
                szValue = _T("Next Layer");
            }
            else if (pEntry->value == -1)
            {
                szValue = _T("Previous Layer");
            }
            else if (pEntry->value == 2)
            {
                szValue = _T("Target Layer");
            }
            else if (pEntry->value == -2)
            {
                szValue = _T("Main Layer");
            }
            break;

        case RID_SetPatch:
            _sntprintf(data, sizeof(data) - 1, _T("%d patch entries"), pEntry->value);
            increment += pEntry->value;
            szValue    = data;
            break;

        case RID_IsLayerFormat:
        case RID_IsSrc0Format:
        case RID_IsSrc1Format:
        case RID_SetSrc0Format:
        case RID_SetSrc1Format:
        case RID_IsTargetFormat:
            szValue = KernelDll_GetFormatString((MOS_FORMAT) pEntry->value);
            break;

        case RID_IsTargetCspace:
        case RID_SetTargetCspace:
            szValue = KernelDll_GetCSpaceString((VPHAL_CSPACE) pEntry->value);
            break;

        case RID_IsSrc0Sampling:
        case RID_IsSrc1Sampling:
        case RID_SetSrc0Sampling:
        case RID_SetSrc1Sampling:
            szValue = KernelDll_GetSamplingString((Kdll_Sampling) pEntry->value);
            break;

        case RID_IsSrc0Rotation:
            szValue = KernelDll_GetRotationString((VPHAL_ROTATION) pEntry->value);
            break;

        case RID_IsShuffling:
            szValue = KernelDll_GetShuffleString((Kdll_Shuffling) pEntry->value);
            break;

        case RID_IsSrc0Coeff:
        case RID_IsSrc1Coeff:
        case RID_SetSrc0Coeff:
        case RID_SetSrc1Coeff:
            szValue = KernelDll_GetCoeffIDString((Kdll_CoeffID) pEntry->value);
            break;

        case RID_IsSrc1Processing:
        case RID_SetSrc1Processing:
            szValue = KernelDll_GetProcessString((Kdll_Processing) pEntry->value);
            break;

        // Plain boolean values
        case RID_IsCSCBeforeMix:
        case RID_SetCSCBeforeMix:
        case RID_Is64BSaveEnabled:
            szValue = pEntry->value ? _T("TRUE") : _T("FALSE");
            break;

        default:
            break;
    }

    if (szOut != nullptr && iSize > 0)
    {
        if (szRID && szValue)
        {
            _sntprintf(szOut, iSize, _T("%-22s %s"), szRID, szValue);
            // Some _sntprintf implementations leave the buffer unterminated on truncation
            szOut[iSize - 1] = 0;
        }
        else if (szRID)
        {
            _sntprintf(szOut, iSize, _T("%-22s"), szRID);
            szOut[iSize - 1] = 0;
        }
        else
        {
            // Unknown rule ID: hand back an empty string rather than stale buffer contents
            szOut[0] = 0;
        }
    }

    return increment;
}
#ifndef VPHAL_LIB
#pragma warning( pop )
#endif // !VPHAL_LIB
#endif // _DEBUG || EMUL || VPHAL_LIB
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetFormatTypeGroup
| Purpose   : Classify a format into one of the groups compared by
|             KernelDll_IsSameFormatType
|
| Input     : format - format to classify
|
| Return    : 1 for CASE_PA_FORMAT, 2 for CASE_PL2_FORMAT, 3 for
|             CASE_PL3_FORMAT, 4 for CASE_RGB_FORMAT, 0 for everything else
\---------------------------------------------------------------------------*/
static int32_t KernelDll_GetFormatTypeGroup(MOS_FORMAT format)
{
    switch (format)
    {
        CASE_PA_FORMAT:
            return 1;

        CASE_PL2_FORMAT:
            return 2;

        CASE_PL3_FORMAT:
            return 3;

        CASE_RGB_FORMAT:
            return 4;

        default:
            return 0;
    }
}

/*----------------------------------------------------------------------------
| Name      : KernelDll_IsSameFormatType
| Purpose   : Check if 2 formats belong to the same format group
|             (PA, PL2, PL3, RGB, or "other")
|
| Input     : format1, format2 - formats to compare
|
| Return    : true when both formats fall in the same group, false otherwise.
|             Two formats that belong to none of the four named groups are
|             both classified as "other" and therefore also compare as same.
\---------------------------------------------------------------------------*/
bool KernelDll_IsSameFormatType(MOS_FORMAT format1, MOS_FORMAT format2)
{
    return KernelDll_GetFormatTypeGroup(format1) == KernelDll_GetFormatTypeGroup(format2);
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsYUVFormat
| Purpose   : Look up whether a format is flagged as YUV in g_cIsFormatYUV
|
| Input     : format - format to check
|
| Return    : Table entry for the format; false for any value outside
|             [Format_Any, Format_Count)
\---------------------------------------------------------------------------*/
bool KernelDll_IsYUVFormat(MOS_FORMAT format)
{
    // Values outside the table range are never YUV
    if (format < Format_Any || format >= Format_Count)
    {
        return false;
    }

    return g_cIsFormatYUV[format];
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsCspace
| Purpose   : Check whether a color space satisfies a match criterion, where
|             the criterion is either a group (RGB, YUV, Gray, Any, BT2020,
|             BT2020_RGB) or one specific color space
|
| Input     : cspace - color space to test
|             match  - group or color space to match against
|
| Return    : true when cspace belongs to the group selected by match, or
|             equals match when match is not one of the groups handled below
\---------------------------------------------------------------------------*/
bool KernelDll_IsCspace(VPHAL_CSPACE cspace, VPHAL_CSPACE match)
{
    bool bMatch;

    switch (match)
    {
        case CSpace_RGB:
            bMatch = (cspace == CSpace_sRGB) || (cspace == CSpace_stRGB);
            break;

        case CSpace_YUV:
            bMatch = (cspace == CSpace_BT709)           ||
                     (cspace == CSpace_BT601)           ||
                     (cspace == CSpace_BT601_FullRange) ||
                     (cspace == CSpace_BT709_FullRange) ||
                     (cspace == CSpace_xvYCC709)        ||
                     (cspace == CSpace_xvYCC601);
            break;

        case CSpace_Gray:
            bMatch = (cspace == CSpace_BT601Gray) || (cspace == CSpace_BT601Gray_FullRange);
            break;

        case CSpace_Any:
            // Anything except "no color space" qualifies
            bMatch = (cspace != CSpace_None);
            break;

        case CSpace_BT2020:
            bMatch = (cspace == CSpace_BT2020) || (cspace == CSpace_BT2020_FullRange);
            break;

        case CSpace_BT2020_RGB:
            bMatch = (cspace == CSpace_BT2020_RGB) || (cspace == CSpace_BT2020_stRGB);
            break;

        default:
            // Not a group: require an exact match
            bMatch = (cspace == match);
            break;
    }

    return bMatch;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_IsFormat
| Purpose   : Check whether a format satisfies a match criterion, where the
|             criterion is either a format group or one specific format
|
| Input     : format - format to test
|             cspace - color space of the surface; only consulted for
|                      palettized formats matched against Format_RGB or
|                      Format_PA
|             match  - format group or format to match against
|
| Return    : true when format satisfies match, false otherwise
\---------------------------------------------------------------------------*/
bool KernelDll_IsFormat(
    MOS_FORMAT   format,
    VPHAL_CSPACE cspace,
    MOS_FORMAT   match)
{
    bool bMatch;

    switch (match)
    {
        case Format_Any:
            bMatch = (format != Format_None);
            break;

        case Format_RGB_Swap:
            bMatch = (IS_RGB_SWAP(format));
            break;

        case Format_RGB_No_Swap:
            bMatch = (IS_RGB_NO_SWAP(format));
            break;

        case Format_RGB:
            // Palettized formats are judged by their color space instead
            bMatch = IS_PAL_FORMAT(format)
                   ? KernelDll_IsCspace(cspace, CSpace_RGB)
                   : (IS_RGB_FORMAT(format) && !IS_PL3_RGB_FORMAT(format));
            break;

        case Format_RGB32:
            bMatch = (IS_RGB32_FORMAT(format));
            break;

        case Format_PA:
            // Palettized formats are judged by their color space instead
            bMatch = IS_PAL_FORMAT(format)
                   ? KernelDll_IsCspace(cspace, CSpace_YUV)
                   : (IS_PA_FORMAT(format) || format == Format_AUYV);
            break;

        case Format_PL2:
            bMatch = (IS_PL2_FORMAT(format));
            break;

        case Format_PL2_UnAligned:
            bMatch = (IS_PL2_FORMAT_UnAligned(format));
            break;

        case Format_PL3:
            bMatch = (IS_PL3_FORMAT(format));
            break;

        case Format_PL3_RGB:
            bMatch = (IS_PL3_RGB_FORMAT(format));
            break;

        case Format_AYUV:
            bMatch = (format == Format_AYUV);
            break;

        case Format_PAL:
            bMatch = (IS_PAL_FORMAT(format));
            break;

        default:
            // Not a group: require an exact match
            bMatch = (format == match);
            break;
    }

    return bMatch;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_TranslateCspace
| Purpose   : Map a color space onto the representative color space of its
|             group
|
| Input     : cspace - color space to translate
|
| Return    : Representative color space of the group; CSpace_None for any
|             color space not handled below
\---------------------------------------------------------------------------*/
VPHAL_CSPACE KernelDll_TranslateCspace(VPHAL_CSPACE cspace)
{
    VPHAL_CSPACE group;

    switch (cspace)
    {
        // Color spaces that are represented by another member of their group
        case CSpace_xvYCC709:
            group = CSpace_BT709;
            break;

        case CSpace_xvYCC601:
            group = CSpace_BT601;
            break;

        case CSpace_RGB:
            group = CSpace_sRGB;
            break;

        case CSpace_Gray:
            group = CSpace_BT601Gray;
            break;

        // Color spaces that represent themselves
        case CSpace_BT709:
        case CSpace_BT601:
        case CSpace_BT601_FullRange:
        case CSpace_BT709_FullRange:
        case CSpace_sRGB:
        case CSpace_stRGB:
        case CSpace_BT601Gray:
        case CSpace_BT601Gray_FullRange:
        case CSpace_BT2020:
        case CSpace_BT2020_FullRange:
        case CSpace_BT2020_RGB:
        case CSpace_BT2020_stRGB:
            group = cspace;
            break;

        default:
            group = CSpace_None;
            break;
    }

    return group;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetYuvRangeAndOffset
| Purpose   : Report the luma offset/excursion and chroma zero/excursion of a
|             YUV color space
|
| Input     : cspace           - YUV color space
|
| Output    : pLumaOffset      - luma offset      (0 full range, 16 otherwise)
|             pLumaExcursion   - luma excursion   (255 full range, 219 otherwise)
|             pChromaZero      - chroma zero      (128 in both cases)
|             pChromaExcursion - chroma excursion (255 full range, 224 otherwise)
|
| Return    : true if success; false for an unsupported color space, in which
|             case none of the outputs is written
\---------------------------------------------------------------------------*/
bool KernelDll_GetYuvRangeAndOffset(
    Kdll_CSpace cspace,
    float       *pLumaOffset,
    float       *pLumaExcursion,
    float       *pChromaZero,
    float       *pChromaExcursion)
{
    float fLumaOffset;
    float fLumaExcursion;
    float fChromaExcursion;

    switch (cspace)
    {
        case CSpace_BT601_FullRange:
        case CSpace_BT709_FullRange:
        case CSpace_BT601Gray_FullRange:
        case CSpace_BT2020_FullRange:
            fLumaOffset      = 0.0f;
            fLumaExcursion   = 255.0f;
            fChromaExcursion = 255.0f;
            break;

        case CSpace_BT601:
        case CSpace_BT709:
        case CSpace_xvYCC601:   // same matrix as 601, so the same range is used
        case CSpace_xvYCC709:   // same matrix as 709, so the same range is used
        case CSpace_BT601Gray:
        case CSpace_BT2020:
            fLumaOffset      = 16.0f;
            fLumaExcursion   = 219.0f;
            fChromaExcursion = 224.0f;
            break;

        default:
            return false;
    }

    *pLumaOffset      = fLumaOffset;
    *pLumaExcursion   = fLumaExcursion;
    *pChromaZero      = 128.0f;
    *pChromaExcursion = fChromaExcursion;

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetRgbRangeAndOffset
| Purpose   : Report the offset and excursion of an RGB color space
|
| Input     : cspace        - RGB color space
|
| Output    : pRgbOffset    - offset    (0 for sRGB / BT2020_RGB,
|                                        16 for stRGB / BT2020_stRGB)
|             pRgbExcursion - excursion (255 for sRGB / BT2020_RGB,
|                                        219 for stRGB / BT2020_stRGB)
|
| Return    : true if success; false for an unsupported color space, in which
|             case none of the outputs is written
\---------------------------------------------------------------------------*/
bool KernelDll_GetRgbRangeAndOffset(
    Kdll_CSpace cspace,
    float       *pRgbOffset,
    float       *pRgbExcursion)
{
    float fOffset;
    float fExcursion;

    switch (cspace)
    {
        case CSpace_sRGB:
        case CSpace_BT2020_RGB:
            fOffset    = 0.0f;
            fExcursion = 255.0f;
            break;

        case CSpace_stRGB:
        case CSpace_BT2020_stRGB:
            fOffset    = 16.0f;
            fExcursion = 219.0f;
            break;

        default:
            return false;
    }

    *pRgbOffset    = fOffset;
    *pRgbExcursion = fExcursion;

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_CalcYuvToRgbMatrix
| Purpose   : Build the final 3x4 YUV->RGB conversion matrix from a 3x3
|             transfer matrix by applying the offsets and excursions of the
|             source (YUV) and destination (RGB) color spaces.
|
|   [R']   [R_o]                      [R_e/Y_e    0       0   ]   [Y'  - Y_o]
|   [G'] = [R_o] + [3x3 YUV->RGB]  .  [   0    R_e/C_e    0   ] . [Cb' - C_z]
|   [B']   [R_o]                      [   0       0    R_e/C_e]   [Cr' - C_z]
|
|   which is stored in pOutMatrix as
|
|   [R']   [C0  C1  C2 ]   [Y' ]   [C3 ]
|   [G'] = [C4  C5  C6 ] . [Cb'] + [C7 ]
|   [B']   [C8  C9  C10]   [Cr']   [C11]
|
| Return    : true if success; false when either color space is not
|             supported, in which case pOutMatrix is left untouched
\---------------------------------------------------------------------------*/
bool KernelDll_CalcYuvToRgbMatrix(
    Kdll_CSpace src,                // [in] YUV Color space
    Kdll_CSpace dst,                // [in] RGB Color space
    float       *pTransferMatrix,   // [in] Transfer matrix (3x3)
    float       *pOutMatrix)        // [out] Conversion matrix (3x4)
{
    float   fLumaOffset, fLumaExcursion, fChromaZero, fChromaExcursion;
    float   fRgbOffset, fRgbExcursion;
    int32_t iRow, iCol;

    if (!KernelDll_GetRgbRangeAndOffset(dst, &fRgbOffset, &fRgbExcursion))
    {
        return false;
    }

    if (!KernelDll_GetYuvRangeAndOffset(src, &fLumaOffset, &fLumaExcursion, &fChromaZero, &fChromaExcursion))
    {
        return false;
    }

    // Coefficients: scale each column of the transfer matrix by the excursion ratio.
    // Column 0 multiplies luma (Y_e); columns 1 and 2 multiply chroma (C_e).
    // The matrix is filled column by column.
    for (iCol = 0; iCol < 3; iCol++)
    {
        const float fInExcursion = (iCol == 0) ? fLumaExcursion : fChromaExcursion;

        for (iRow = 0; iRow < 3; iRow++)
        {
            pOutMatrix[4 * iRow + iCol] = pTransferMatrix[3 * iRow + iCol] * fRgbExcursion / fInExcursion;
        }
    }

    // Offsets: R_o minus the scaled coefficients applied to (Y_o, C_z, C_z)
    for (iRow = 0; iRow < 3; iRow++)
    {
        pOutMatrix[4 * iRow + 3] = fRgbOffset - (pOutMatrix[4 * iRow]     * fLumaOffset +
                                                 pOutMatrix[4 * iRow + 1] * fChromaZero +
                                                 pOutMatrix[4 * iRow + 2] * fChromaZero);
    }

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_CalcRgbToYuvMatrix
| Purpose   : Fold the RGB/YUV offsets and excursions into a 3x3 RGB->YUV
|             transfer matrix, producing one 3x4 conversion matrix.
|
|   [Y' ]   [Y_o - Y_e.R_o/R_e]   [Y_e/R_e    0       0   ]                     [R']
|   [Cb'] = [       C_z       ] + [   0    C_e/R_e    0   ] . [RGBtoYUV (3x3)] . [G']
|   [Cr']   [       C_z       ]   [   0       0    C_e/R_e]                     [B']
|
|   which is stored in pOutMatrix as
|
|   [Y' ]   [C0 C1 C2 ]   [R']   [C3 ]
|   [Cb'] = [C4 C5 C6 ] . [G'] + [C7 ]
|   [Cr']   [C8 C9 C10]   [B']   [C11]
|
| Input     : src             - RGB color space
|             dst             - YUV color space
|             pTransferMatrix - 3x3 transfer matrix (9 floats, row-major)
|             pOutMatrix      - receives the 3x4 matrix (12 floats, row-major)
|
| Return    : true if success else false (color space not recognised by the
|             range/offset helpers; pOutMatrix is not written in that case)
\---------------------------------------------------------------------------*/
bool KernelDll_CalcRgbToYuvMatrix(
    Kdll_CSpace src,                    // [in] RGB Color space
    Kdll_CSpace dst,                    // [in] YUV Color space
    float       *pTransferMatrix,       // [in] Transfer matrix (3x3)
    float       *pOutMatrix)            // [out] Conversion matrix (3x4)
{
    float   Y_o, Y_e, C_z, C_e;
    float   R_o, R_e;
    float   fOutExcursion;
    int32_t iRow, iCol;

    if (!KernelDll_GetRgbRangeAndOffset(src, &R_o, &R_e))
    {
        return false;
    }

    if (!KernelDll_GetYuvRangeAndOffset(dst, &Y_o, &Y_e, &C_z, &C_e))
    {
        return false;
    }

    // Scale the 3x3 part one row at a time: the luma row uses the luma
    // excursion, both chroma rows use the chroma excursion.
    for (iRow = 0; iRow < 3; iRow++)
    {
        fOutExcursion = (iRow == 0) ? Y_e : C_e;

        for (iCol = 0; iCol < 3; iCol++)
        {
            pOutMatrix[4 * iRow + iCol] = pTransferMatrix[3 * iRow + iCol] * fOutExcursion / R_e;
        }
    }

    // Constant column
    pOutMatrix[3]  = Y_o - Y_e * R_o / R_e;
    pOutMatrix[7]  = C_z;
    pOutMatrix[11] = C_z;

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_CalcGrayCoeffs
| Purpose   : Rewrite a 3x4 CSC matrix so that the chroma inputs have no
|             effect. The chroma contribution is evaluated once at the chroma
|             zero level C_z of the source color space and absorbed into the
|             constant column; the chroma coefficients are then cleared.
|
|   For every row [Ca Cb Cc | Cd]:
|       new Cd = Cb * C_z + Cc * C_z + Cd
|       new Cb = new Cc = 0
|
| Input     : src     - YUV source color space (supplies C_z)
|             pMatrix - 3x4 conversion matrix, modified in place
|
| Return    : true if success else false (matrix untouched on failure)
\---------------------------------------------------------------------------*/
bool KernelDll_CalcGrayCoeffs(
    Kdll_CSpace src,                    // [in] YUV source Color space
    float       *pMatrix)               // [in/out] Conversion matrix (3x4)
{
    float   Y_o, Y_e, C_z, C_e;         // only C_z is consumed below
    int32_t iRow;

    if (!KernelDll_GetYuvRangeAndOffset(src, &Y_o, &Y_e, &C_z, &C_e))
    {
        return false;
    }

    for (iRow = 0; iRow < 3; iRow++)
    {
        float *pRow = pMatrix + 4 * iRow;

        // Absorb the fixed chroma contribution into the constant term ...
        pRow[3] = pRow[1] * C_z + pRow[2] * C_z + pRow[3];

        // ... and cancel whatever chroma value is actually read
        pRow[2] = 0;
        pRow[1] = pRow[2];
    }

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_3x3MatrixProduct
| Purpose   : Multiply the 3x3 parts of two row-major 3x4 matrices
|             (dest = m1 x m2). The 4th column of both inputs is ignored and
|             the 4th column of dest (indices 3, 7, 11) is not written.
|
| Input     : dest - receives the 3x3 product (stride of 4 floats per row)
|             m1   - left operand  (3x4)
|             m2   - right operand (3x4)
|
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_3x3MatrixProduct(
    float       *dest,
    const float *m1,
    const float *m2)
{
    int row;
    int col;

    for (row = 0; row < 3; row++)
    {
        const float *pLhsRow = m1 + 4 * row;

        for (col = 0; col < 3; col++)
        {
            dest[4 * row + col] = pLhsRow[0] * m2[col] +
                                  pLhsRow[1] * m2[4 + col] +
                                  pLhsRow[2] * m2[8 + col];
        }
    }
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_CalcYuvToYuvMatrix
| Purpose   : Build the 3x4 matrix converting between two YUV color spaces
|             by going through sRGB:
|             1. S = conversion matrix from source YUV to sRGB
|             2. D = conversion matrix from sRGB to destination YUV
|             3. Compose the two affine transforms:
|
|                Out(3x3)    = D(3x3) . S(3x3)
|                Out(offset) = D(3x3) . S(offset) + D(offset)
|
|   [Y'dst]   [C0 C1 C2 ]   [Y'src]   [C3 ]
|   [U'dst] = [C4 C5 C6 ] . [U'src] + [C7 ]        {Out pMatrix}
|   [V'dst]   [C8 C9 C10]   [V'src]   [C11]
|
|             BT601 transfer coefficients are used for a color space that
|             satisfies IS_BT601_CSPACE, BT709 coefficients otherwise.
|
| Input     : src        - source YUV color space
|             dst        - destination YUV color space
|             pOutMatrix - receives the 3x4 matrix (12 floats, row-major)
|
| Return    : true if success else false (pOutMatrix untouched on failure)
\---------------------------------------------------------------------------*/
bool KernelDll_CalcYuvToYuvMatrix(
    Kdll_CSpace src,                    // [in] Source YUV Color space
    Kdll_CSpace dst,                    // [in] Destination YUV Color space
    float       *pOutMatrix)            // [out] Conversion matrix (3x4)
{
    float   fSrcToRgb[12] = {0};
    float   fRgbToDst[12] = {0};
    float   *pCoeff;
    int32_t iRow;

    // 1. Source YUV -> sRGB
    pCoeff = IS_BT601_CSPACE(src) ? (float *) g_cCSC_BT601_YUV_RGB
                                  : (float *) g_cCSC_BT709_YUV_RGB;
    if (!KernelDll_CalcYuvToRgbMatrix(src, CSpace_sRGB, pCoeff, fSrcToRgb))
    {
        return false;
    }

    // 2. sRGB -> destination YUV
    pCoeff = IS_BT601_CSPACE(dst) ? (float *) g_cCSC_BT601_RGB_YUV
                                  : (float *) g_cCSC_BT709_RGB_YUV;
    if (!KernelDll_CalcRgbToYuvMatrix(CSpace_sRGB, dst, pCoeff, fRgbToDst))
    {
        return false;
    }

    // 3a. 3x3 part of the composition
    KernelDll_3x3MatrixProduct(pOutMatrix, fRgbToDst, fSrcToRgb);

    // 3b. Offset column: [3x3 of step 2] x [offset of step 1] + [offset of step 2]
    for (iRow = 0; iRow < 3; iRow++)
    {
        const float *pDstRow = fRgbToDst + 4 * iRow;

        pOutMatrix[4 * iRow + 3] = pDstRow[0] * fSrcToRgb[3] + pDstRow[1] * fSrcToRgb[7] +
                                   pDstRow[2] * fSrcToRgb[11] + pDstRow[3];
    }

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetCSCMatrix
| Purpose   : Fill pCSC_Matrix with the 3x4 matrix converting from src to dst.
|             Gray source color spaces are first mapped to their non-gray
|             BT601 equivalent; the chroma terms are neutralised at the end
|             through KernelDll_CalcGrayCoeffs.
|
|             If no supported conversion is found, an assert message is
|             emitted and pCSC_Matrix is left as the caller passed it.
|
| Input     : src         - source color space
|             dst         - destination color space
|             pCSC_Matrix - receives the matrix (12 floats, row-major)
|
| Return    : none (results of the matrix helpers are not checked here)
\---------------------------------------------------------------------------*/
void KernelDll_GetCSCMatrix(
    Kdll_CSpace src,                    // [in] Source Color space
    Kdll_CSpace dst,                    // [in] Destination Color space
    float       *pCSC_Matrix)           // [out] CSC matrix to use
{
    bool        bDerived    = false;
    bool        bGraySource = KernelDll_IsCspace(src, CSpace_Gray);
    Kdll_CSpace effSrc      = src;
    float       *pCoeff;
    int32_t     iRow;

    // Replace a gray color space by its equivalent non-gray color space
    if (src == CSpace_BT601Gray)
    {
        effSrc = CSpace_BT601;
    }
    else if (src == CSpace_BT601Gray_FullRange)
    {
        effSrc = CSpace_BT601_FullRange;
    }

    if (KernelDll_IsCspace(effSrc, CSpace_YUV) || KernelDll_IsCspace(effSrc, CSpace_Gray))
    {
        // BT601/709 YUV to sRGB/stRGB conversion
        if (KernelDll_IsCspace(dst, CSpace_RGB))
        {
            // Coefficients follow the source: BT601 if flagged so, else BT709
            pCoeff = IS_BT601_CSPACE(effSrc) ? (float *) g_cCSC_BT601_YUV_RGB
                                             : (float *) g_cCSC_BT709_YUV_RGB;
            KernelDll_CalcYuvToRgbMatrix(effSrc, dst, pCoeff, pCSC_Matrix);
            bDerived = true;
        }
    }
    else if (KernelDll_IsCspace(effSrc, CSpace_RGB))
    {
        // sRGB/stRGB to BT601/709 YUV conversion
        if (KernelDll_IsCspace(dst, CSpace_YUV))
        {
            // Coefficients follow the destination: BT601 if flagged so, else BT709
            pCoeff = IS_BT601_CSPACE(dst) ? (float *) g_cCSC_BT601_RGB_YUV
                                          : (float *) g_cCSC_BT709_RGB_YUV;
            KernelDll_CalcRgbToYuvMatrix(effSrc, dst, pCoeff, pCSC_Matrix);
            bDerived = true;
        }
    }
    else if (KernelDll_IsCspace(effSrc, CSpace_BT2020))
    {
        // BT2020 YUV to RGB conversion
        if (KernelDll_IsCspace(dst, CSpace_BT2020_RGB))
        {
            KernelDll_CalcYuvToRgbMatrix(effSrc, dst, (float *)g_cCSC_BT2020_YUV_RGB, pCSC_Matrix);
            bDerived = true;
        }
    }
    else if (KernelDll_IsCspace(effSrc, CSpace_BT2020_RGB))
    {
        // BT2020 RGB to YUV conversion
        if (KernelDll_IsCspace(dst, CSpace_BT2020))
        {
            KernelDll_CalcRgbToYuvMatrix(effSrc, dst, (float *)g_cCSC_BT2020_RGB_YUV, pCSC_Matrix);
            bDerived = true;
        }
    }

    // Nothing derived above: the pair is either a same-family conversion or unsupported
    if (!bDerived)
    {
        if (effSrc == dst)
        {
            // Same color space on both sides -> identity
            MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_Identity), (void *)g_cCSC_Identity, sizeof(g_cCSC_Identity));
        }
        else if (KernelDll_IsCspace(effSrc, CSpace_RGB))
        {
            // sRGB <-> stRGB: anything other than sRGB is treated as stRGB
            if (effSrc == CSpace_sRGB)
            {
                MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_sRGB_stRGB), (void *)g_cCSC_sRGB_stRGB, sizeof(g_cCSC_sRGB_stRGB));
            }
            else
            {
                MOS_SecureMemcpy(pCSC_Matrix, sizeof(g_cCSC_stRGB_sRGB), (void *)g_cCSC_stRGB_sRGB, sizeof(g_cCSC_stRGB_sRGB));
            }
        }
        else if (KernelDll_IsCspace(effSrc, CSpace_YUV))
        {
            // YUV <-> YUV (e.g. 601 <-> 709)
            KernelDll_CalcYuvToYuvMatrix(effSrc, dst, pCSC_Matrix);
        }
        else
        {
            VPHAL_RENDER_ASSERTMESSAGE("Not supported color space conversion(from %d to %d)", src, dst);
        }
    }

    // Gray source: fold chroma into the constants (uses the original src)
    if (bGraySource)
    {
        KernelDll_CalcGrayCoeffs(src, pCSC_Matrix);
    }

    // Trace the resulting matrix, one row per message
    VPHAL_RENDER_NORMALMESSAGE("");
    for (iRow = 0; iRow < 3; iRow++)
    {
        VPHAL_RENDER_NORMALMESSAGE("%f\t%f\t%f\t%f",
            pCSC_Matrix[4 * iRow],
            pCSC_Matrix[4 * iRow + 1],
            pCSC_Matrix[4 * iRow + 2],
            pCSC_Matrix[4 * iRow + 3]);
    }
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_MatrixProduct
| Purpose   : Compose two row-major 3x4 affine matrices: dest = m1 x m2.
|             The 3x3 parts are multiplied and the constant column becomes
|             m1(3x3) x m2(offset) + m1(offset).
|
|             dest may be the same pointer as m1 and/or m2; in that case the
|             aliased operand is read from a private copy taken before dest
|             is overwritten.
|
| Input     : dest - receives the 3x4 result
|             m1   - left operand  (3x4)
|             m2   - right operand (3x4)
|
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_MatrixProduct(
    float       *dest,
    const float *m1,
    const float *m2)
{
    float   snapshot[12];
    int32_t iRow;

    // Support dest = dest * m2, dest = m1 * dest and dest = dest * dest
    if ((m1 == dest) || (m2 == dest))
    {
        MOS_SecureMemcpy(snapshot, sizeof(snapshot), (void *)dest, sizeof(snapshot));

        if (m1 == dest)
        {
            m1 = snapshot;
        }
        if (m2 == dest)
        {
            m2 = snapshot;
        }
    }

    // Multiply the matrices, one output row at a time
    for (iRow = 0; iRow < 3; iRow++)
    {
        const float *pLhs = m1 + 4 * iRow;
        float       *pOut = dest + 4 * iRow;

        pOut[0] = pLhs[0] * m2[0] + pLhs[1] * m2[4] + pLhs[2] * m2[ 8];
        pOut[1] = pLhs[0] * m2[1] + pLhs[1] * m2[5] + pLhs[2] * m2[ 9];
        pOut[2] = pLhs[0] * m2[2] + pLhs[1] * m2[6] + pLhs[2] * m2[10];
        pOut[3] = pLhs[0] * m2[3] + pLhs[1] * m2[7] + pLhs[2] * m2[11] + pLhs[3];
    }
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_MapCSCMatrix
| Purpose   : Convert a 3x4 float matrix to short coefficients and store them
|             in the order associated with the given CSC type. Each matrix
|             element M[i] is converted with FLOAT_TO_SHORT and written to
|             coeff[map[i]], where map depends on csctype.
|
| Input     : csctype - kind of color space conversion
|             matrix  - source matrix (12 floats)
|             coeff   - receives the 12 converted coefficients
|
| Return    : always true
\---------------------------------------------------------------------------*/
bool KernelDll_MapCSCMatrix(
    Kdll_CSCType csctype,
    const float  *matrix,
    short        *coeff)
{
    // Destination index in coeff[] for each matrix element M0..M11
    static const int32_t s_MapYuvToRgb[12] = { 0, 1,  2,  3,  4,  5,  6,  7,  8, 9, 10, 11 };
    static const int32_t s_MapRgbToYuv[12] = { 6, 4,  5,  7, 10,  8,  9, 11,  2, 0,  1,  3 };
    static const int32_t s_MapYuvToYuv[12] = { 4, 5,  6,  7,  8,  9, 10, 11,  0, 1,  2,  3 };
    static const int32_t s_MapRgbToRgb[12] = { 2, 0,  1,  3,  6,  4,  5,  7, 10, 8,  9, 11 };

    const int32_t *pMap;
    int32_t       i;

    // Unified kernel architecture requires the coefficients to be programmed
    // in a specific order that depends on the type of conversion.
    switch (csctype)
    {
        case CSC_YUV_RGB:
            pMap = s_MapYuvToRgb;       // direct mapping from matrix to coeff
            break;

        case CSC_RGB_YUV:
            pMap = s_MapRgbToYuv;
            break;

        case CSC_YUV_YUV:
            pMap = s_MapYuvToYuv;
            break;

        default:                        // CSC_RGB_RGB
            pMap = s_MapRgbToRgb;
            break;
    }

    for (i = 0; i < 12; i++)
    {
        coeff[pMap[i]] = FLOAT_TO_SHORT(matrix[i]);
    }

    return true;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_UpdateCscCoefficients
| Purpose   : Compute the kernel CSC coefficients for one CSC matrix entry.
|             The conversion from pMatrix->SrcSpace to pMatrix->DstSpace is
|             combined with the procamp matrix (when a valid procamp entry is
|             selected), rounded, and handed to pState->pfnMapCSCMatrix which
|             stores the result in pMatrix->Coeff.
|
| Input     : pState  - Kernel Dll state (procamp table, mapping callback)
|             pMatrix - CSC matrix entry; Coeff and, when procamp is applied,
|                       iProcampVersion are updated
|
| Return    : none
\---------------------------------------------------------------------------*/
void KernelDll_UpdateCscCoefficients(Kdll_State *pState,
                                     Kdll_CSC_Matrix *pMatrix)
{
    float         fPostCsc[12];         // matrix applied after procamp  (YUV->RGB)
    float         fPreCsc[12];          // matrix applied before procamp (RGB->YUV), (YUV->YUV)
    float         fWork[12];            // matrix that gets normalized
    float         fFinal[12];           // procamp scratch, then normalized output
    Kdll_CSCType  csctype;
    Kdll_Procamp  *pProcamp = nullptr;
    VPHAL_CSPACE  src       = pMatrix->SrcSpace;
    VPHAL_CSPACE  dst       = pMatrix->DstSpace;
    bool          bPost     = false;
    bool          bPre      = false;
    int32_t       i;

    MOS_ZeroMemory(fWork, sizeof(fWork));
    MOS_ZeroMemory(fPostCsc, sizeof(fPostCsc));
    MOS_ZeroMemory(fPreCsc, sizeof(fPreCsc));

    // Select procamp parameters (only for an enabled, in-range ID with a table present)
    if (pMatrix->iProcampID > DL_PROCAMP_DISABLED &&
        pMatrix->iProcampID < pState->iProcampSize &&
        pState->pProcamp != nullptr)
    {
        pProcamp = pState->pProcamp + pMatrix->iProcampID;
    }

    if (src != dst)
    {
        // Destination is sRGB/stRGB and the source is not the other RGB flavour
        bool bIntoSRGB  = (dst == CSpace_sRGB)  && (src != CSpace_stRGB);
        bool bIntoStRGB = (dst == CSpace_stRGB) && (src != CSpace_sRGB);

        if (bIntoSRGB || bIntoStRGB)
        {
            KernelDll_GetCSCMatrix(src, dst, fPostCsc);
            MOS_SecureMemcpy(fWork, sizeof(fPostCsc), (void *)fPostCsc, sizeof(fPostCsc));
            bPost   = true;
            csctype = CSC_YUV_RGB;
        }
        else
        {
            KernelDll_GetCSCMatrix(src, dst, fPreCsc);
            MOS_SecureMemcpy(fWork, sizeof(fPreCsc), (void *)fPreCsc, sizeof(fPreCsc));
            bPre = true;

            // Classify the conversion for the coefficient mapping
            if (KernelDll_IsCspace(src, CSpace_RGB) && !KernelDll_IsCspace(dst, CSpace_RGB))
            {
                csctype = CSC_RGB_YUV;
            }
            else if (KernelDll_IsCspace(src, CSpace_BT2020_RGB) && KernelDll_IsCspace(dst, CSpace_BT2020))
            {
                csctype = CSC_RGB_YUV;
            }
            else if (KernelDll_IsCspace(src, CSpace_BT2020) && KernelDll_IsCspace(dst, CSpace_BT2020_RGB))
            {
                csctype = CSC_YUV_RGB;
            }
            else
            {
                csctype = CSC_YUV_YUV;
            }
        }
    }
    else if (((dst == CSpace_sRGB) || (dst == CSpace_stRGB)) && (pProcamp))
    {
        // Procamp in sRGB/stRGB space: go to BT709, apply procamp, come back
        KernelDll_GetCSCMatrix(dst, CSpace_BT709, fPreCsc);
        KernelDll_GetCSCMatrix(CSpace_BT709, dst, fPostCsc);
        bPre    = true;
        bPost   = true;
        csctype = CSC_RGB_RGB;
    }
    else
    {
        // Same color space and no RGB procamp round trip: start from identity
        MOS_SecureMemcpy(fWork, sizeof(g_cCSC_Identity), (void *)g_cCSC_Identity, sizeof(g_cCSC_Identity));
        csctype = CSC_YUV_YUV;
    }

    // Product only happens if Procamp is present
    // Otherwise the matrix selected above is used as is
    if (pProcamp)
    {
        float fBrightness = pProcamp->fBrightness;
        float fContrast   = pProcamp->fContrast;
        float fHueRad     = pProcamp->fHue * (PI / 180.0f);
        float fSaturation = pProcamp->fSaturation;
        float fCosTerm    = (float)cos(fHueRad) * fContrast * fSaturation;
        float fSinTerm    = (float)sin(fHueRad) * fContrast * fSaturation;

        // procamp matrix
        //
        // [Y']   [ c            0          0  ] [Y]   [ 16  - 16 * c + b              ]
        // [U'] = [ 0   c*s*cos(h)  c*s*sin(h) ] [U] + [ 128 - 128*c*s*(cos(h)+sin(h)) ]
        // [V']   [ 0  -c*s*sin(h)  c*s*cos(h) ] [V]   [ 128 - 128*c*s*(cos(h)-sin(h)) ]
        fFinal[0]  = fContrast;
        fFinal[1]  = 0.0f;
        fFinal[2]  = 0.0f;
        fFinal[3]  = 16.0f - 16.0f * fContrast + fBrightness;
        fFinal[4]  = 0.0f;
        fFinal[5]  = fCosTerm;
        fFinal[6]  = fSinTerm;
        fFinal[7]  = 128.0f * (1.0f - fFinal[5] - fFinal[6]);
        fFinal[8]  = 0.0f;
        fFinal[9]  = -fFinal[6];
        fFinal[10] = fFinal[5];
        fFinal[11] = 128.0f * (1.0f - fFinal[5] + fFinal[6]);

        // Final matrix is [post] * [procamp] * [pre], each factor being optional
        if (bPre)
        {
            KernelDll_MatrixProduct(fFinal, fFinal, fPreCsc);
        }
        if (bPost)
        {
            KernelDll_MatrixProduct(fFinal, fPostCsc, fFinal);
        }

        // Update procamp version
        pMatrix->iProcampVersion = pProcamp->iProcampVersion;

        // The composed matrix replaces the plain CSC matrix for normalization
        MOS_SecureMemcpy(fWork, sizeof(fWork), (void *)fFinal, sizeof(fWork));
    }

    // Normalize for kernel use: the constant column (every 4th element) is
    // rounded with factor 0.5 ("16.0, value/2" per the original notes), all
    // other coefficients with factor 128 ("9.7" per the original notes).
    for (i = 0; i < 12; i++)
    {
        if ((i % 4) == 3)
        {
            fFinal[i] = ROUND_FLOAT(fWork[i], 0.5f);
        }
        else
        {
            fFinal[i] = ROUND_FLOAT(fWork[i], 128.0f);
        }
    }

    // Save matrix as kernel CSC coefficients
    pState->pfnMapCSCMatrix(csctype, fFinal, pMatrix->Coeff);
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_FindRule
| Purpose   : Find the first rule set whose match entries all agree with the
|             current search/input state.
|
|             Rule sets are taken from the table of the current parser state
|             (all states >= Parser_Custom share the Parser_Custom table) and
|             tried in order. Within a rule set the first iMatchCount entries
|             are evaluated one by one; the first entry that does not match
|             rejects the rule set.
|
|             Format rules (layer/src0/src1/target) may form groups: an entry
|             tagged Kdll_Or never rejects by itself, while the entry tagged
|             Kdll_None rejects if neither it nor any earlier entry of the
|             same kind in this rule set matched.
|
| Input     : pState       - Kernel Dll state
|             pSearchState - current DL search state
|
| Return    : true  - match found, pSearchState->pMatchingRuleSet points to it
|             false - no match, pSearchState->pMatchingRuleSet is nullptr
\---------------------------------------------------------------------------*/
bool KernelDll_FindRule(
    Kdll_State       *pState,
    Kdll_SearchState *pSearchState)
{
    uint32_t             uTable = (uint32_t)pSearchState->state;
    Kdll_RuleEntrySet    *pCandidate;
    const Kdll_RuleEntry *pEntry;
    int32_t              iSetsLeft;
    int32_t              iEntriesLeft;
    bool                 bEntryOk;
    bool                 bLayerFormatMatched;
    bool                 bSrc0FormatMatched;
    bool                 bSrc1FormatMatched;
    bool                 bTargetFormatMatched;
    bool                 bSrc0SampingMatched;

    VPHAL_RENDER_FUNCTION_ENTER;

    // All Custom states are handled as a single group
    if (uTable >= Parser_Custom)
    {
        uTable = Parser_Custom;
    }

    pCandidate = pState->pDllRuleTable[uTable];
    iSetsLeft  = pState->iDllRuleCount[uTable];

    if (pCandidate == nullptr || iSetsLeft == 0)
    {
        VPHAL_RENDER_NORMALMESSAGE("Search rules undefined.");
        pSearchState->pMatchingRuleSet = nullptr;
        return false;
    }

    // Try every rule set of this parser state, in table order
    for ( ; iSetsLeft > 0; iSetsLeft--, pCandidate++)
    {
        pEntry       = pCandidate->pRuleEntry;
        iEntriesLeft = pCandidate->iMatchCount;

        // Group-match memory is per rule set
        bLayerFormatMatched  = false;
        bSrc0FormatMatched   = false;
        bSrc1FormatMatched   = false;
        bTargetFormatMatched = false;
        bSrc0SampingMatched  = false;

        // Evaluate the match entries; stop at the first one that fails so
        // that iEntriesLeft stays non-zero for a rejected rule set
        for ( ; iEntriesLeft > 0; iEntriesLeft--, pEntry++)
        {
            bEntryOk = false;

            switch (pEntry->id)
            {
                // Match current Parser State
                case RID_IsParserState:
                    bEntryOk = (pSearchState->state == (Kdll_ParserState) pEntry->value);
                    break;

                // Match render method
                case RID_IsRenderMethod:
                    bEntryOk = (pSearchState->pFilter->RenderMethod == (Kdll_RenderMethod)pEntry->value);
                    break;

                // Match target color space
                case RID_IsTargetCspace:
                    if (KernelDll_IsCspace(pSearchState->cspace, (VPHAL_CSPACE) pEntry->value))
                    {
                        bEntryOk = true;
                    }
                    break;

                // Match current layer ID
                case RID_IsLayerID:
                    bEntryOk = (pSearchState->pFilter->layer == (Kdll_Layer) pEntry->value);
                    break;

                // Match current layer format
                case RID_IsLayerFormat:
                    if (pEntry->logic == Kdll_Or && bLayerFormatMatched)
                    {
                        // Already found matching format in the ruleset
                        bEntryOk = true;
                    }
                    else
                    {
                        if (KernelDll_IsFormat(pSearchState->pFilter->format,
                                               pSearchState->pFilter->cspace,
                                               (MOS_FORMAT ) pEntry->value))
                        {
                            bLayerFormatMatched = true;
                        }

                        // Only the last entry of the group can reject
                        bEntryOk = (pEntry->logic != Kdll_None) || bLayerFormatMatched;
                    }
                    break;

                // Match shuffling requirement
                case RID_IsShuffling:
                    bEntryOk = (pSearchState->ShuffleSamplerData == (Kdll_Shuffling) pEntry->value);
                    break;

                // Check if RT rotates
                case RID_IsRTRotate:
                    bEntryOk = (pSearchState->bRTRotate == (pEntry->value ? true : false) );
                    break;

                // Match current layer rotation
                case RID_IsLayerRotation:
                    bEntryOk = (pSearchState->pFilter->rotation == (VPHAL_ROTATION) pEntry->value);
                    break;

                // Match Src0 source format (surface)
                case RID_IsSrc0Format:
                    if (pEntry->logic == Kdll_Or && bSrc0FormatMatched)
                    {
                        // Already found matching format in the ruleset
                        bEntryOk = true;
                    }
                    else
                    {
                        // The intermediate colorspace is used to determine
                        // if palettized input is given in RGB or YUV format.
                        if (KernelDll_IsFormat(pSearchState->src0_format,
                                               pSearchState->cspace,
                                               (MOS_FORMAT ) pEntry->value))
                        {
                            bSrc0FormatMatched = true;
                        }

                        // Only the last entry of the group can reject
                        bEntryOk = (pEntry->logic != Kdll_None) || bSrc0FormatMatched;
                    }
                    break;

                // Match Src0 sampling mode
                case RID_IsSrc0Sampling:
                    if (pSearchState->src0_sampling == (Kdll_Sampling) pEntry->value)
                    {
                        bSrc0SampingMatched = true;
                        bEntryOk            = true;
                    }
                    else
                    {
                        // Accept on an earlier match, on an Or entry, or on
                        // the "any sampling" wildcard when sampling is in use
                        bEntryOk = (bSrc0SampingMatched || pEntry->logic == Kdll_Or) ||
                                   ((Kdll_Sampling) pEntry->value == Sample_Any &&
                                    pSearchState->src0_sampling != Sample_None);
                    }
                    break;

                // Match Src0 rotation
                case RID_IsSrc0Rotation:
                    bEntryOk = (pSearchState->src0_rotation == (VPHAL_ROTATION) pEntry->value);
                    break;

                // Match Src0 Colorfill
                case RID_IsSrc0ColorFill:
                    bEntryOk = (pSearchState->src0_colorfill == (int32_t)pEntry->value);
                    break;

                // Match Src0 Luma Key
                case RID_IsSrc0LumaKey:
                    bEntryOk = (pSearchState->src0_lumakey == (int32_t)pEntry->value);
                    break;

                // Match Src0 Procamp
                case RID_IsSrc0Procamp:
                    bEntryOk = (pSearchState->pFilter->procamp == (int32_t)pEntry->value);
                    break;

                // Match Src0 CSC coefficients (CoeffID_Any accepts anything but CoeffID_None)
                case RID_IsSrc0Coeff:
                    bEntryOk = (pSearchState->src0_coeff == (Kdll_CoeffID) pEntry->value) ||
                               ((Kdll_CoeffID) pEntry->value == CoeffID_Any &&
                                pSearchState->src0_coeff != CoeffID_None);
                    break;

                // Match Src0 CSC coefficients setting mode
                case RID_IsSetCoeffMode:
                    bEntryOk = (pSearchState->pFilter->SetCSCCoeffMode == (Kdll_SetCSCCoeffMethod) pEntry->value);
                    break;

                // Match Src0 processing mode (Process_Any accepts anything but Process_None)
                case RID_IsSrc0Processing:
                    bEntryOk = (pSearchState->src0_process == (Kdll_Processing) pEntry->value) ||
                               ((Kdll_Processing) pEntry->value == Process_Any &&
                                pSearchState->src0_process != Process_None);
                    break;

                // Match Src0 chromasiting mode (read through Filter, not pFilter)
                case RID_IsSrc0Chromasiting:
                    bEntryOk = (pSearchState->Filter->chromasiting == (int32_t)pEntry->value);
                    break;

                // Match Src1 source format (surface)
                case RID_IsSrc1Format:
                    if (pEntry->logic == Kdll_Or && bSrc1FormatMatched)
                    {
                        // Already found matching format in the ruleset
                        bEntryOk = true;
                    }
                    else
                    {
                        // The intermediate colorspace is used to determine
                        // if palettized input is given in RGB or YUV format.
                        if (KernelDll_IsFormat(pSearchState->src1_format,
                                               pSearchState->cspace,
                                               (MOS_FORMAT) pEntry->value))
                        {
                            bSrc1FormatMatched = true;
                        }

                        // Only the last entry of the group can reject
                        bEntryOk = (pEntry->logic != Kdll_None) || bSrc1FormatMatched;
                    }
                    break;

                // Match Src1 sampling mode (Sample_Any accepts anything but Sample_None)
                case RID_IsSrc1Sampling:
                    bEntryOk = (pSearchState->src1_sampling == (Kdll_Sampling) pEntry->value) ||
                               ((Kdll_Sampling) pEntry->value == Sample_Any &&
                                pSearchState->src1_sampling != Sample_None);
                    break;

                // Match Src1 Luma Key
                case RID_IsSrc1LumaKey:
                    bEntryOk = (pSearchState->src1_lumakey == (int32_t)pEntry->value);
                    break;

                // Match Src1 Sampler LumaKey
                case RID_IsSrc1SamplerLumaKey:
                    bEntryOk = (pSearchState->src1_samplerlumakey == (int32_t)pEntry->value);
                    break;

                // Match Src1 Procamp
                case RID_IsSrc1Procamp:
                    bEntryOk = (pSearchState->pFilter->procamp == (int32_t)pEntry->value);
                    break;

                // Match Src1 CSC coefficients (CoeffID_Any accepts anything but CoeffID_None)
                case RID_IsSrc1Coeff:
                    bEntryOk = (pSearchState->src1_coeff == (Kdll_CoeffID) pEntry->value) ||
                               ((Kdll_CoeffID) pEntry->value == CoeffID_Any &&
                                pSearchState->src1_coeff != CoeffID_None);
                    break;

                // Match Src1 processing mode (Process_Any accepts anything but Process_None)
                case RID_IsSrc1Processing:
                    bEntryOk = (pSearchState->src1_process == (Kdll_Processing) pEntry->value) ||
                               ((Kdll_Processing) pEntry->value == Process_Any &&
                                pSearchState->src1_process != Process_None);
                    break;

                // Match Src1 chromasiting mode
                case RID_IsSrc1Chromasiting:
                    //pSearchState->pFilter is pointed to the real sub layer
                    bEntryOk = (pSearchState->pFilter->chromasiting == (int32_t)pEntry->value);
                    break;

                // Match Layer number
                case RID_IsLayerNumber:
                    bEntryOk = (pSearchState->layer_number == (int32_t) pEntry->value);
                    break;

                // Match quadrant
                case RID_IsQuadrant:
                    bEntryOk = (pSearchState->quadrant == (int32_t) pEntry->value);
                    break;

                // Set CSC flag before Mix
                case RID_IsCSCBeforeMix:
                    bEntryOk = (pSearchState->bCscBeforeMix == (pEntry->value ? true : false));
                    break;

                case RID_IsDualOutput:
                    bEntryOk = (pSearchState->pFilter->dualout == (pEntry->value ? true : false));
                    break;

                case RID_IsTargetFormat:
                    if (pEntry->logic == Kdll_Or && bTargetFormatMatched)
                    {
                        // Already found matching format in the ruleset
                        bEntryOk = true;
                    }
                    else
                    {
                        if (pSearchState->target_format == (MOS_FORMAT) pEntry->value)
                        {
                            bTargetFormatMatched = true;
                        }

                        // Only the last entry of the group can reject
                        bEntryOk = (pEntry->logic != Kdll_None) || bTargetFormatMatched;
                    }
                    break;

                case RID_Is64BSaveEnabled:
                    bEntryOk = (pSearchState->b64BSaveEnabled == (pEntry->value ? true : false));
                    break;

                // Kdll_None requires the tile type, Kdll_Not requires any other tile type
                case RID_IsTargetTileType:
                    bEntryOk = (pEntry->logic == Kdll_None &&
                                pSearchState->target_tiletype == (MOS_TILE_TYPE) pEntry->value) ||
                               (pEntry->logic == Kdll_Not &&
                                pSearchState->target_tiletype != (MOS_TILE_TYPE) pEntry->value);
                    break;

                case RID_IsProcampEnabled:
                    bEntryOk = (pSearchState->bProcamp == (pEntry->value ? true : false));
                    break;

                case RID_IsConstOutAlpha:
                    bEntryOk = (pSearchState->pFilter->bFillOutputAlphaWithConstant == (pEntry->value ? true : false));
                    break;

                case RID_IsDitherNeeded:
                    bEntryOk = (pSearchState->pFilter->bIsDitherNeeded == (pEntry->value ? true : false));
                    break;

                // Undefined search rule will fail
                default:
                    VPHAL_RENDER_ASSERTMESSAGE("Invalid rule %d @ layer %d, state %d.", pEntry->id, pSearchState->layer_number, pSearchState->state);
                    break;
            } // End of switch to deal with all matching rule IDs

            // Rule didn't match - try another RuleSet
            if (!bEntryOk)
            {
                break;
            }
        } // End of for loop to test all rules for the current RuleSet

        // Every match entry was consumed -> this rule set matches
        if (iEntriesLeft == 0)
        {
            pSearchState->pMatchingRuleSet = pCandidate;
            return true;
        }
    } // End of for loop to test all RuleSets for the current parser state

    // Failed to find a matching rule -> kernel search will fail
    VPHAL_RENDER_NORMALMESSAGE("Fail to find a matching rule @ layer %d, state %d.", pSearchState->layer_number, pSearchState->state);

    // No match -> return
    pSearchState->pMatchingRuleSet = nullptr;
    return false;
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_GetPatchData
| Purpose   : Get the binary data block used to patch a kernel. Only the CSC
|             coefficient patch kinds (Src0/Src1) are supported: the block is
|             the Coeff array of the CSC matrix associated with the layer's
|             coefficient ID.
|
|             When pState->bEnableCMFC is set, the matrix ID is also appended
|             to CscParams.PatchMatrixID (up to DL_CSC_MAX entries).
|
| Input     : pState       - [in]  Current DL state
|             pSearchState - [in]  Current DL search state
|             iPatchKind   - [in]  Patch kind
|             pSize        - [out] Data block size in bytes; written only
|                                  when a data block is returned
|
| Return    : nullptr   - Unsupported patch kind, or no CSC matrix is
|                         associated with the coefficient ID
|             <>nullptr - Pointer to data block
\---------------------------------------------------------------------------*/
static uint8_t *KernelDll_GetPatchData(
    Kdll_State       *pState,
    Kdll_SearchState *pSearchState,
    int32_t          iPatchKind,
    int32_t          *pSize)
{
    Kdll_CoeffID    coeffID  = CoeffID_None;
    uint8_t         matrixID = DL_CSC_DISABLED;
    Kdll_CSC_Matrix *pMatrix;

    // NOTE(review): pState is in fact read below (bEnableCMFC); the marker is kept as is
    MOS_UNUSED(pState);

    VPHAL_RENDER_FUNCTION_ENTER;

    // Anything but a CSC coefficient patch is rejected
    if (iPatchKind != PatchKind_CSC_Coeff_Src0 &&
        iPatchKind != PatchKind_CSC_Coeff_Src1)
    {
        VPHAL_RENDER_NORMALMESSAGE("Invalid patch kind %d.", iPatchKind);
        return nullptr;
    }

    // Coefficient ID of the layer being patched
    if (iPatchKind != PatchKind_CSC_Coeff_Src0)
    {
        coeffID = pSearchState->src1_coeff;
    }
    else
    {
        coeffID = pSearchState->src0_coeff;
    }

    // Matrix associated with the coefficient ID, if any
    if (coeffID > CoeffID_None)
    {
        matrixID = pSearchState->CscParams.MatrixID[coeffID];
    }

    // No usable matrix
    if (!(matrixID < DL_CSC_MAX))
    {
        return nullptr;
    }

    pMatrix = &(pSearchState->CscParams.Matrix[matrixID]);
    *pSize  = 12 * sizeof(uint16_t);

    // Keep track of the patched matrices when CMFC is enabled
    if (pState->bEnableCMFC)
    {
        if (pSearchState->CscParams.PatchMatrixNum < DL_CSC_MAX)
        {
            pSearchState->CscParams.PatchMatrixID[pSearchState->CscParams.PatchMatrixNum] = matrixID;
            pSearchState->CscParams.PatchMatrixNum ++;
        }
        else
        {
            VPHAL_RENDER_NORMALMESSAGE("Patch CSC coefficient exceed limitation");
        }
    }

    return ((uint8_t *)pMatrix->Coeff);
}
/*----------------------------------------------------------------------------
| Name      : KernelDll_UpdateState
| Purpose   : Update search state using current matching rule
|
| Input     : pState       - Kernel Dll state
|             pSearchState - current DL search state; its pMatchingRuleSet
|                            selects the rule set whose "Set" rules are applied
|
| Return    : true  - every "Set" rule was applied; pMatchingRuleSet is reset
|                     to nullptr
|             false - no matching rule set, invalid rule set, unsupported
|                     "Set" rule, patch rule without a kernel, or a
|                     kernel/patch limit was reached
\---------------------------------------------------------------------------*/
bool KernelDll_UpdateState(
    Kdll_State       *pState,
    Kdll_SearchState *pSearchState)
{
    Kdll_RuleEntrySet    *pRuleSet = pSearchState->pMatchingRuleSet;
    const Kdll_RuleEntry *pRuleEntry;
    int32_t               iSetCount;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Ensures that we have a matching rule
    if (pRuleSet == nullptr)
    {
        return false;
    }

    // Get rule entry and number of state update ("Set") rules; validate
    pRuleEntry = pRuleSet->pRuleEntry;
    iSetCount  = pRuleSet->iSetCount;
    if (pRuleEntry == nullptr || iSetCount < 1)
    {
        VPHAL_RENDER_NORMALMESSAGE("Invalid rule set.");
        return false;
    }

    // Jump to set rules (skip match rules)
    pRuleEntry += pRuleSet->iMatchCount;

    // Apply state update rules
    for (; iSetCount > 0; iSetCount--, pRuleEntry++)
    {
        switch (pRuleEntry->id)
        {
            // Add kernel to the Dynamic Linking array
            case RID_SetKernel:
                if (pSearchState->KernelCount < DL_MAX_KERNELS)
                {
                    int32_t i = pSearchState->KernelCount++;
                    pSearchState->KernelID [i] = pRuleEntry->value;
                    pSearchState->KernelGrp[i] = pRuleSet->iGroup;   // Group associated with the kernel ID
                }
                else
                {
                    VPHAL_RENDER_ASSERTMESSAGE("reached maximum number of component kernels.");
                    return false;
                }
                break;

            // Set Parser State
            case RID_SetParserState:
                pSearchState->state = (Kdll_ParserState) pRuleEntry->value;
                break;

            // Move to Next/Prev Layer
            case RID_SetNextLayer:
                if (pRuleEntry->value == -1)
                {
                    pSearchState->layer_number--;
                    pSearchState->pFilter--;
                }
                else if (pRuleEntry->value == -2) // jump to layer main video
                {
                    do
                    {
                        pSearchState->layer_number--;
                        pSearchState->pFilter--;
                        if (pSearchState->pFilter == nullptr || pSearchState->layer_number < 0 )
                        {
                            return false;
                        }
                    }
                    while (pSearchState->pFilter->layer != Layer_MainVideo);
                }
                else if (pRuleEntry->value == 2) // jump to target layer
                {
                    while (pSearchState->pFilter->layer < Layer_RenderTarget )
                    {
                        pSearchState->layer_number++;
                        pSearchState->pFilter++;
                    }
                }
                else
                {
                    pSearchState->layer_number++;
                    pSearchState->pFilter++;
                }
                break;

            // Set patch data
            case RID_SetPatchData:
                {
                    uint8_t        *pData        = nullptr;
                    int32_t         iSize        = 0;
                    int32_t         iKernelIndex = pSearchState->KernelCount - 1;
                    int32_t         iPatchIndex;
                    Kdll_PatchData *pPatch;

                    // Patch data is attached to the most recently added kernel;
                    // without one, PatchID[-1] would be accessed below.
                    if (iKernelIndex < 0)
                    {
                        VPHAL_RENDER_ASSERTMESSAGE("patch data set before any kernel.");
                        return false;
                    }

                    // Get block of data for patching
                    pData = KernelDll_GetPatchData(pState, pSearchState, (Kdll_PatchKind)pRuleEntry->value, &iSize);
                    if (pData == nullptr || iSize == 0)
                    {
                        VPHAL_RENDER_ASSERTMESSAGE("invalid patch.");
                        return false;
                    }

                    // Append to the existing patch data block
                    iPatchIndex = pSearchState->PatchID[iKernelIndex];

                    // Allocate new patch structure
                    if (iPatchIndex < 0)
                    {
                        // Fail to allocate
                        if (pSearchState->PatchCount >= DL_MAX_PATCHES)
                        {
                            VPHAL_RENDER_ASSERTMESSAGE("reached maximum number of patches.");
                            return false;
                        }

                        // Get new patch block
                        iPatchIndex = pSearchState->PatchCount++;
                        pSearchState->PatchID[iKernelIndex] = iPatchIndex;

                        // Reset new patch entry
                        pPatch = &(pSearchState->Patches[iPatchIndex]);
                        MOS_ZeroMemory(pPatch, sizeof(Kdll_PatchData));
                    }
                    else
                    {
                        // Get Patch entry already in use
                        pPatch = &(pSearchState->Patches[iPatchIndex]);
                    }

                    // Check if data can be appended
                    if (pPatch->iPatchDataSize + iSize > DL_MAX_PATCH_DATA_SIZE)
                    {
                        VPHAL_RENDER_ASSERTMESSAGE("exceeded maximum patch size.");
                        return false;
                    }

                    // Append patch data
                    MOS_SecureMemcpy(pPatch->Data + pPatch->iPatchDataSize, iSize, (void *)pData, iSize);
                    pPatch->iPatchDataSize += iSize;
                }
                break;

            // Set patch operation
            case RID_SetPatch:
                {
                    int32_t          iKernelIndex = pSearchState->KernelCount - 1;
                    int32_t          iPatchIndex;
                    Kdll_PatchData  *pPatch;
                    uint8_t         *pPatchRule;
                    Kdll_PatchBlock *pPatchBlock;
                    int32_t          nPatches;

                    // Patch operations apply to the most recently added kernel;
                    // without one, PatchID[-1] would be read below.
                    if (iKernelIndex < 0)
                    {
                        VPHAL_RENDER_ASSERTMESSAGE("patch set before any kernel.");
                        return false;
                    }

                    // No patch associated with the current kernel
                    iPatchIndex = pSearchState->PatchID[iKernelIndex];
                    if (iPatchIndex < 0)
                    {
                        return false;
                    }

                    // Get Patch entry
                    pPatch = &(pSearchState->Patches[iPatchIndex]);

                    // Get number of patches and pointer to first rule extension (patch rule)
                    nPatches   = pRuleEntry->value;
                    pPatchRule = (uint8_t*) (pRuleEntry + 1);

                    // Check if rules can be applied
                    if (nPatches + pPatch->nPatches > DL_MAX_PATCH_BLOCKS)
                    {
                        VPHAL_RENDER_ASSERTMESSAGE("exceeded number of patch blocks.");
                        return false;
                    }

                    // Set Patches; each patch rule occupies one rule entry slot
                    pPatchBlock = &(pPatch->Patch[pPatch->nPatches]);
                    for (; nPatches > 0; nPatches--, pPatchBlock++, pPatch->nPatches++)
                    {
                        pPatchBlock->BlockSize = ((Kdll_PatchRuleEntry *)pPatchRule)->Size;
                        pPatchBlock->SrcOffset = ((Kdll_PatchRuleEntry *)pPatchRule)->Source;
                        pPatchBlock->DstOffset = ((Kdll_PatchRuleEntry *)pPatchRule)->Dest;
                        pPatchRule += sizeof(Kdll_RuleEntry);
                    }

                    // Skip rule extensions (they are counted in iSetCount)
                    iSetCount  -= pRuleEntry->value;
                    pRuleEntry += pRuleEntry->value;
                }
                break;

            // Set destination colorspace
            case RID_SetTargetCspace:
                if ((VPHAL_CSPACE)pRuleEntry->value == CSpace_Source)
                {
                    pSearchState->cspace = pSearchState->pFilter->cspace;
                }
                else
                {
                    pSearchState->cspace = (VPHAL_CSPACE)pRuleEntry->value;
                }
                break;

            // Set Src0 source format
            case RID_SetSrc0Format:
                if ((MOS_FORMAT  )pRuleEntry->value == Format_Source)
                {
                    pSearchState->src0_format = pSearchState->pFilter->format;
                }
                else
                {
                    pSearchState->src0_format = (MOS_FORMAT  ) pRuleEntry->value;
                }
                break;

            // Set Src0 sampling mode
            case RID_SetSrc0Sampling:
                if ((Kdll_Sampling)pRuleEntry->value == Sample_Source)
                {
                    pSearchState->src0_sampling = pSearchState->pFilter->sampler;
                }
                else
                {
                    pSearchState->src0_sampling = (Kdll_Sampling) pRuleEntry->value;
                }
                break;

            // Set Src0 Rotation
            case RID_SetSrc0Rotation:
                pSearchState->src0_rotation = pSearchState->pFilter->rotation;
                break;

            // Set Src0 Colorfill
            case RID_SetSrc0ColorFill:
                if ((int32_t)pRuleEntry->value == ColorFill_Source)
                {
                    pSearchState->src0_colorfill = pSearchState->pFilter->colorfill;
                }
                else
                {
                    pSearchState->src0_colorfill = (int32_t)pRuleEntry->value;
                }
                break;

            // Set Src0 luma key
            case RID_SetSrc0LumaKey:
                if (pRuleEntry->value == LumaKey_Source)
                {
                    pSearchState->src0_lumakey = pSearchState->pFilter->lumakey;
                }
                else
                {
                    pSearchState->src0_lumakey = (int32_t)pRuleEntry->value;
                }
                break;

            // Set Src0 Procamp
            case RID_SetSrc0Procamp:
                if (pRuleEntry->value == Procamp_Source)
                {
                    pSearchState->src0_procamp = pSearchState->pFilter->procamp;
                }
                else
                {
                    pSearchState->src0_procamp = (int32_t)pRuleEntry->value;
                }
                break;

            // Set Src0 CSC coefficients
            case RID_SetSrc0Coeff:
                if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Source)
                {
                    if (pSearchState->pFilter->matrix == DL_CSC_DISABLED)
                    {
                        pSearchState->src0_coeff = CoeffID_None;
                    }
                    else
                    {
                        Kdll_CSC_Matrix *matrix = pSearchState->CscParams.Matrix;
                        matrix += pSearchState->pFilter->matrix;

                        pSearchState->src0_coeff = matrix->iCoeffID;
                    }
                }
                else
                {
                    pSearchState->src0_coeff = (Kdll_CoeffID) pRuleEntry->value;
                }
                break;

            // Set Src0 processing mode
            case RID_SetSrc0Processing:
                if ((Kdll_Processing)pRuleEntry->value == Process_Source)
                {
                    pSearchState->src0_process = pSearchState->pFilter->process;
                }
                else
                {
                    pSearchState->src0_process = (Kdll_Processing) pRuleEntry->value;
                }
                break;

            // Set Src1 source format
            case RID_SetSrc1Format:
                if ((MOS_FORMAT  )pRuleEntry->value == Format_Source)
                {
                    pSearchState->src1_format = pSearchState->pFilter->format;
                }
                else
                {
                    pSearchState->src1_format = (MOS_FORMAT  ) pRuleEntry->value;
                }
                break;

            // Set Src1 sampling mode
            case RID_SetSrc1Sampling:
                if ((Kdll_Sampling)pRuleEntry->value == Sample_Source)
                {
                    pSearchState->src1_sampling = pSearchState->pFilter->sampler;
                }
                else
                {
                    pSearchState->src1_sampling = (Kdll_Sampling) pRuleEntry->value;
                }
                break;

            // Set Src1 Rotation
            case RID_SetSrc1Rotation:
                pSearchState->src1_rotation = pSearchState->pFilter->rotation;
                break;

            // Set Src1 luma key
            case RID_SetSrc1LumaKey:
                if (pRuleEntry->value == LumaKey_Source)
                {
                    pSearchState->src1_lumakey = pSearchState->pFilter->lumakey;
                }
                else
                {
                    pSearchState->src1_lumakey = (int32_t)pRuleEntry->value;
                }
                break;

            // Set Src1 Sampler LumaKey
            case RID_SetSrc1SamplerLumaKey:
                if (pRuleEntry->value == LumaKey_Source)
                {
                    pSearchState->src1_samplerlumakey = pSearchState->pFilter->samplerlumakey;
                }
                else
                {
                    pSearchState->src1_samplerlumakey = (int32_t)pRuleEntry->value;
                }
                break;

            // Set Src1 Procamp
            case RID_SetSrc1Procamp:
                if (pRuleEntry->value == Procamp_Source)
                {
                    pSearchState->src1_procamp = pSearchState->pFilter->procamp;
                }
                else
                {
                    pSearchState->src1_procamp = (int32_t)pRuleEntry->value;
                }
                break;

            // Set Src1 CSC coefficients
            case RID_SetSrc1Coeff:
                if ((Kdll_CoeffID)pRuleEntry->value == CoeffID_Source)
                {
                    if (pSearchState->pFilter->matrix == DL_CSC_DISABLED)
                    {
                        pSearchState->src1_coeff = CoeffID_None;
                    }
                    else
                    {
                        Kdll_CSC_Matrix *matrix = pSearchState->CscParams.Matrix;
                        matrix += pSearchState->pFilter->matrix;

                        pSearchState->src1_coeff = matrix->iCoeffID;
                    }
                }
                else
                {
                    pSearchState->src1_coeff = (Kdll_CoeffID) pRuleEntry->value;
                }
                break;

            // Set Src1 processing mode
            case RID_SetSrc1Processing:
                if ((Kdll_Processing)pRuleEntry->value == Process_Source)
                {
                    pSearchState->src1_process = pSearchState->pFilter->process;
                }
                else
                {
                    pSearchState->src1_process = (Kdll_Processing) pRuleEntry->value;
                }
                break;

            // Set current quadrant
            case RID_SetQuadrant:
                pSearchState->quadrant = (int32_t) pRuleEntry->value;
                break;

            // Set CSC flag before Mix
            case RID_SetCSCBeforeMix:
                pSearchState->bCscBeforeMix = pRuleEntry->value ? true : false;
                break;

            // Unsupported "Set" rule
            default:
                // Failed to find a matching rule -> kernel search will fail
                VPHAL_RENDER_ASSERTMESSAGE("Invalid rule %d @ layer %d, state %d.", pRuleEntry->id, pSearchState->layer_number, pSearchState->state);
                return false;
        }
    }

    // Reset matching rule
    pSearchState->pMatchingRuleSet = nullptr;

    return true;
}
//-----------------------------------------------------------------------------------------
// KernelDll_SortRuleTable - Sort master dynamic linking rule table
//
// Walks the default and custom rule tables twice: the first pass counts rule sets per
// parser state and group, the second pass fills pState->pSortedRules so that, for each
// parser state, non-overridable rule sets come first, then custom, then default ones.
// pState->pDllRuleTable / pState->iDllRuleCount give the start and length of each
// per-state run.
//
// Parameters:
//    Kdll_State *pState - [in/out] Kernel Dll state
//
// Output: true  - Master rule table (and acceleration table) successfully created
//         false - Failed to setup master rule table
//-----------------------------------------------------------------------------------------
bool KernelDll_SortRuleTable(Kdll_State *pState)
{
    const Kdll_RuleEntry *pTables[2];                // [0] default table, [1] custom table
    const Kdll_RuleEntry *pCur = nullptr;
    Kdll_RuleEntrySet    *pSet;
    Kdll_RuleEntrySet    *pBucket;
    int32_t              *pCounter;
    uint8_t               ruleGroup;
    int32_t               iParserState;
    int32_t               iTable, iState, iSlot;
    int32_t               nRuleSets = 0;
    int32_t               nEnforced[Parser_Count];   // Non-overridable (enforced) rules
    int32_t               nDefault [Parser_Count];   // Default rules
    int32_t               nCustom  [Parser_Count];   // Custom rules

    VPHAL_RENDER_FUNCTION_ENTER;

    // Drop the table built by a previous call (rule table update)
    if (pState->pSortedRules)
    {
        MOS_FreeMemory(pState->pSortedRules);
        pState->pSortedRules = nullptr;

        MOS_ZeroMemory(pState->pDllRuleTable, sizeof(pState->pDllRuleTable));
        MOS_ZeroMemory(pState->iDllRuleCount, sizeof(pState->iDllRuleCount));
    }

    // Start with empty counters
    MOS_ZeroMemory(nEnforced, sizeof(nEnforced));
    MOS_ZeroMemory(nDefault,  sizeof(nDefault));
    MOS_ZeroMemory(nCustom,   sizeof(nCustom));

    pTables[0] = pState->pRuleTableDefault;
    pTables[1] = pState->pRuleTableCustom;

    // Pass 1: count rule sets per parser state and group
    for (iTable = 0; iTable < 2; iTable++)
    {
        pCur = pTables[iTable];

        // Table not set - nothing to count
        if (pCur == nullptr)
        {
            continue;
        }

        while (pCur->id != RID_Op_EOF)
        {
            if (RID_IS_EXTENDED(pCur->id))
            {
                // Extended rule (variable length): value holds the number of extra entries
                pCur += pCur->value;
            }
            else if (pCur->id == RID_Op_NewEntry)
            {
                // Entries of the custom table always belong to the custom group
                ruleGroup = (iTable == 0) ? (uint8_t)pCur->value : (uint8_t)RULE_CUSTOM;

                // The entry following RID_Op_NewEntry must be RID_IsParserState
                pCur++;
                if (pCur->id != RID_IsParserState)
                {
                    VPHAL_RENDER_ASSERTMESSAGE("Rule does not start with State.");
                    return false;
                }

                // Validate the parser state
                iParserState = pCur->value;
                if (iParserState < Parser_Begin)
                {
                    VPHAL_RENDER_ASSERTMESSAGE("Invalid State %d.", iParserState);
                    return false;
                }
                if (iParserState >= Parser_Custom)
                {
                    // All custom states share a single bucket
                    iParserState = Parser_Custom;
                }

                pCounter = (ruleGroup == RULE_NO_OVERRIDE) ? nEnforced :
                           (ruleGroup == RULE_DEFAULT)     ? nDefault  :
                                                             nCustom;
                pCounter[iParserState]++;

                nRuleSets++;
            }

            pCur++;
        }
    }

    // Allocate the sorted rule set array
    pState->pSortedRules = (Kdll_RuleEntrySet *)MOS_AllocAndZeroMemory(nRuleSets * sizeof(Kdll_RuleEntrySet));
    if (!pState->pSortedRules)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to allocate rule table.");
        return false;
    }

    // Lay out one contiguous run of rule sets per parser state. The first run starts at
    // pSortedRules offset by the current iDllRuleCount[0], as in the original layout code.
    pBucket = pState->pSortedRules + pState->iDllRuleCount[0];
    for (iState = 0; iState < Parser_Count; iState++)
    {
        // Start pointer and number of entries to search for this state
        pState->pDllRuleTable[iState] = pBucket;
        pState->iDllRuleCount[iState] = nEnforced[iState] + nCustom[iState] + nDefault[iState];
        pBucket = pState->pDllRuleTable[iState] + pState->iDllRuleCount[iState];

        // Turn the counters into insertion offsets inside the run
        nDefault [iState] = nEnforced[iState] + nCustom[iState];   // Last set of rules
        nCustom  [iState] = nEnforced[iState];                     // 2nd set of rules
        nEnforced[iState] = 0;                                     // 1st set of rules
    }

    // Pass 2: integrate enforced, custom and default rules into one access table
    for (iTable = 0; iTable < 2; iTable++)
    {
        pCur = pTables[iTable];

        // Table not set - nothing to insert
        if (pCur == nullptr)
        {
            continue;
        }

        while (pCur->id != RID_Op_EOF)
        {
            if (pCur->id != RID_Op_NewEntry)
            {
                VPHAL_RENDER_ASSERTMESSAGE("New rule entry expected.");
                return false;
            }

            // Entries of the custom table always belong to the custom group
            ruleGroup = (iTable == 0) ? (uint8_t)pCur->value : (uint8_t)RULE_CUSTOM;

            // Parser state (already validated by the first pass)
            pCur++;
            iParserState = pCur->value;
            if (iParserState >= Parser_Custom)
            {
                // Custom states share one bucket, so the state check is kept in the rule set
                iParserState = Parser_Custom;
            }
            else
            {
                // Skip state check - already handled by acceleration table
                pCur++;
            }

            // Claim the next free slot of this group within the state's run
            pCounter = (ruleGroup == RULE_NO_OVERRIDE) ? nEnforced :
                       (ruleGroup == RULE_DEFAULT)     ? nDefault  :
                                                         nCustom;
            iSlot = pCounter[iParserState]++;
            pSet  = pState->pDllRuleTable[iParserState] + iSlot;

            // Fill the rule set
            pSet->pRuleEntry = pCur;
            pSet->iGroup     = ruleGroup;

            // Count match rules, including extended rule entries
            for (; RID_IS_MATCH(pCur->id); pCur++)
            {
                if (RID_IS_EXTENDED(pCur->id))
                {
                    pSet->iMatchCount += pCur->value;
                    pCur              += pCur->value;
                }
                pSet->iMatchCount++;
            }

            // Count set rules, including extended rule entries
            for (; RID_IS_SET(pCur->id); pCur++)
            {
                if (RID_IS_EXTENDED(pCur->id))
                {
                    pSet->iSetCount += pCur->value;
                    pCur            += pCur->value;
                }
                pSet->iSetCount++;
            }

            // A rule set without any "Set" rule is rejected
            if (pSet->iSetCount < 1)
            {
                VPHAL_RENDER_ASSERTMESSAGE("Ruleset must have at least one set rule.");
                return false;
            }
        }
    }

    // Rule table is now sorted and integrated with custom rules
    return true;
}
//---------------------------------------------------------------------------------------
// KernelDll_AllocateStates - Allocate Kernel Dynamic Linking/Loading (Dll) States
//
// - Allocate DL states
// - Setup export/import list for linking
// - Prepare pool of search nodes
// - Load component kernels from binary file
// - Setup kernel cache
// - Setup kernel dynamic linking rules
//
// A single allocation holds, in this order: the Kdll_State, the component kernel cache
// entries, the CMFC patch cache entries, the combined kernel cache entries, the combined
// kernel buffer and the kernel export table.
//
// Parameters: [in] pKernelBin             - Pointer to Kernel binary file loaded in sys memory
//                                           (must not be nullptr)
//             [in] uKernelSize            - Kernel file size
//             [in] pFcPatchCache          - Pointer to FC patch binary file loaded in sys memory;
//                                           CMFC is enabled only when this is non-null and
//                                           uFcPatchCacheSize is non-zero
//             [in] uFcPatchCacheSize      - FC patch binary file size
//             [in] pDefaultRules          - Dynamic Linking Rules Table
//             [in] ModifyFunctionPointers - Callback forwarded to KernelDll_SetupFunctionPointers
//
// Output: Pointer to allocated Kernel dll state
//         nullptr - Failed to allocate Kernel dll state
//-----------------------------------------------------------------------------------------
Kdll_State *KernelDll_AllocateStates(
    void                 *pKernelBin,
    uint32_t              uKernelSize,
    void                 *pFcPatchCache,
    uint32_t              uFcPatchCacheSize,
    const Kdll_RuleEntry *pDefaultRules,
    void(*ModifyFunctionPointers)(PKdll_State))
{
    Kdll_State           *pState;
    Kdll_CacheEntry      *pCacheEntry;
    Kdll_KernelCache     *pKernelCache;
    Kdll_KernelHashTable *pHashTable;
    Kdll_KernelHashEntry *pHashEntries;
    int32_t               iSize;
    int32_t               nExports    = 0;
    int32_t               nImports    = 0;
    uint32_t             *pLinkOffset = nullptr;
    Kdll_LinkData        *pLinkSort   = nullptr;
    Kdll_LinkData        *pLinkData;
    Kdll_LinkData        *pExports;
    Kdll_LinkFileHeader  *pLinkHeader;
    int32_t               i, j;
    uint32_t             *pOffsets;
    uint8_t              *pBase;

    VPHAL_RENDER_FUNCTION_ENTER;

    // The kernel binary starts with an offset table that is read below;
    // a null binary cannot be parsed.
    if (pKernelBin == nullptr)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Invalid kernel binary.");
        return nullptr;
    }

    // Allocate dynamic linking states
    i  = sizeof(Kdll_State);                                        // Dynamic linking states
    i += sizeof(Kdll_CacheEntry) * IDR_VP_TOTAL_NUM_KERNELS;        // Component kernel cache entries
    i += sizeof(Kdll_CacheEntry) * IDR_VP_TOTAL_NUM_KERNELS;        // CMFC kernel patch cache entries
    i += sizeof(Kdll_CacheEntry) * DL_DEFAULT_COMBINED_KERNELS;     // Combined kernel cache entries
    i += DL_COMBINED_KERNEL_CACHE_SIZE;                             // Combined kernel buffer
    i += sizeof(Kdll_LinkData) * DL_MAX_EXPORT_COUNT;               // Kernel Export table

    pState = (Kdll_State *)MOS_AllocAndZeroMemory(i);
    if (!pState)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to allocate kernel dll states.");
        goto cleanup;
    }

    pState->iSize        = i;
    pState->dwRefresh    = 0;
    pState->pProcamp     = nullptr;
    pState->iProcampSize = 0;
    pState->pSortedRules = nullptr;

    if ((pFcPatchCache != nullptr) && (uFcPatchCacheSize != 0))
    {
        pState->bEnableCMFC = true;
    }

    // Initialize platform specific function pointers
    if (!KernelDll_SetupFunctionPointers(pState, ModifyFunctionPointers))
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to setup function pointers.");
        goto cleanup;
    }

    pKernelCache = &pState->ComponentKernelCache;

    // No custom kernels/rules
    pState->pRuleTableCustom   = nullptr;
    pState->pCustomKernelCache = nullptr;

    // Set Kernel DLL Rules
    pState->pRuleTableDefault  = pDefaultRules;

    // Integrate and sort rule tables (the result is not checked here)
    KernelDll_SortRuleTable(pState);

    // Setup component kernel cache
    pKernelCache->pCache           = (uint8_t*)pKernelBin;
    pKernelCache->iCacheSize       = (int32_t) uKernelSize;
    pKernelCache->iCacheFree       = 0;
    pKernelCache->iCacheMaxEntries = IDR_VP_TOTAL_NUM_KERNELS;
    pKernelCache->iCacheEntries    = IDR_VP_TOTAL_NUM_KERNELS;
    pKernelCache->pCacheEntries    = (Kdll_CacheEntry *)(pState + 1);

    // The binary begins with IDR_VP_TOTAL_NUM_KERNELS + 1 offsets; kernel i spans
    // [pOffsets[i], pOffsets[i + 1]) relative to the first byte after the offset table.
    pOffsets    = (uint32_t *) pKernelCache->pCache;
    pBase       = (uint8_t  *)(pOffsets + IDR_VP_TOTAL_NUM_KERNELS + 1);
    pCacheEntry = pKernelCache->pCacheEntries;
    for (i = 0; i < IDR_VP_TOTAL_NUM_KERNELS; i++, pCacheEntry++)
    {
        pCacheEntry->iKUID      = i;
        pCacheEntry->iKCID      = -1;
        pCacheEntry->dwLoaded   = 0;
        pCacheEntry->dwRefresh  = 0;
        pCacheEntry->wHashEntry = 0;
        pCacheEntry->szName     = g_cInit_ComponentNames[i];
        pCacheEntry->iSize      = pOffsets[i + 1] - pOffsets[i];
        pCacheEntry->pBinary    = (pCacheEntry->iSize > 0) ? (pBase + pOffsets[i]) : nullptr;
    }

    // Setup CMFC kernel patch cache; pCacheEntry now points just past the component entries
    pKernelCache = &pState->CmFcPatchCache;
    if (pState->bEnableCMFC && pFcPatchCache)
    {
        pKernelCache->pCache           = (uint8_t*)pFcPatchCache;
        pKernelCache->iCacheSize       = (int32_t)uFcPatchCacheSize;
        pKernelCache->iCacheFree       = 0;
        pKernelCache->iCacheMaxEntries = IDR_VP_TOTAL_NUM_KERNELS;
        pKernelCache->iCacheEntries    = IDR_VP_TOTAL_NUM_KERNELS;
        pKernelCache->pCacheEntries    = pCacheEntry;

        pOffsets = (uint32_t *)pKernelCache->pCache;
        pBase    = (uint8_t  *)(pOffsets + IDR_VP_TOTAL_NUM_KERNELS + 1);
        for (i = 0; i < IDR_VP_TOTAL_NUM_KERNELS; i++, pCacheEntry++)
        {
            pCacheEntry->iKUID      = i;
            pCacheEntry->iKCID      = -1;
            pCacheEntry->dwLoaded   = 0;
            pCacheEntry->dwRefresh  = 0;
            pCacheEntry->wHashEntry = 0;
            pCacheEntry->szName     = g_cInit_ComponentNames[i];
            pCacheEntry->iSize      = pOffsets[i + 1] - pOffsets[i];
            pCacheEntry->pBinary    = (pCacheEntry->iSize > 0) ? (pBase + pOffsets[i]) : nullptr;
        }
    }
    else
    {
        // Keep the memory layout: skip the (unused) CMFC patch cache entries
        pCacheEntry += IDR_VP_TOTAL_NUM_KERNELS;
    }

    // Setup combined kernel cache
    pKernelCache = &pState->KernelCache;
    pKernelCache->iCacheMaxEntries = DL_DEFAULT_COMBINED_KERNELS;
    pKernelCache->iCacheEntries    = 0;
    pKernelCache->iCacheSize       = DL_COMBINED_KERNEL_CACHE_SIZE;                         // Size of kernel cache
    pKernelCache->iCacheFree       = DL_COMBINED_KERNEL_CACHE_SIZE;                         // Free cache size
    pKernelCache->iCacheID         = 0x00010000;                                            // Cache ID
    pKernelCache->pCacheEntries    = pCacheEntry;                                           // Cached kernel entries
    pKernelCache->pCache           = (uint8_t *)(pCacheEntry + DL_DEFAULT_COMBINED_KERNELS); // kernels

    // reset cache entries and chain them through pNextEntry
    for (i = 0; i < DL_DEFAULT_COMBINED_KERNELS; i++, pCacheEntry++)
    {
        pCacheEntry->iKUID   = -1;
        pCacheEntry->iKCID   = -1;
        pCacheEntry->pBinary = pKernelCache->pCache + i * DL_CACHE_BLOCK_SIZE;
        if(i != DL_DEFAULT_COMBINED_KERNELS - 1)
        {
            pCacheEntry->pNextEntry = pCacheEntry + 1;
        }
        else
        {
            pCacheEntry->pNextEntry = nullptr;
        }
    }

    //------------------------------------
    // Setup hash table
    //------------------------------------
    pHashTable   = &pState->KernelHashTable;
    pHashEntries = pState->KernelHashTable.HashEntry - 1;   // 1 based indexing (0 means null)
    pHashTable->pool = 1;                                   // first in pool (1 based index)
    pHashTable->last = DL_MAX_COMBINED_KERNELS;             // last in pool (for releasing)
    for (i = 1; i <= DL_MAX_COMBINED_KERNELS; i++)
    {
        pHashEntries[i].next = i + 1;
    }
    pHashEntries[i - 1].next = 0;                           // last entry

    //------------------------------------
    // Setup dynamic linking import/export array
    //------------------------------------
    pCacheEntry = pState->ComponentKernelCache.pCacheEntries;
    iSize       = pCacheEntry[IDR_VP_LinkFile].iSize;
    if (iSize == 0)
    {
        VPHAL_RENDER_NORMALMESSAGE("Link file is missing.");
        goto cleanup;
    }

    // Get link file binary data
    pLinkHeader = (Kdll_LinkFileHeader *) pCacheEntry[IDR_VP_LinkFile].pBinary;
    if (pLinkHeader == nullptr ||
        pLinkHeader->dwVersion != IDR_VP_LINKFILE_VERSION ||
        sizeof(Kdll_LinkFileHeader) != IDR_VP_LINKFILE_HEADER)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Invalid link file version.");
        goto cleanup;
    }

    // Number of link data records following the header
    iSize = (iSize - IDR_VP_LINKFILE_HEADER) / sizeof(Kdll_LinkData);

    // Create temporary list of sorted link data and offsets
    pLinkSort   = (Kdll_LinkData *)MOS_AllocAndZeroMemory(iSize * sizeof(Kdll_LinkData));
    pLinkOffset = (uint32_t *)MOS_AllocAndZeroMemory((IDR_VP_TOTAL_NUM_KERNELS + 1) * sizeof(uint32_t));
    if (!pLinkSort || !pLinkOffset)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to allocate temporary buffers.");
        goto cleanup;
    }

    // Count number of link records for each component kernel, and total exports/imports
    pCacheEntry[0].pLink = pLinkData = (Kdll_LinkData *)(pLinkHeader + 1);
    for (i = iSize; i > 0; i--, pLinkData++)
    {
        if (pLinkData->iKUID < IDR_VP_TOTAL_NUM_KERNELS)
        {
            pCacheEntry[pLinkData->iKUID].nLink++;
        }

        nExports +=  pLinkData->bExport;
        nImports += !pLinkData->bExport;
    }

    // Sanity check
    if (nExports != (int32_t)pLinkHeader->dwExports ||
        nImports != (int32_t)pLinkHeader->dwImports)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Inconsistent header data.");
        goto cleanup;
    }

    if (nExports > DL_MAX_EXPORT_COUNT)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Unsupported number of exports %d > %d.", nExports, DL_MAX_EXPORT_COUNT);
        goto cleanup;
    }

    // The export table lives right after the combined kernel buffer
    // (pKernelCache still refers to the combined kernel cache here)
    pState->ComponentKernelCache.pExports = pExports = (Kdll_LinkData *)(pKernelCache->pCache + pKernelCache->iCacheSize);
    pState->ComponentKernelCache.nExports = nExports;

    // Calculate offsets for sorting
    pLinkOffset[0] = 0;
    pLinkData      = pCacheEntry[0].pLink;
    for (i = 1; i < IDR_VP_TOTAL_NUM_KERNELS; i++)
    {
        pLinkOffset[i]       = pLinkOffset[i-1] + pCacheEntry[i-1].nLink;
        pCacheEntry[i].pLink = (pCacheEntry[i].nLink) ? (pLinkData + pLinkOffset[i]) : nullptr;
    }

    // Extra slot collecting records whose kernel ID is out of range
    pLinkOffset[i] = pLinkOffset[i-1] + pCacheEntry[i-1].nLink;

    // Sort link data by kernel ID
    for (i = iSize; i > 0; i--, pLinkData++)
    {
        j = pLinkOffset[MOS_MIN(pLinkData->iKUID, IDR_VP_TOTAL_NUM_KERNELS)]++;
        pLinkSort[j] = *pLinkData;

        // Add to export table
        if (pLinkData->bExport &&
            pLinkData->iLabelID < DL_MAX_EXPORT_COUNT)
        {
            pExports[pLinkData->iLabelID] = *pLinkData;
        }
    }

    // Copy sorted data back over the link records inside the kernel binary
    pLinkData = pCacheEntry[0].pLink;
    MOS_SecureMemcpy(pLinkData, iSize * sizeof(Kdll_LinkData), (void *)pLinkSort, iSize * sizeof(Kdll_LinkData));

    // Release sort buffers
    MOS_FreeMemory(pLinkOffset);
    MOS_FreeMemory(pLinkSort);

    // Return
    return pState;

cleanup:
    if (pState)
    {
        MOS_FreeMemory(pState->pSortedRules);
        pState->pSortedRules = nullptr;
    }

    // Free DL States and temporary sort buffers
    MOS_FreeMemory(pState);
    MOS_FreeMemory(pLinkSort);
    MOS_FreeMemory(pLinkOffset);

    return nullptr;
}
//---------------------------------------------------------------------------------------
// KernelDll_ReleaseStates - Release Kernel Dynamic Linking/Loading (Dll) States
//
// Calls KernelDll_ReleaseAdditionalCacheEntries on the combined kernel cache, then frees
// the component kernel cache buffer, the CMFC patch cache buffer, the sorted rule table
// and finally the state itself.
//
// Parameters:
//    Kdll_State *pState - [in] Kernel dll State to release (nullptr is ignored)
//
// Output: none
//-----------------------------------------------------------------------------------------
void KernelDll_ReleaseStates(Kdll_State *pState)
{
    VPHAL_RENDER_FUNCTION_ENTER;

    if (pState != nullptr)
    {
        KernelDll_ReleaseAdditionalCacheEntries(&pState->KernelCache);

        MOS_FreeMemory(pState->ComponentKernelCache.pCache);
        MOS_FreeMemory(pState->CmFcPatchCache.pCache);
        MOS_FreeMemory(pState->pSortedRules);

        // The state block goes last: everything above is reached through it
        MOS_FreeMemory(pState);
    }
}
//---------------------------------------------------------------------------------------
// KernelDll_SetupProcampParameters - Setup Kernel Procamp Parameters
//
// Stores the caller's Procamp array pointer and its size in the Dll state.
// The array is not copied.
//
// Parameters:
//    Kdll_State   *pState       - [in/out] Kernel dll State to update
//    Kdll_Procamp *pProcamp     - [in] Pointer to array of Procamp Parameters
//    int32_t       iProcampSize - [in] Size of the array
//
// Output: none
//-----------------------------------------------------------------------------------------
void KernelDll_SetupProcampParameters(Kdll_State    *pState,
                                      Kdll_Procamp  *pProcamp,
                                      int32_t        iProcampSize)
{
    VPHAL_RENDER_FUNCTION_ENTER;

    // Record the procamp array and its length
    pState->iProcampSize = iProcampSize;
    pState->pProcamp     = pProcamp;
}
//---------------------------------------------------------------------------------------
// KernelDll_SearchKernel - Performs full kernel search, including selection of best match
//                          Search state must be initialized by KernelDll_StartKernelSearch
//
// Sets up CSC through pState->pfnSetupCSC, resets the parser/Src0/Src1 search fields, then
// alternates pState->pfnFindRule and pState->pfnUpdateState until the parser state
// reaches Parser_End.
//
// Parameters:
//    Kdll_State       *pState       - [in]     Dynamic Linking state
//    Kdll_SearchState *pSearchState - [in/out] Kernel search state
//
// Output: true if succeeded, false otherwise
//---------------------------------------------------------------------------------------
bool KernelDll_SearchKernel(Kdll_State       *pState,
                            Kdll_SearchState *pSearchState)
{
    VPHAL_RENDER_FUNCTION_ENTER;

    // Reject a missing or empty search
    if (pSearchState == nullptr || pSearchState->iFilterSize < 1)
    {
        VPHAL_RENDER_NORMALMESSAGE("Search is empty, must contain 2 or more layers.");
        return false;
    }

    // Setup CSC; allocate and calculate CSC matrices
    if (!pState->pfnSetupCSC(pState, pSearchState))
    {
        VPHAL_RENDER_NORMALMESSAGE("CSC setup failed.");
        return false;
    }

    // Parser starts at the first layer, with no rule matched yet
    pSearchState->state            = Parser_Begin;
    pSearchState->layer_number     = 0;
    pSearchState->quadrant         = 0;
    pSearchState->pMatchingRuleSet = nullptr;
    pSearchState->bCscBeforeMix    = false;
    pSearchState->cspace           = pSearchState->CscParams.ColorSpace;

    // Src0 starts empty
    pSearchState->src0_format    = Format_None;
    pSearchState->src0_sampling  = Sample_None;
    pSearchState->src0_coeff     = CoeffID_None;
    pSearchState->src0_lumakey   = LumaKey_False;
    pSearchState->src0_colorfill = false;

    // Src1 starts empty
    pSearchState->src1_format         = Format_None;
    pSearchState->src1_sampling       = Sample_None;
    pSearchState->src1_coeff          = CoeffID_None;
    pSearchState->src1_process        = Process_None;
    pSearchState->src1_lumakey        = LumaKey_False;
    pSearchState->src1_samplerlumakey = LumaKey_False;

    // Search loop: one rule match plus one state update per iteration
    while (pSearchState->state != Parser_End)
    {
#if EMUL || VPHAL_LIB
        if (pState->pfnCbSearchSate)
        {
            pState->pfnCbSearchSate(pState->pToken, CB_REASON_BEGIN_SEARCH, pSearchState);
        }
#endif

        // Find rule that matches
        if (pState->pfnFindRule(pState, pSearchState))
        {
#if EMUL || VPHAL_LIB
            if (pState->pfnCbSearchSate)
            {
                pState->pfnCbSearchSate(pState->pToken, CB_REASON_BEGIN_UPDATE, pSearchState);
            }
#endif

            // Update state; on success go on with the next rule
            if (pState->pfnUpdateState(pState, pSearchState))
            {
                continue;
            }

#if EMUL || VPHAL_LIB
            if (pState->pfnCbSearchSate)
            {
                pState->pfnCbSearchSate(pState->pToken, CB_REASON_UPDATE_FAILED, pSearchState);
            }
#endif
            return false;
        }

#if EMUL || VPHAL_LIB
        if (pState->pfnCbSearchSate)
        {
            pState->pfnCbSearchSate(pState->pToken, CB_REASON_SEARCH_FAILED, pSearchState);
        }
#endif
        return false;
    }

#if EMUL || VPHAL_LIB
    if (pState->pfnCbSearchSate)
    {
        pState->pfnCbSearchSate(pState->pToken, CB_REASON_END_SEARCH, pSearchState);
    }
#endif

    VPHAL_RENDER_VERBOSEMESSAGE("Search completed successfully.");
    return true;
}
//--------------------------------------------------------------
// Kdll_AppendKernel - Append kernel, include symbols to resolve
//
// Copies component kernel iKUID to the end of the combined kernel in
// pSearchState, appends its non-inline link records (relocated by the
// current kernel size in DWORDs) to pSearchState->KernelLink, applies
// the optional patch blocks, and then recursively appends the kernels
// exporting any inline code that this kernel imports.
//
// Parameters:
//    Kdll_KernelCache *pKernelCache - [in]     Component kernel cache
//    Kdll_SearchState *pSearchState - [in/out] Search state holding the combined kernel
//    int32_t           iKUID        - [in]     Kernel unique ID (index into the cache entries)
//    Kdll_PatchData   *pKernelPatch - [in]     Patch data to apply (may be nullptr)
//
// Output: true if succeeded; false on invalid kernel ID, lack of kernel or
//         symbol space, or failure appending an inline dependency
//--------------------------------------------------------------
bool Kdll_AppendKernel(Kdll_KernelCache *pKernelCache,
                       Kdll_SearchState *pSearchState,
                       int32_t           iKUID,
                       Kdll_PatchData   *pKernelPatch)
{
    Kdll_State      *pState;
    Kdll_Symbol     *pSymbols;
    Kdll_CacheEntry *kernels;
    Kdll_LinkData   *link;
    Kdll_LinkData   *liSearch_reloc;
    uint8_t         *kernel;
    int             *size;
    int             *left;
    int              dwSize;
    int              i;
    int              base;
    bool             bInline;
    bool             res;

    VPHAL_RENDER_FUNCTION_ENTER;

    res = false;

    // Check if Kernel ID is valid; a negative ID would index before the cache entries
    if (iKUID < 0 || iKUID >= pKernelCache->iCacheEntries)
    {
        VPHAL_RENDER_NORMALMESSAGE("invalid Kernel ID %d.", iKUID);
        goto cleanup;
    }

    // Get KDLL state
    pState   = pSearchState->pKdllState;

    // Get current combined kernel
    kernel   = pSearchState->Kernel;
    size     = &pSearchState->KernelSize;
    left     = &pSearchState->KernelLeft;
    pSymbols = &pSearchState->KernelLink;
    base     = (*size) >> 2;    // current kernel size in DWORDs, used to relocate symbols

    // Find selected kernel and kernel size; check if there is enough space
    kernels = &pKernelCache->pCacheEntries[iKUID];
    dwSize  = kernels->iSize;
    if (*left < dwSize)
    {
        VPHAL_RENDER_NORMALMESSAGE("exceeded maximum kernel size.");
        goto cleanup;
    }

    // Check if there is enough space for symbols
    if (pSymbols->dwCount + kernels->nLink >= pSymbols->dwSize)
    {
        VPHAL_RENDER_NORMALMESSAGE("exceeded maximum numbers of symbols to resolve.");
        goto cleanup;
    }

#if EMUL || VPHAL_LIB
    VPHAL_RENDER_NORMALMESSAGE("%s.", kernels->szName);

    if (pState->pfnCbListKernel)
    {
        pState->pfnCbListKernel(pState->pToken, kernels->szName);
    }
#elif _DEBUG // EMUL || VPHAL_LIB
    VPHAL_RENDER_NORMALMESSAGE("%s.", kernels->szName);
#endif // _DEBUG

    // Append symbols to resolve, relocate symbols
    link           = kernels->pLink;
    liSearch_reloc = pSymbols->pLink + pSymbols->dwCount;

    bInline = false;
    if (link)
    {
        for (i = kernels->nLink; i > 0; i--, link++)
        {
            if (link->bInline)
            {
                // Inline code included (imports only; handled after the copy below)
                if (!link->bExport)
                {
                    bInline = true;
                }
            }
            else
            {
                *liSearch_reloc = *link;
                liSearch_reloc->dwOffset += base;
                liSearch_reloc++;

                pSymbols->dwCount++;
            }
        }
    }

    // Append kernel
    MOS_SecureMemcpy(&kernel[*size], dwSize, (void *)kernels->pBinary, dwSize);

    // Patch kernel
    if (pKernelPatch)
    {
        uint8_t         *pSource      = pKernelPatch->Data;
        uint8_t         *pDestination = kernel + (*size);
        Kdll_PatchBlock *pBlock       = pKernelPatch->Patch;

        for (i = pKernelPatch->nPatches; i > 0; i--, pBlock++)
        {
            MOS_SecureMemcpy(pDestination + pBlock->DstOffset, pBlock->BlockSize, (void *)(pSource + pBlock->SrcOffset), pBlock->BlockSize);
        }
    }

    res = true;

    *size += dwSize;
    *left -= dwSize;

    // Insert inline code: append the kernel exporting each inline import
    if (bInline)
    {
        for (link = kernels->pLink, i = kernels->nLink; (i > 0) && (res); i--, link++)
        {
            if (link->bInline && (!link->bExport))
            {
                iKUID = pKernelCache->pExports[link->iLabelID].iKUID;
                res &= Kdll_AppendKernel(pKernelCache, pSearchState, iKUID, pKernelPatch);
            }
        }
    }

cleanup:
    return res;
}
//--------------------------------------------------------------
// Kdll_ResolveKernelDependencies - Resolve kernel dependencies and perform patching
//
// Repeatedly scans the symbols of the combined kernel; whenever an
// import refers to a label with no recorded export offset, the
// component kernel exporting that label is appended and the scan
// restarts. Once every import is resolved, the jump offsets of all
// non-inline imports are patched in the combined kernel.
//
// Parameters:
//    Kdll_State       *pState       - [in]     Dynamic Linking state
//    Kdll_SearchState *pSearchState - [in/out] Search state holding the combined kernel
//
// Output: true if succeeded; false on an invalid/unresolved label or
//         when a dependency could not be appended
//--------------------------------------------------------------
bool Kdll_ResolveKernelDependencies(
    Kdll_State       *pState,
    Kdll_SearchState *pSearchState)
{
    Kdll_KernelCache *cache    = &pState->ComponentKernelCache;
    uint8_t          *kernel   = pSearchState->Kernel;
    Kdll_Symbol      *pSymbols = &pSearchState->KernelLink;
    uint32_t          nExports = cache->nExports;
    Kdll_LinkData    *pExports = cache->pExports;
    Kdll_LinkData    *pLink;
    int32_t           iKUID;
    int32_t           iOffset;
    uint32_t          dwResolveOffset[DL_MAX_EXPORT_COUNT];
    bool              bResolveDone;
    int32_t           i;
    uint32_t         *d;

    VPHAL_RENDER_FUNCTION_ENTER;

    MOS_ZeroMemory(dwResolveOffset, sizeof(dwResolveOffset));

    do
    {
        // Update exports
        for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
        {
            // Labels index dwResolveOffset[] and the export table, which both
            // hold DL_MAX_EXPORT_COUNT entries; reject anything beyond that
            // before it is used as an index.
            if (pLink->iLabelID >= DL_MAX_EXPORT_COUNT)
            {
                VPHAL_RENDER_ASSERTMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
                return false;
            }

            if (pLink->bExport)
            {
                dwResolveOffset[pLink->iLabelID] = pLink->dwOffset;
            }
        }

        bResolveDone = true;
        for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
        {
            // validate label
            if (pLink->iLabelID > nExports ||              // invalid label
                pExports[pLink->iLabelID].bExport == 0)    // label not in the export table
            {
                VPHAL_RENDER_ASSERTMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
                return false;
            }

            // load dependencies
            if (!pLink->bExport && !dwResolveOffset[pLink->iLabelID])
            {
                // set flag for another pass as newly loaded
                // kernels may contain dependencies of their own
                bResolveDone = false;

                // Load dependencies; if the kernel cannot be appended the label
                // can never be resolved, so fail instead of rescanning forever
                iKUID = pExports[pLink->iLabelID].iKUID;
                if (!Kdll_AppendKernel(cache, pSearchState, iKUID, nullptr))
                {
                    VPHAL_RENDER_ASSERTMESSAGE("Failed to load dependency for label %d.", pLink->iLabelID);
                    return false;
                }

                // Restart
                break;
            }
        } // for
    } while (!bResolveDone);

    // All modules must be loaded by now, start patching
    for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
    {
        // Offset from the importing instruction to the export, in DWORDs
        iOffset  = (int32_t)dwResolveOffset[pLink->iLabelID] - 4;
        iOffset -= pLink->dwOffset;
        d        = ((uint32_t *)kernel) + pLink->dwOffset;

        // Patch offset
        if (!pLink->bExport && !pLink->bInline)
        {
            d[3] = iOffset << 2; // jmpi - index * 8 bits
        }
    }

    return true;
}
//--------------------------------------------------------------
// KernelDll_SimpleHash - Fowler/Noll/Vo FNV-1a hash algorithm - public domain
//
// Parameters:
//    void    *pData - [in] Data to hash
//    int32_t  iSize - [in] Number of bytes to hash; no byte is read when <= 0
//
// Output: 32-bit hash; the initial value 0x811c9dc5 when no byte is hashed
//
// Note: bytes are read through a plain char pointer, so the value mixed
//       into the hash follows the platform's char signedness.
//--------------------------------------------------------------
uint32_t KernelDll_SimpleHash(void *pData, int32_t iSize)
{
    static const uint32_t kFnvPrime = 0x1000193;
    const char           *pBytes    = (const char *)pData;
    uint32_t              uHash     = 0x811c9dc5;
    int32_t               iByte;

    // FNV-1a step: xor the byte in first, then multiply by the prime
    for (iByte = 0; iByte < iSize; iByte++)
    {
        uHash = (uHash ^ pBytes[iByte]) * kFnvPrime;
    }

    return uHash;
}
//--------------------------------------------------------------
// KernelDll_GetCombinedKernel - Search combined kernel
//
// Looks up the hash table bucket selected by the folded hash and walks
// its chain for an entry with the same 32-bit hash, the same filter
// size and byte-identical filter contents.
//
// Parameters:
//    Kdll_State       *pState      - [in] Dynamic Linking state
//    Kdll_FilterEntry *pFilter     - [in] Filter to look for
//    int32_t           iFilterSize - [in] Number of filter entries
//    uint32_t          dwHash      - [in] 32-bit hash of the filter
//
// Output: cache entry of the combined kernel (its dwRefresh is updated),
//         nullptr if the kernel is not cached and must be built
//--------------------------------------------------------------
Kdll_CacheEntry *KernelDll_GetCombinedKernel(
    Kdll_State       *pState,
    Kdll_FilterEntry *pFilter,
    int32_t           iFilterSize,
    uint32_t          dwHash)
{
    Kdll_KernelHashTable *pTable;
    Kdll_KernelHashEntry *pBase;
    Kdll_KernelHashEntry *pNode;
    uint32_t              uBucket;
    uint16_t              wFirst;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Get hash table
    pTable = &pState->KernelHashTable;

    // fold hash from 32 to 8 bit :-)
    FOLD_HASH(uBucket, dwHash)

    // Empty bucket, or index outside the entry pool: nothing cached
    wFirst = pTable->wHashTable[uBucket];
    if (wFirst == 0 || wFirst > DL_MAX_COMBINED_KERNELS )
    {
        return nullptr;
    }

    // all indices are 1 based (0 means null)
    pBase = (&pTable->HashEntry[0]) - 1;

    // Walk the entries sharing the same folded hash
    pNode = &pBase[wFirst];
    while (pNode != nullptr)
    {
        // match 32-bit hash and size first, then compare the filter itself
        if (pNode->dwHash  == dwHash      &&
            pNode->iFilter == iFilterSize &&
            memcmp(pNode->pFilter, pFilter, iFilterSize * sizeof(Kdll_FilterEntry)) == 0)
        {
            // Kernel already cached
            pNode->pCacheEntry->dwRefresh = pState->dwRefresh++;
            return (pNode->pCacheEntry);
        }

        // Next entry with the same 8-bit folded hash
        pNode = (pNode->next) ? (&pBase[pNode->next]) : nullptr;
    }

    // Kernel must be built
    return nullptr;
}
//--------------------------------------------------------------
// KernelDll_AllocateHashEntry - Take a hash entry from the free pool and
//                               link it at the head of its bucket chain
//
// Parameters:
//   pHashTable - [in/out] Kernel hash table
//   hash       - [in] 32-bit hash the new entry is registered under
//
// Output: 1-based index of the new entry, 0 if the pool is empty
//--------------------------------------------------------------
uint16_t KernelDll_AllocateHashEntry(Kdll_KernelHashTable *pHashTable,
                                     uint32_t              hash)
{
    // 1-based indexing: element [1] of pBase is HashEntry[0]
    Kdll_KernelHashEntry *pBase = &pHashTable->HashEntry[0] - 1;
    Kdll_KernelHashEntry *pSlot;
    uint32_t              bucket;
    uint16_t              slot;

    VPHAL_RENDER_FUNCTION_ENTER;

    slot = pHashTable->pool;
    if (slot == 0)
    {
        // Pool exhausted
        return 0;
    }

    // Pop the head of the free list
    pSlot            = &pBase[slot];
    pHashTable->pool = pSlot->next;
    if (pHashTable->last == slot)
    {
        // The pool's tail was just consumed
        pHashTable->last = 0;
    }

    // Reset the payload
    pSlot->dwHash      = hash;
    pSlot->iFilter     = 0;
    pSlot->pFilter     = nullptr;
    pSlot->pCacheEntry = nullptr;

    // Push onto the front of the bucket chain
    FOLD_HASH(bucket, hash);
    pSlot->next                    = pHashTable->wHashTable[bucket];
    pHashTable->wHashTable[bucket] = slot;

    return slot;
}
//--------------------------------------------------------------
// KernelDll_ReleaseHashEntry - Unlink a hash entry from its bucket chain
//                              and append it to the tail of the free pool
//
// Parameters:
//   pHashTable - [in/out] Kernel hash table
//   entry      - [in] 1-based index of the entry to release (0 is ignored)
//
// Output: none
//--------------------------------------------------------------
void KernelDll_ReleaseHashEntry(Kdll_KernelHashTable *pHashTable, uint16_t entry)
{
    // 1-based indexing: element [1] of pBase is HashEntry[0]
    Kdll_KernelHashEntry *pBase = &pHashTable->HashEntry[0] - 1;
    uint32_t              bucket;
    uint16_t              successor;
    uint16_t              walker;

    VPHAL_RENDER_FUNCTION_ENTER;

    if (entry == 0)
    {
        return;
    }

    // Detach the entry, remembering what followed it in the chain
    successor         = pBase[entry].next;
    pBase[entry].next = 0;

    // Remove the reference to the entry from its bucket chain
    FOLD_HASH(bucket, pBase[entry].dwHash);
    walker = pHashTable->wHashTable[bucket];
    if (walker == entry)
    {
        // Entry was the chain head
        pHashTable->wHashTable[bucket] = successor;
    }
    else
    {
        // Find the predecessor, if the entry is in this chain at all
        while (walker != 0 && pBase[walker].next != entry)
        {
            walker = pBase[walker].next;
        }

        if (walker != 0)
        {
            pBase[walker].next = successor;
        }
    }

    // Append to the free pool (becomes both head and tail if pool was empty)
    if (pHashTable->pool != 0)
    {
        pBase[pHashTable->last].next = entry;
    }
    else
    {
        pHashTable->pool = entry;
    }
    pHashTable->last = entry;
}
//--------------------------------------------------------------
// KernelDll_ReleaseCacheEntry - Mark a cache entry as free
//
// Parameters:
//   pCache - [in/out] Kernel cache owning the entry
//   pEntry - [in/out] Entry to release
//
// Output: none
//--------------------------------------------------------------
void KernelDll_ReleaseCacheEntry(Kdll_KernelCache *pCache,
                                 Kdll_CacheEntry  *pEntry)
{
    // One fewer entry in use
    --pCache->iCacheEntries;

    // -1 in both IDs marks the slot as unused; KernelDll_AllocateCacheEntry
    // looks for iKCID == -1 when searching for a free slot
    pEntry->iKCID = -1;
    pEntry->iKUID = -1;
}
//--------------------------------------------------------------
// KernelDll_GarbageCollection - Evict the least recently used combined
//                               kernel when the kernel cache is full
//
// Parameters:
//   pState - [in/out] Kernel Dll state (kernel cache, hash table, refresh counter)
//   size   - [in] Requested size (currently unused)
//
// Output: true  - cache has room, or one entry was released
//         false - cache is full and no releasable entry was found
//--------------------------------------------------------------
bool KernelDll_GarbageCollection(Kdll_State *pState, int32_t size)
{
    Kdll_KernelCache     *pCache     = &pState->KernelCache;
    Kdll_CacheEntry      *pEntry     = pCache->pCacheEntries;
    Kdll_CacheEntry      *pOldest    = nullptr;
    Kdll_KernelHashTable *pHashTable = &pState->KernelHashTable;
    Kdll_KernelHashEntry *pHashEntry = &pHashTable->HashEntry[0] - 1; // 1-based indexing
    uint32_t              dwOldest   = (uint32_t)-1;
    uint16_t              wEntry     = 0;
    int32_t               i;

    MOS_UNUSED(size);

    VPHAL_RENDER_FUNCTION_ENTER;

    // Rebase refresh stamps before the 32-bit counter wraps around
    if (pState->dwRefresh > 0xffff0000)
    {
        pState->dwRefresh -= 0x80000000;
        for (i = pCache->iCacheMaxEntries; i > 0 && pEntry != nullptr; i--)
        {
            if (pEntry->dwRefresh < 0x80000000)
            {
                pEntry->dwRefresh = 0;
            }
            else
            {
                pEntry->dwRefresh -= 0x80000000;
            }

            pEntry = pEntry->pNextEntry;
        }
    }

    // No need to deallocate old entries
    if (pCache->iCacheEntries < DL_MAX_COMBINED_KERNELS)
    {
        return true;
    }

    // The rebase loop above may have walked pEntry to the end of the list;
    // restart from the head so the scan below sees every entry instead of
    // dereferencing an exhausted pointer.
    pEntry = pCache->pCacheEntries;

    // Find the oldest entry that is allocated and not currently loaded
    for (i = pCache->iCacheMaxEntries; i > 0 && pEntry != nullptr; i--)
    {
        if (pEntry->iKCID != -1 && pEntry->dwLoaded == 0)
        {
            if (pEntry->dwRefresh < dwOldest)
            {
                pOldest  = pEntry;
                dwOldest = pEntry->dwRefresh;
                wEntry   = pEntry->wHashEntry;
            }
        }

        pEntry = pEntry->pNextEntry;
    }

    // No entry to release, sanity checks: the hash entry must point back
    // at the cache entry selected for eviction
    pHashEntry += wEntry;
    if (!pOldest     ||
        wEntry == 0  ||
        pHashEntry->pCacheEntry != pOldest)
    {
        VPHAL_RENDER_ASSERT(false);
        return false;
    }

    // Release hash and cache entries
    KernelDll_ReleaseHashEntry(pHashTable, wEntry);
    KernelDll_ReleaseCacheEntry(pCache, pOldest);

    return true;
}
//--------------------------------------------------------------
// KernelDll_AllocateAdditionalCacheEntries - Grow the kernel cache by
//                                            DL_NEW_COMBINED_KERNELS entries
//
// A single allocation holds DL_NEW_COMBINED_KERNELS entry headers followed
// by DL_NEW_COMBINED_KERNELS binary blocks of DL_CACHE_BLOCK_SIZE bytes.
// The new entries are chained after the current last entry of the cache.
//
// Parameters:
//   pCache - [in/out] Kernel cache to extend
//
// Output: first of the newly added entries, nullptr on failure
//--------------------------------------------------------------
Kdll_CacheEntry *
KernelDll_AllocateAdditionalCacheEntries(Kdll_KernelCache *pCache)
{
    Kdll_CacheEntry *pBlock = nullptr;
    Kdll_CacheEntry *pTail;
    uint8_t         *pBinaryArea;
    int              iBytes;
    int              k;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Refuse to grow past the global limit
    if (pCache->iCacheEntries + DL_NEW_COMBINED_KERNELS > DL_MAX_COMBINED_KERNELS)
    {
        VPHAL_RENDER_ASSERTMESSAGE("KernelDll_AllocateAdditionalCacheEntries: Can't allocate more kernel cache entries\n");
        return nullptr;
    }

    // One zeroed block for all headers plus all binaries
    iBytes = (sizeof(Kdll_CacheEntry) + DL_CACHE_BLOCK_SIZE) * DL_NEW_COMBINED_KERNELS;
    pBlock = (Kdll_CacheEntry *)MOS_AllocAndZeroMemory(iBytes);
    if (pBlock == nullptr)
    {
        VPHAL_RENDER_ASSERTMESSAGE("KernelDll_AllocateAdditionalCacheEntries: Failed to allocate kernel cache entries\n");
        return nullptr;
    }

    // Walk to the last existing entry and hook the new block behind it
    pTail = pCache->pCacheEntries;
    for (k = pCache->iCacheMaxEntries - 1; k > 0; k--)
    {
        pTail = pTail->pNextEntry;
    }
    pTail->pNextEntry = pBlock;

    // Binary storage starts right after the last header
    pBinaryArea = (uint8_t *)(pBlock + DL_NEW_COMBINED_KERNELS);

    for (k = 0; k < DL_NEW_COMBINED_KERNELS; k++)
    {
        Kdll_CacheEntry *pSlot = &pBlock[k];

        pSlot->iKUID      = -1;
        pSlot->iKCID      = -1;
        pSlot->pBinary    = pBinaryArea + k * DL_CACHE_BLOCK_SIZE;
        pSlot->pNextEntry = (k != DL_NEW_COMBINED_KERNELS - 1) ? (pSlot + 1) : nullptr;
    }

    // Account for the added capacity
    pCache->iCacheMaxEntries += DL_NEW_COMBINED_KERNELS;
    pCache->iCacheSize       += DL_NEW_COMBINED_KERNELS * DL_CACHE_BLOCK_SIZE;
    pCache->iCacheFree       += DL_NEW_COMBINED_KERNELS * DL_CACHE_BLOCK_SIZE;

    return pBlock;
}
//--------------------------------------------------------------
// KernelDll_ReleaseAdditionalCacheEntries - Free every block of cache
//                                           entries added beyond the default
//
// Parameters:
//   pCache - [in] Kernel cache whose additional blocks are freed
//
// Output: none
//
// NOTE(review): the cache counters and the link from the last default
// entry are not updated here - presumably this is only called on teardown;
// confirm against callers.
//--------------------------------------------------------------
void KernelDll_ReleaseAdditionalCacheEntries(Kdll_KernelCache *pCache)
{
    VPHAL_RENDER_FUNCTION_ENTER;

    // Nothing was added beyond the default set
    if (pCache->iCacheMaxEntries <= DL_DEFAULT_COMBINED_KERNELS)
    {
        return;
    }

    // First additional block hangs off the last default entry
    Kdll_CacheEntry *pBlock = pCache->pCacheEntries[DL_DEFAULT_COMBINED_KERNELS - 1].pNextEntry;
    int nBlocks = (pCache->iCacheMaxEntries - DL_DEFAULT_COMBINED_KERNELS) / DL_NEW_COMBINED_KERNELS;

    while (nBlocks-- > 0)
    {
        // Read the link to the following block before this one is freed
        Kdll_CacheEntry *pFollowing = pBlock[DL_NEW_COMBINED_KERNELS - 1].pNextEntry;

        MOS_FreeMemory(pBlock);
        pBlock = pFollowing;
    }
}
//--------------------------------------------------------------
// KernelDll_AllocateCacheEntry - Reserve a cache entry for a given size
//
// Parameters:
//   pCache - [in/out] Kernel cache
//   iSize  - [in] Bytes required; must fit in one DL_CACHE_BLOCK_SIZE block
//
// Output: zeroed entry (binary pointer and list link preserved, iSize set),
//         nullptr if the size is too large or no entry could be obtained
//--------------------------------------------------------------
Kdll_CacheEntry *
KernelDll_AllocateCacheEntry(Kdll_KernelCache *pCache, int32_t iSize)
{
    Kdll_CacheEntry *pSlot = pCache->pCacheEntries;
    int32_t          remaining;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Each entry owns exactly one fixed-size block
    if (iSize > DL_CACHE_BLOCK_SIZE)
    {
        return nullptr;
    }

    // Scan the list for an unused slot (iKCID == -1)
    remaining = pCache->iCacheMaxEntries;
    while (remaining > 0 && pSlot->iKCID != -1)
    {
        pSlot = pSlot->pNextEntry;
        remaining--;
    }

    if (remaining == 0)
    {
        // Every slot is taken: try to grow the cache
        pSlot = KernelDll_AllocateAdditionalCacheEntries(pCache);
        if (pSlot == nullptr)
        {
            return nullptr;
        }
    }

    // Wipe the entry but keep its storage pointer and its place in the list
    uint8_t         *pKeepBinary = pSlot->pBinary;
    Kdll_CacheEntry *pKeepNext   = pSlot->pNextEntry;

    MOS_ZeroMemory(pSlot, sizeof(Kdll_CacheEntry));
    pSlot->pNextEntry = pKeepNext;
    pSlot->pBinary    = pKeepBinary;
    pSlot->iSize      = iSize;

    // One more entry in use
    pCache->iCacheEntries++;

    return pSlot;
}
//--------------------------------------------------------------
// KernelDll_GetComponentKernel - Get component/static kernel
//                                entry from cache
//
// Parameters:
//   pState - [in] Kernel Dll state (component kernel cache)
//   iKUID  - [in] Kernel unique ID, used as index into the component cache
//
// Output: cache entry for the kernel, or nullptr if the ID is out of range
//         or the entry is not populated (ID mismatch, no binary, zero size)
//--------------------------------------------------------------
Kdll_CacheEntry *
KernelDll_GetComponentKernel(Kdll_State *pState,
                             int32_t     iKUID)
{
    Kdll_CacheEntry *pEntry = nullptr;

    // Reject negative IDs as well: -1 is used as the "unused" marker for
    // iKUID elsewhere in this file and must never be used as an array index.
    if (iKUID >= 0 && iKUID < pState->ComponentKernelCache.iCacheMaxEntries)
    {
        pEntry = &(pState->ComponentKernelCache.pCacheEntries[iKUID]);

        // Slot must actually hold this kernel
        if (pEntry->iKUID   != iKUID   ||
            pEntry->pBinary == nullptr ||
            pEntry->iSize   == 0)
        {
            pEntry = nullptr;
        }
    }

    return pEntry;
}
//--------------------------------------------------------------
// KernelDll_AddKernel - Add kernel into hash table and kernel cache
//
// The cache block of the new entry is laid out as:
//   [kernel binary][modified filter][CSC params][VPHAL_CSPACE slot][original filter]
//
// Parameters:
//   pState       - [in/out] Kernel Dll state
//   pSearchState - [in] Search state holding the built kernel, the modified
//                       filter and the CSC parameters
//   pFilter      - [in] Original filter (stored for later lookups)
//   iFilterSize  - [in] Number of entries in pFilter
//   dwHash       - [in] 32-bit hash of the original filter
//
// Output: new cache entry, or nullptr if there is no kernel to store or no
//         cache/hash entry could be allocated
//--------------------------------------------------------------
Kdll_CacheEntry *
KernelDll_AddKernel(Kdll_State       *pState,          // Kernel Dll state
                    Kdll_SearchState *pSearchState,    // Search state
                    Kdll_FilterEntry *pFilter,         // Original filter
                    int32_t           iFilterSize,     // Original filter size
                    uint32_t          dwHash)
{
    Kdll_CacheEntry      *pCacheEntry;
    Kdll_KernelHashTable *pHashTable;
    Kdll_KernelHashEntry *pHashEntry;
    uint16_t              entry;
    int32_t               size;
    uint8_t              *ptr;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Check kernel; negative filter counts would turn into huge copy sizes
    if (pSearchState->KernelSize <= 0 ||
        pSearchState->iFilterSize < 0 ||
        iFilterSize < 0)
    {
        return nullptr;
    }

    // Get hash table
    pHashTable = &pState->KernelHashTable;
    pHashEntry = &pHashTable->HashEntry[0] - 1;  // all indices are 1 based (0 = null)

    // allocate space in kernel cache to store the kernel, filter, CSC parameters.
    // Each filter copy is budgeted with the count actually used for that copy,
    // so the size check in KernelDll_AllocateCacheEntry covers every write below
    // even if the original and modified filter sizes differ.
    size = pSearchState->KernelSize +                               // Kernel
           pSearchState->iFilterSize * sizeof(Kdll_FilterEntry) +   // Modified filter
           iFilterSize * sizeof(Kdll_FilterEntry) +                 // Original filter
           sizeof(Kdll_CSC_Params) +                                // CSC parameters
           sizeof(VPHAL_CSPACE);                                    // Intermediate Color Space for colorfill

    // Run garbage collection, create space for new kernel and metadata
    KernelDll_GarbageCollection(pState, size);

    // Get new kernel cache entry
    pCacheEntry = KernelDll_AllocateCacheEntry(&pState->KernelCache, size);
    if (!pCacheEntry)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to allocate cache space for new kernel.");
        return nullptr;
    }

    // Get hash entry
    entry = KernelDll_AllocateHashEntry(pHashTable, dwHash);
    if (!entry)
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to allocate hash entry for new kernel.");
        KernelDll_ReleaseCacheEntry(&pState->KernelCache, pCacheEntry);
        return nullptr;
    }

    // Setup cache entry, copy kernel
    pCacheEntry->iKUID      = -1;
    pCacheEntry->iKCID      = pState->KernelCache.iCacheID;   // Create new kernel cache id (KCID)
    pCacheEntry->dwRefresh  = pState->dwRefresh++;
    pCacheEntry->wHashEntry = entry;

    // Save kernel
    pCacheEntry->iSize = pSearchState->KernelSize;
    MOS_SecureMemcpy(pCacheEntry->pBinary, pSearchState->KernelSize, (void *)pSearchState->Kernel, pSearchState->KernelSize);
    ptr = pCacheEntry->pBinary + pSearchState->KernelSize;

    // Save modified filter
    pCacheEntry->iFilterSize = pSearchState->iFilterSize;
    pCacheEntry->pFilter     = (Kdll_FilterEntry *) (ptr);
    MOS_SecureMemcpy(ptr, pSearchState->iFilterSize * sizeof(Kdll_FilterEntry), (void *)pSearchState->Filter, pSearchState->iFilterSize * sizeof(Kdll_FilterEntry));
    ptr += pSearchState->iFilterSize * sizeof(Kdll_FilterEntry);

    // Save CSC parameters associated with the kernel
    pCacheEntry->pCscParams = (Kdll_CSC_Params *) (ptr);
    MOS_SecureMemcpy(ptr, sizeof(Kdll_CSC_Params), (void *)&pSearchState->CscParams, sizeof(Kdll_CSC_Params));
    ptr += sizeof(Kdll_CSC_Params);

    // Save intermediate color space for colorfill. The value itself lives in
    // the entry structure; the slot in the block is skipped, not written.
    pCacheEntry->colorfill_cspace = pState->colorfill_cspace;
    ptr += sizeof(VPHAL_CSPACE);

    // increment KCID (Range = 0x00010000 - 0x7fffffff)
    pState->KernelCache.iCacheID = 0x00010000 + (pState->KernelCache.iCacheID - 0x0000ffff) % 0x7fff0000;

    // Setup hash entry, copy filter
    pHashEntry += entry;
    pHashEntry->pCacheEntry = pCacheEntry;

    // Save original filter for search purposes - modified filter is used for rendering
    pHashEntry->iFilter = iFilterSize;
    pHashEntry->pFilter = (Kdll_FilterEntry *) (ptr);
    MOS_SecureMemcpy(ptr, iFilterSize * sizeof(Kdll_FilterEntry), (void *)pFilter, iFilterSize * sizeof(Kdll_FilterEntry));

    return pCacheEntry;
}
//--------------------------------------------------------------
// KernelDll_BuildKernel - Build the combined kernel from the component
//                         kernels selected by the search
//
// Parameters:
//   pState       - [in] Kernel Dll state (component and custom kernel caches)
//   pSearchState - [in/out] Search state; receives the combined kernel and
//                           the list of symbols to link
//
// Output: true on success; false if a component kernel could not be
//         appended or symbols could not be resolved
//--------------------------------------------------------------
bool KernelDll_BuildKernel(Kdll_State *pState, Kdll_SearchState *pSearchState)
{
    Kdll_KernelCache *pInternalCache = &pState->ComponentKernelCache;
    Kdll_KernelCache *pCustomCache   = pState->pCustomKernelCache;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Start from an empty symbol table and an empty output buffer
    pSearchState->KernelLink.pLink   = pSearchState->LinkArray;
    pSearchState->KernelLink.dwSize  = DL_MAX_SYMBOLS;
    pSearchState->KernelLink.dwCount = 0;
    pSearchState->KernelSize         = 0;
    pSearchState->KernelLeft         = sizeof(pSearchState->Kernel);

#if EMUL || VPHAL_LIB || _DEBUG || _RELEASE_INTERNAL
    VPHAL_RENDER_NORMALMESSAGE("Component Kernels:");
#endif // EMUL || VPHAL_LIB || _DEBUG || _RELEASE_INTERNAL

    for (int32_t idx = 0; idx < pSearchState->KernelCount; idx++)
    {
        const int32_t iKernel = pSearchState->KernelID[idx];
        const int32_t iPatch  = pSearchState->PatchID[idx];

        // Custom kernels come from the custom cache, all others from the internal one
        Kdll_KernelCache *pSourceCache =
            (pSearchState->KernelGrp[idx] == GROUP_CUSTOM) ? pCustomCache : pInternalCache;

        // Patch information associated with the kernel, if any
        Kdll_PatchData *pKernelPatch =
            (iPatch >= 0) ? &(pSearchState->Patches[iPatch]) : nullptr;

        // Append/Patch the kernel
        if (!Kdll_AppendKernel(pSourceCache, pSearchState, iKernel, pKernelPatch))
        {
            VPHAL_RENDER_ASSERTMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[idx]);
            return false;
        }

        Kdll_CacheEntry *kernels = &pSourceCache->pCacheEntries[iKernel];
        VPHAL_RENDER_NORMALMESSAGE("Component kernels [%d]: %s", iKernel, kernels->szName);
    }

    // Resolve kernel dependencies
    if (!Kdll_ResolveKernelDependencies(pState, pSearchState))
    {
        VPHAL_RENDER_ASSERTMESSAGE("Failed to resolve symbols.");
        return false;
    }

    return true;
}
//---------------------------------------------------------------------------------------
// KernelDll_StartKernelSearch - Starts kernel search
//
// Parameters:
//    Kdll_State       *pState              - [in]     Dynamic Linking State
//    Kdll_SearchState *pSearchState        - [in/out] Kernel search state (fully reset here)
//    Kdll_FilterEntry *pFilter             - [in]     Search filter (array of search entries)
//    int32_t           iFilterSize         - [in]     Search filter size
//    uint32_t          uiIs64BInstrEnabled - [in]     Non-zero allows the 64B save kernel
//                                                     for X-tiled/linear render targets
//
// Output: none
//---------------------------------------------------------------------------------------
void KernelDll_StartKernelSearch(
    Kdll_State       *pState,
    Kdll_SearchState *pSearchState,
    Kdll_FilterEntry *pFilter,
    int32_t           iFilterSize,
    uint32_t          uiIs64BInstrEnabled)
{
    VPHAL_RENDER_FUNCTION_ENTER;

    // Reset all states
    MOS_ZeroMemory(pSearchState, sizeof(Kdll_SearchState));

    // Setup KDLL state
    pSearchState->pKdllState = pState;

    // Cleanup kernel table
    pSearchState->KernelCount = 0;

    // Cleanup patch data; a patch ID of -1 means "no patch"
    memset(pSearchState->Patches, 0, sizeof(pSearchState->Patches));
    memset(pSearchState->PatchID, -1, sizeof(pSearchState->PatchID));
    pSearchState->PatchCount = 0;

    // Without a filter there is nothing more to set up
    if (!pFilter || iFilterSize <= 0)
    {
        return;
    }

    // Copy original filter; filter will be modified as part of the search
    MOS_SecureMemcpy(pSearchState->Filter, iFilterSize * sizeof(Kdll_FilterEntry), pFilter, iFilterSize * sizeof(Kdll_FilterEntry));
    pSearchState->pFilter     = pSearchState->Filter;
    pSearchState->iFilterSize = iFilterSize;

    for (int32_t nLayer = 0; nLayer < iFilterSize; nLayer++)
    {
        const Kdll_FilterEntry *pSrc = &pFilter[nLayer];
        Kdll_FilterEntry       *pDst = &pSearchState->pFilter[nLayer];

        // DScale kernels are enabled for all gen9 steppings.
        // For Gen9+, kernels don't support sublayer DScale + rotation.
        // Sampler_unorm does not support Y410/RGB10, sampler_16 is needed for those.
        const bool bRotatedSubVideo = (pSrc->layer == Layer_SubVideo) &&
                                      (pSrc->rotation != VPHAL_ROTATION_IDENTITY);

        // Keep the sampler untouched when DScale applies to this layer
        if (pSrc->bEnableDscale ||
            (pSrc->bWaEnableDscale && !bRotatedSubVideo))
        {
            continue;
        }

        // Fall back from the 0.34x samplers to the regular ones
        if (pSrc->sampler == Sample_Scaling_034x)
        {
            pDst->sampler = Sample_Scaling;
        }
        else if (pSrc->sampler == Sample_iScaling_034x)
        {
            pDst->sampler = Sample_iScaling;
        }
        else if (pSrc->sampler == Sample_iScaling_AVS)
        {
            pDst->sampler = Sample_iScaling_AVS;
        }
    }

    // The last filter entry supplies the render target format and tile type
    const Kdll_FilterEntry *pTarget = &pSearchState->pFilter[iFilterSize - 1];
    pSearchState->target_format   = pTarget->format;
    pSearchState->target_tiletype = pTarget->tiletype;

    // Indicate whether to use 64B save kernel for render target surface
    const bool bTileXOrLinear = (pSearchState->target_tiletype == MOS_TILE_X) ||
                                (pSearchState->target_tiletype == MOS_TILE_LINEAR);
    if (uiIs64BInstrEnabled && bTileXOrLinear)
    {
        pSearchState->b64BSaveEnabled = true;
    }
}
//---------------------------------------------------------------------------------------
// KernelDll_SetupFunctionPointers - Install the default KDLL callbacks and let the
//                                   caller override them
//
// Parameters:
//    Kdll_State *pState                 - [in/out] Kernel Dll state
//    void (*ModifyFunctionPointers)(..) - [in]     Optional hook invoked after the defaults
//                                                  are installed; may be nullptr
//
// Output: always true
//-----------------------------------------------------------------------------------------
static bool KernelDll_SetupFunctionPointers(
    Kdll_State  *pState,
    void       (*ModifyFunctionPointers)(PKdll_State))
{
    VPHAL_RENDER_FUNCTION_ENTER;

    // Kernel search / build entry points
    pState->pfnStartKernelSearch = KernelDll_StartKernelSearch;
    pState->pfnFindRule          = KernelDll_FindRule;
    pState->pfnUpdateState       = KernelDll_UpdateState;
    pState->pfnSearchKernel      = KernelDll_SearchKernel;
    pState->pfnBuildKernel       = KernelDll_BuildKernel;

    // Color space conversion helpers
    pState->pfnSetupCSC          = KernelDll_SetupCSC;
    pState->pfnMapCSCMatrix      = KernelDll_MapCSCMatrix;

    // Give the caller a chance to replace any of the defaults
    if (ModifyFunctionPointers)
    {
        ModifyFunctionPointers(pState);
    }

#if EMUL || VPHAL_LIB
    // Disable callbacks
    pState->pfnCbSearchSate = nullptr;
    pState->pfnCbListKernel = nullptr;
    pState->pToken          = nullptr;
#endif // EMUL || VPHAL_LIB

    return true;
}
/*----------------------------------------------------------------------------
| Name : KernelDll_SetupCSC
| Purpose : Defines CSC conversions necessary for a given filter:
|           selects the internal color space, assigns a CSC matrix to each
|           layer of the search filter and fills pSearchState->CscParams
|
| Input : pState - Kernel Dll state (colorfill_cspace is updated)
| pSearchState - current DL search state (Filter entries and CscParams
|                are updated in place)
|
| Return : true  - CSC setup completed
|          false - more than DL_PROCAMP_MAX procamp operations or more than
|                  DL_CSC_MAX distinct CSC matrices would be required
\---------------------------------------------------------------------------*/
bool KernelDll_SetupCSC(
Kdll_State *pState,
Kdll_SearchState *pSearchState)
{
int i, m; // Integer iterators
bool bCoeffID_0_Used = false;
VPHAL_CSPACE cspace = CSpace_None; // Current ColorSpace
VPHAL_CSPACE out_cspace = CSpace_None; // Render Target CS
VPHAL_CSPACE main_cspace = CSpace_None; // Main video CS
VPHAL_CSPACE sel_cspace = CSpace_Any; // Selected CS
Kdll_FilterEntry *pFilter; // Current Filter information
int iFilterSize = pSearchState->iFilterSize;
Kdll_CSC_Params *pCSC = &pSearchState->CscParams;
int csc_count; // Number of CSC operations
int matrix_count; // Number of Matrices in use
int procamp_count = 0; // Number of PA operations
int sel_csc_count = -1; // Minimum number of CSC operations
int iCoeffID; // coeffID for layers other than main video
uint8_t cspace_in_use[CSpace_Count]; // Color Spaces in use
Kdll_CSC_Matrix curr_matrix;
Kdll_CSC_Matrix *matrix = pCSC->Matrix; // Color Space conversion matrix
uint8_t *matrixID = pCSC->MatrixID; // CSC coefficient allocation table
// Clear all CSC matrices and mark every coefficient slot as disabled
MOS_ZeroMemory(matrix, sizeof(pCSC->Matrix));
memset(matrixID, DL_CSC_DISABLED, sizeof(pCSC->MatrixID));
memset(pCSC->PatchMatrixID, DL_CSC_DISABLED, sizeof(pCSC->PatchMatrixID));
pCSC->PatchMatrixNum = 0;
// Clear array of color spaces in use
MOS_ZeroMemory(cspace_in_use, sizeof(cspace_in_use));
//---------------------------------------------------------------//
// Pass 1: Collect information about Color Spaces in use
// Get Primary Video and Render Target Color Spaces
// Force xvYCC passthrough if enabled
//---------------------------------------------------------------//
for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
{
// Disable Procamp for all layers except Main Video
// Disable Procamp if source is RGB
if (pFilter->layer != Layer_MainVideo ||
pFilter->cspace == CSpace_sRGB ||
pFilter->cspace == CSpace_stRGB)
{
pFilter->procamp = DL_PROCAMP_DISABLED;
}
// Count number of procamp operations (limited by number of independent coefficients)
// Ignore layers with palettized/constant colors
if (pFilter->procamp != DL_PROCAMP_DISABLED &&
pFilter->cspace != CSpace_Any)
{
procamp_count++;
}
// Set xvYCC passthrough mode: an xvYCC layer pre-selects the internal color space
if (pFilter->cspace == CSpace_xvYCC709 ||
pFilter->cspace == CSpace_xvYCC601)
{
sel_cspace = pFilter->cspace;
}
// Get Main Video color space
if (pFilter->layer == Layer_MainVideo)
{
main_cspace = pFilter->cspace;
}
// Get Render Target color space
if (pFilter->layer == Layer_RenderTarget)
{
// Target is sRGB/stRGB
if (!KernelDll_IsYUVFormat(pFilter->format))
{
// Disable xvYCC passthrough (sRGB cannot have extended gamut)
sel_cspace = CSpace_Any;
}
// NOTE(review): out_cspace is only written here and not read again in this function
out_cspace = pFilter->cspace;
}
// Mark color spaces in use for search that follows
if (pFilter->cspace > CSpace_Any && pFilter->cspace < CSpace_Count)
{
cspace_in_use[pFilter->cspace] = 1;
}
}
// Check max number of procamp operations
if (procamp_count > DL_PROCAMP_MAX)
{
return false;
}
//---------------------------------------------------------------//
// Pass 2: Search Color Space that provides minimum number of CSC conversions
// If there are multiple solutions, select main video cspace (quality)
// Skipped when xvYCC passthrough already selected a color space
//---------------------------------------------------------------//
if (sel_cspace == CSpace_Any)
{
int cs;
for (cs = (CSpace_Any + 1); cs < CSpace_Count; cs++)
{
// Skip color spaces not in use
cspace = (VPHAL_CSPACE) cs;
if (!cspace_in_use[cspace])
{
continue;
}
// xvYCC and BT are treated as same for CSC considerations (BT.x to xvYCC.x matrix is I)
cspace = KernelDll_TranslateCspace(cspace);
// Count # of CS conversions and matrices needed if 'cspace' were selected
csc_count = 0;
for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
{
// Ignore layers where the Color Space may be set in software (colorfill, palettized)
if (pFilter->cspace == CSpace_Any)
{
continue;
}
// Check if CSC/PA is required
if (KernelDll_TranslateCspace(pFilter->cspace) != cspace ||
pFilter->procamp != DL_PROCAMP_DISABLED)
{
csc_count++;
}
}
// Save best choice as requiring minimum number of CSC operations
// (the tie-break compares the untranslated candidate 'cs' with the main video cspace)
if ((sel_csc_count < 0) || // Initial value
(csc_count < sel_csc_count) || // Minimum number of CSC operations
(csc_count == sel_csc_count && cs == main_cspace) ) // Use main cspace as default if same CSC count
{
sel_cspace = cspace;
sel_csc_count = csc_count;
}
}
}
// Colorfill is applied after CSC, so the colorfill values must be expressed
// in the selected color space; publish it in the KDLL state.
pState->colorfill_cspace = sel_cspace;
//---------------------------------------------------------------//
// Pass 3: color space is selected by now... setup CSC matrices
//---------------------------------------------------------------//
matrix_count = 0;
iCoeffID = 1; // CoeffID_0 is handed out separately below
for (i = iFilterSize, pFilter = pSearchState->Filter; i > 0; i--, pFilter++)
{
// Setup CSC for palettized/colorfill layers
if (pFilter->cspace == CSpace_Any)
{
// Set Color Space and format (for software)
if (pFilter->format == Format_Any)
{
pFilter->format = KernelDll_IsCspace(sel_cspace, CSpace_YUV) ? Format_AYUV : Format_A8R8G8B8;
}
pFilter->cspace = sel_cspace;
pFilter->matrix = DL_CSC_DISABLED;
}
else
{
// Setup CSC parameters: SrcSpace is the layer color space,
// DstSpace is the internal color space selected
curr_matrix.SrcSpace = KernelDll_TranslateCspace(pFilter->cspace);
curr_matrix.DstSpace = KernelDll_TranslateCspace(sel_cspace);
curr_matrix.iProcampID = pFilter->procamp;
// Check if CSC is necessary: same color space and no procamp means no matrix
if (curr_matrix.SrcSpace == curr_matrix.DstSpace &&
curr_matrix.iProcampID == DL_PROCAMP_DISABLED)
{
pFilter->matrix = DL_CSC_DISABLED;
curr_matrix.iCoeffID = CoeffID_None;
continue;
}
// Reserve CoeffID_0 for main video - CoeffID_0 gets CSC coeff from static parameters
// If main video doesn't use CoeffID_0, assign to RT
if ( (pFilter->layer == Layer_MainVideo) ||
(pFilter->layer == Layer_RenderTarget))
{
if (bCoeffID_0_Used)
{
curr_matrix.iCoeffID = (Kdll_CoeffID)iCoeffID++;
}
else
{
curr_matrix.iCoeffID = CoeffID_0;
bCoeffID_0_Used = true;
}
}
else
{
curr_matrix.iCoeffID = (Kdll_CoeffID)iCoeffID++;
}
// CSC at the target layer is from internal cspace (SrcSpace)
// to external cspace (DstCspace), so swap the two
if (pFilter->layer == Layer_RenderTarget)
{
VPHAL_CSPACE aux = curr_matrix.SrcSpace;
curr_matrix.SrcSpace = curr_matrix.DstSpace;
curr_matrix.DstSpace = aux;
}
// Search CSC matrix - avoid duplicated CSC matrices
// (a match reuses the existing matrix and its coefficient ID)
for (m = 0; m < matrix_count; m++)
{
if (curr_matrix.SrcSpace == matrix[m].SrcSpace &&
curr_matrix.DstSpace == matrix[m].DstSpace &&
curr_matrix.iProcampID == matrix[m].iProcampID)
{
break;
}
}
// Check limit: m == matrix_count means no existing matrix matched
if (m == matrix_count)
{
// Exceeded number of CSC matrices allowed
if (matrix_count == DL_CSC_MAX)
{
return false;
}
matrix[m].bInUse = true;
matrix[m].SrcSpace = curr_matrix.SrcSpace;
matrix[m].DstSpace = curr_matrix.DstSpace;
matrix[m].iProcampID = curr_matrix.iProcampID;
matrix[m].iCoeffID = curr_matrix.iCoeffID;
// Calculate coefficients for the first time
KernelDll_UpdateCscCoefficients(pState, &matrix[m]);
// Next matrix
matrix_count++;
}
// point to the matrix
pFilter->matrix = m;
}
}
// Link matrices to kernel coefficients (and vice-versa)
// NOTE(review): assumes every assigned iCoeffID is a valid index into MatrixID - confirm
matrix = pCSC->Matrix;
for (m = 0; m < matrix_count; m++, matrix++)
{
// Coefficient table points to matrix index
matrixID[matrix->iCoeffID] = (uint8_t)m;
}
// Save selected color space
pCSC->ColorSpace = sel_cspace;
return true;
}
//---------------------------------------------------------------------------------------
// Kdll_AddKernelList - Add kernel to CM FC kernel list
//
// Parameters:
//    Kdll_KernelCache *pKernelCache    - [in]     Component kernel cache
//    Kdll_KernelCache *pCmFcPatchCache - [in]     Component kernel patch data cache
//    Kdll_SearchState *pSearchState    - [in/out] Kernel search state (size/space left and
//                                                 symbol table are updated)
//    int32_t           iKUID           - [in]     Kernel Unique ID
//    Kdll_PatchData   *pKernelPatch    - [in]     Kernel Patch data (currently unused)
//    void             *pPatchDst       - [in]     Patch data Dst address (currently unused)
//    cm_fc_kernel_t   *Cm_Fc_Kernels   - [out]    CM FC kernel descriptor to fill
//
// Output: true if succeeded, false otherwise (invalid ID, not enough space left in the
//         combined kernel, or not enough room in the symbol table)
//---------------------------------------------------------------------------------------
bool Kdll_AddKernelList(Kdll_KernelCache *pKernelCache,
                        Kdll_KernelCache *pCmFcPatchCache,
                        Kdll_SearchState *pSearchState,
                        int32_t           iKUID,
                        Kdll_PatchData   *pKernelPatch,
                        void             *pPatchDst,
                        cm_fc_kernel_t   *Cm_Fc_Kernels)
{
    Kdll_Symbol     *pSymbols;
    Kdll_CacheEntry *kernels;
    Kdll_CacheEntry *pPatch;
    Kdll_LinkData   *link;
    Kdll_LinkData   *liSearch_reloc;
    int              dwSize;
    int              i;
    int              base;

    VPHAL_RENDER_FUNCTION_ENTER;

    // Patch arguments are part of the interface but not used by this routine
    (void)pKernelPatch;
    (void)pPatchDst;

    // Check if Kernel ID is valid; negative IDs (e.g. the -1 "unused" marker)
    // must be rejected too, since the ID is used as an array index below
    if (iKUID < 0 || iKUID >= pKernelCache->iCacheEntries)
    {
        VPHAL_RENDER_NORMALMESSAGE("invalid Kernel ID %d.", iKUID);
        return false;
    }

    // Get current combined kernel
    pSymbols = &pSearchState->KernelLink;
    base     = pSearchState->KernelSize >> 2;   // current size in 32-bit units

    // Find selected kernel/patch and kernel size; check if there is enough space
    kernels = &pKernelCache->pCacheEntries[iKUID];
    pPatch  = &pCmFcPatchCache->pCacheEntries[iKUID];
    dwSize  = kernels->iSize;
    if (pSearchState->KernelLeft < dwSize)
    {
        VPHAL_RENDER_NORMALMESSAGE("exceeded maximum kernel size.");
        return false;
    }

    // Check if there is enough space for symbols
    if (pSymbols->dwCount + kernels->nLink >= pSymbols->dwSize)
    {
        VPHAL_RENDER_NORMALMESSAGE("exceeded maximum numbers of symbols to resolve.");
        return false;
    }

#if EMUL || VPHAL_LIB
    {
        Kdll_State *pState = pSearchState->pKdllState;

        VPHAL_RENDER_NORMALMESSAGE("%s.", kernels->szName);

        if (pState->pfnCbListKernel)
        {
            pState->pfnCbListKernel(pState->pToken, kernels->szName);
        }
    }
#elif _DEBUG || _RELEASE_INTERNAL // EMUL || VPHAL_LIB
    VPHAL_RENDER_NORMALMESSAGE("%s.", kernels->szName);
#endif // _DEBUG

    // Append symbols to resolve, relocating them to the kernel's position in
    // the combined kernel. Inline links are not added to the symbol table.
    link           = kernels->pLink;
    liSearch_reloc = pSymbols->pLink + pSymbols->dwCount;
    if (link)
    {
        for (i = kernels->nLink; i > 0; i--, link++)
        {
            if (link->bInline)
            {
                continue;
            }

            *liSearch_reloc = *link;
            liSearch_reloc->dwOffset += base;
            liSearch_reloc++;
            pSymbols->dwCount++;
        }
    }

    // Account for the space the kernel will take in the combined kernel
    pSearchState->KernelSize += dwSize;
    pSearchState->KernelLeft -= dwSize;

    // Describe the kernel and its patch data for the CM FC combiner
    Cm_Fc_Kernels->binary_buf  = (const char *)kernels->pBinary;
    Cm_Fc_Kernels->binary_size = kernels->iSize;
    Cm_Fc_Kernels->patch_buf   = (const char *)pPatch->pBinary;
    Cm_Fc_Kernels->patch_size  = pPatch->iSize;

    return true;
}
//---------------------------------------------------------------------------------------
// KernelDll_BuildKernel_CmFc - Build CM based FC combine Kernel
//
// Parameters: [in/out] pState - Kernel Dll state
// [in/out] pSearchState - Kernel search state
//
// Output: bool
// TRUE - Successful FALSE - Failed
//-----------------------------------------------------------------------------------------
bool KernelDll_BuildKernel_CmFc(Kdll_State *pState, Kdll_SearchState *pSearchState)
{
Kdll_KernelCache *pKernelCache = &pState->ComponentKernelCache;
Kdll_KernelCache *pPatchCache = &pState->CmFcPatchCache;
Kdll_KernelCache *pCustomCache = pState->pCustomKernelCache;
bool res;
int32_t offset = 0;
int32_t *pKernelID, *pPatchID;
uint8_t *pPatchData;
Kdll_PatchData *pKernelPatch;
uint8_t *kernel = pSearchState->Kernel;
Kdll_Symbol *pSymbols = &pSearchState->KernelLink;
uint32_t nExports = pKernelCache->nExports;
Kdll_LinkData *pExports = pKernelCache->pExports;
Kdll_LinkData *pLink;
int32_t iOffset;
uint32_t dwResolveOffset[DL_MAX_EXPORT_COUNT];
uint32_t dwTotalKernelCount;
size_t stEstimatedKernelSize;
int32_t iKUID;
bool bResolveDone;
int32_t i;
cm_fc_kernel_t Cm_Fc_kernels[DL_MAX_KERNELS];
VPHAL_RENDER_FUNCTION_ENTER;
// Disable pop-up box window for STL assertion to avoid VM hang in auto test.
#if (!LINUX)
::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
#if defined(_MSC_VER)
::_set_error_mode(_OUT_TO_STDERR);
_CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
_CrtSetReportFile(_CRT_WARN, _CRTDBG_FILE_STDERR);
_CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
_CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
_CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
#endif
#endif
pSearchState->KernelLink.dwSize = DL_MAX_SYMBOLS;
pSearchState->KernelLink.dwCount = 0;
pSearchState->KernelLink.pLink = pSearchState->LinkArray;
pSearchState->KernelSize = 0;
pSearchState->KernelLeft = sizeof(pSearchState->Kernel);
pSearchState->KernelLink.dwCount = 0;
MOS_ZeroMemory(Cm_Fc_kernels, sizeof(Cm_Fc_kernels));
dwTotalKernelCount = 0;
stEstimatedKernelSize = 0;
#if EMUL || VPHAL_LIB || _DEBUG
VPHAL_RENDER_NORMALMESSAGE("Component Kernels:");
#endif // EMUL || VPHAL_LIB || _DEBUG
pKernelID = pSearchState->KernelID;
pPatchID = pSearchState->PatchID;
pPatchData = nullptr;
for (offset = 0; offset < pSearchState->KernelCount; offset++, pKernelID++, pPatchID++, dwTotalKernelCount++)
{
// Get patch information associated with the kernel
pKernelPatch = (*pPatchID >= 0) ? &(pSearchState->Patches[*pPatchID]) : nullptr;
// Append/Patch kernel from internal cache
res = Kdll_AddKernelList(pKernelCache, pPatchCache, pSearchState, *pKernelID, pKernelPatch, pPatchData, &Cm_Fc_kernels[dwTotalKernelCount]);
stEstimatedKernelSize += Cm_Fc_kernels[dwTotalKernelCount].binary_size;
if (*pKernelID == IDR_VP_EOT)
{
dwTotalKernelCount--;
}
if (!res)
{
VPHAL_RENDER_NORMALMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
res = false;
goto finish;
}
}
// Resolve kernel dependencies
MOS_ZeroMemory(dwResolveOffset, sizeof(dwResolveOffset));
do
{
// Update exports
for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
{
if (pLink->bExport)
{
dwResolveOffset[pLink->iLabelID] = pLink->dwOffset;
}
}
bResolveDone = true;
for (pLink = pSymbols->pLink, i = pSymbols->dwCount; i > 0; i--, pLink++)
{
// validate label
if (pLink->iLabelID > nExports || // invalid label
pExports[pLink->iLabelID].bExport == 0) // label not in the export table
{
VPHAL_RENDER_NORMALMESSAGE("Invalid/unresolved label %d.", pLink->iLabelID);
res = false;
goto finish;
}
// load dependencies
if (!pLink->bExport && !dwResolveOffset[pLink->iLabelID])
{
// set flag for another pass as newly loaded
// kernels may contain dependencies of their own
bResolveDone = false;
// Add dependencies to kernel list
iKUID = pExports[pLink->iLabelID].iKUID;
res = Kdll_AddKernelList(pKernelCache, pPatchCache, pSearchState, iKUID, nullptr, nullptr, &Cm_Fc_kernels[dwTotalKernelCount]);
if (!res)
{
VPHAL_RENDER_NORMALMESSAGE("Failed to build kernel ID %d.", pSearchState->KernelID[offset]);
res = false;
goto finish;
}
dwTotalKernelCount++;
// Restart
break;
}
} // for
} while (!bResolveDone);
if (stEstimatedKernelSize > DL_MAX_KERNEL_SIZE)
{
res = false;
VPHAL_RENDER_NORMALMESSAGE("Kernel size exceeded kdll limitatin.");
goto finish;
}
stEstimatedKernelSize = DL_MAX_KERNEL_SIZE;
// Get combine kernel binary from CMFC lib
if (CM_FC_OK != cm_fc_combine_kernels(dwTotalKernelCount, Cm_Fc_kernels, (char *)pSearchState->Kernel, &stEstimatedKernelSize, nullptr))
{
res = false;
VPHAL_RENDER_NORMALMESSAGE("cm_fc_combine_kernels() function call failed.");
goto finish;
}
// Get combine kernel binary size from CMFC lib
pSearchState->KernelSize = (int) stEstimatedKernelSize;
res = true;
finish:
return res;
}
//---------------------------------------------------------------------------------------
// KernelDll_SetupFunctionPointers_Ext - Setup extended function pointers
//                                       (selects the CMFC kernel build routine
//                                       when CMFC is enabled in the state)
//
// Parameters:
//    Kdll_State *pState - [in/out] Kernel Dll state
//
// Output: true  - Function pointers are set
//         false - Failed to setup function pointers (invalid platform);
//                 note that the current implementation always returns true
//-----------------------------------------------------------------------------------------
bool KernelDll_SetupFunctionPointers_Ext(
    Kdll_State *pState)
{
    VPHAL_RENDER_FUNCTION_ENTER;

    // Without CMFC there is nothing to override: the build-kernel pointer
    // already stored in the state is left exactly as the caller set it.
    if (!pState->bEnableCMFC)
    {
        return true;
    }

    // CMFC enabled: route kernel building through the CMFC-based builder.
    pState->pfnBuildKernel = KernelDll_BuildKernel_CmFc;

    return true;
}
#ifdef __cplusplus
}
#endif // __cplusplus