| /**************************************************************************** |
| * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * @file blend_jit.cpp |
| * |
| * @brief Implementation of the blend jitter |
| * |
| * Notes: |
| * |
| ******************************************************************************/ |
| #include "jit_pch.hpp" |
| #include "builder.h" |
| #include "jit_api.h" |
| #include "blend_jit.h" |
| #include "gen_state_llvm.h" |
| #include "functionpasses/passes.h" |
| |
| // components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized |
| #define QUANTIZE_THRESHOLD 2 |
| |
| using namespace llvm; |
| using namespace SwrJit; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// Interface to Jitting a blend shader |
| ////////////////////////////////////////////////////////////////////////// |
| struct BlendJit : public Builder |
| { |
| BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){}; |
| |
| template <bool Color, bool Alpha> |
| void GenerateBlendFactor(SWR_BLEND_FACTOR factor, |
| Value* constColor[4], |
| Value* src[4], |
| Value* src1[4], |
| Value* dst[4], |
| Value* result[4]) |
| { |
| Value* out[4]; |
| |
| switch (factor) |
| { |
| case BLENDFACTOR_ONE: |
| out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f); |
| break; |
| case BLENDFACTOR_SRC_COLOR: |
| out[0] = src[0]; |
| out[1] = src[1]; |
| out[2] = src[2]; |
| out[3] = src[3]; |
| break; |
| case BLENDFACTOR_SRC_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = src[3]; |
| break; |
| case BLENDFACTOR_DST_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = dst[3]; |
| break; |
| case BLENDFACTOR_DST_COLOR: |
| out[0] = dst[0]; |
| out[1] = dst[1]; |
| out[2] = dst[2]; |
| out[3] = dst[3]; |
| break; |
| case BLENDFACTOR_SRC_ALPHA_SATURATE: |
| out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3])); |
| out[3] = VIMMED1(1.0f); |
| break; |
| case BLENDFACTOR_CONST_COLOR: |
| out[0] = constColor[0]; |
| out[1] = constColor[1]; |
| out[2] = constColor[2]; |
| out[3] = constColor[3]; |
| break; |
| case BLENDFACTOR_CONST_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = constColor[3]; |
| break; |
| case BLENDFACTOR_SRC1_COLOR: |
| out[0] = src1[0]; |
| out[1] = src1[1]; |
| out[2] = src1[2]; |
| out[3] = src1[3]; |
| break; |
| case BLENDFACTOR_SRC1_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = src1[3]; |
| break; |
| case BLENDFACTOR_ZERO: |
| out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); |
| break; |
| case BLENDFACTOR_INV_SRC_COLOR: |
| out[0] = FSUB(VIMMED1(1.0f), src[0]); |
| out[1] = FSUB(VIMMED1(1.0f), src[1]); |
| out[2] = FSUB(VIMMED1(1.0f), src[2]); |
| out[3] = FSUB(VIMMED1(1.0f), src[3]); |
| break; |
| case BLENDFACTOR_INV_SRC_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]); |
| break; |
| case BLENDFACTOR_INV_DST_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]); |
| break; |
| case BLENDFACTOR_INV_DST_COLOR: |
| out[0] = FSUB(VIMMED1(1.0f), dst[0]); |
| out[1] = FSUB(VIMMED1(1.0f), dst[1]); |
| out[2] = FSUB(VIMMED1(1.0f), dst[2]); |
| out[3] = FSUB(VIMMED1(1.0f), dst[3]); |
| break; |
| case BLENDFACTOR_INV_CONST_COLOR: |
| out[0] = FSUB(VIMMED1(1.0f), constColor[0]); |
| out[1] = FSUB(VIMMED1(1.0f), constColor[1]); |
| out[2] = FSUB(VIMMED1(1.0f), constColor[2]); |
| out[3] = FSUB(VIMMED1(1.0f), constColor[3]); |
| break; |
| case BLENDFACTOR_INV_CONST_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]); |
| break; |
| case BLENDFACTOR_INV_SRC1_COLOR: |
| out[0] = FSUB(VIMMED1(1.0f), src1[0]); |
| out[1] = FSUB(VIMMED1(1.0f), src1[1]); |
| out[2] = FSUB(VIMMED1(1.0f), src1[2]); |
| out[3] = FSUB(VIMMED1(1.0f), src1[3]); |
| break; |
| case BLENDFACTOR_INV_SRC1_ALPHA: |
| out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]); |
| break; |
| default: |
| SWR_INVALID("Unsupported blend factor: %d", factor); |
| out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); |
| break; |
| } |
| |
| if (Color) |
| { |
| result[0] = out[0]; |
| result[1] = out[1]; |
| result[2] = out[2]; |
| } |
| |
| if (Alpha) |
| { |
| result[3] = out[3]; |
| } |
| } |
| |
| void Clamp(SWR_FORMAT format, Value* src[4]) |
| { |
| const SWR_FORMAT_INFO& info = GetFormatInfo(format); |
| SWR_TYPE type = info.type[0]; |
| |
| switch (type) |
| { |
| default: |
| break; |
| |
| case SWR_TYPE_UNORM: |
| src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f)); |
| src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f)); |
| src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f)); |
| src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f)); |
| break; |
| |
| case SWR_TYPE_SNORM: |
| src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f)); |
| src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f)); |
| src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f)); |
| src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f)); |
| break; |
| |
| case SWR_TYPE_UNKNOWN: |
| SWR_INVALID("Unsupport format type: %d", type); |
| } |
| } |
| |
| void ApplyDefaults(SWR_FORMAT format, Value* src[4]) |
| { |
| const SWR_FORMAT_INFO& info = GetFormatInfo(format); |
| |
| bool valid[] = {false, false, false, false}; |
| for (uint32_t c = 0; c < info.numComps; ++c) |
| { |
| valid[info.swizzle[c]] = true; |
| } |
| |
| for (uint32_t c = 0; c < 4; ++c) |
| { |
| if (!valid[c]) |
| { |
| src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty); |
| } |
| } |
| } |
| |
| void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4]) |
| { |
| const SWR_FORMAT_INFO& info = GetFormatInfo(format); |
| |
| for (uint32_t c = 0; c < info.numComps; ++c) |
| { |
| if (info.type[c] == SWR_TYPE_UNUSED) |
| { |
| src[info.swizzle[c]] = |
| BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty); |
| } |
| } |
| } |
| |
| void Quantize(SWR_FORMAT format, Value* src[4]) |
| { |
| const SWR_FORMAT_INFO& info = GetFormatInfo(format); |
| for (uint32_t c = 0; c < info.numComps; ++c) |
| { |
| if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED) |
| { |
| uint32_t swizComp = info.swizzle[c]; |
| float factor = (float)((1 << info.bpc[c]) - 1); |
| switch (info.type[c]) |
| { |
| case SWR_TYPE_UNORM: |
| src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f)); |
| src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO)); |
| src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f / factor)); |
| break; |
| default: |
| SWR_INVALID("Unsupported format type: %d", info.type[c]); |
| } |
| } |
| } |
| } |
| |
| template <bool Color, bool Alpha> |
| void BlendFunc(SWR_BLEND_OP blendOp, |
| Value* src[4], |
| Value* srcFactor[4], |
| Value* dst[4], |
| Value* dstFactor[4], |
| Value* result[4]) |
| { |
| Value* out[4]; |
| Value* srcBlend[4]; |
| Value* dstBlend[4]; |
| for (uint32_t i = 0; i < 4; ++i) |
| { |
| srcBlend[i] = FMUL(src[i], srcFactor[i]); |
| dstBlend[i] = FMUL(dst[i], dstFactor[i]); |
| } |
| |
| switch (blendOp) |
| { |
| case BLENDOP_ADD: |
| out[0] = FADD(srcBlend[0], dstBlend[0]); |
| out[1] = FADD(srcBlend[1], dstBlend[1]); |
| out[2] = FADD(srcBlend[2], dstBlend[2]); |
| out[3] = FADD(srcBlend[3], dstBlend[3]); |
| break; |
| |
| case BLENDOP_SUBTRACT: |
| out[0] = FSUB(srcBlend[0], dstBlend[0]); |
| out[1] = FSUB(srcBlend[1], dstBlend[1]); |
| out[2] = FSUB(srcBlend[2], dstBlend[2]); |
| out[3] = FSUB(srcBlend[3], dstBlend[3]); |
| break; |
| |
| case BLENDOP_REVSUBTRACT: |
| out[0] = FSUB(dstBlend[0], srcBlend[0]); |
| out[1] = FSUB(dstBlend[1], srcBlend[1]); |
| out[2] = FSUB(dstBlend[2], srcBlend[2]); |
| out[3] = FSUB(dstBlend[3], srcBlend[3]); |
| break; |
| |
| case BLENDOP_MIN: |
| out[0] = VMINPS(src[0], dst[0]); |
| out[1] = VMINPS(src[1], dst[1]); |
| out[2] = VMINPS(src[2], dst[2]); |
| out[3] = VMINPS(src[3], dst[3]); |
| break; |
| |
| case BLENDOP_MAX: |
| out[0] = VMAXPS(src[0], dst[0]); |
| out[1] = VMAXPS(src[1], dst[1]); |
| out[2] = VMAXPS(src[2], dst[2]); |
| out[3] = VMAXPS(src[3], dst[3]); |
| break; |
| |
| default: |
| SWR_INVALID("Unsupported blend operation: %d", blendOp); |
| out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f); |
| break; |
| } |
| |
| if (Color) |
| { |
| result[0] = out[0]; |
| result[1] = out[1]; |
| result[2] = out[2]; |
| } |
| |
| if (Alpha) |
| { |
| result[3] = out[3]; |
| } |
| } |
| |
| void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4]) |
| { |
| // Op: (s == PS output, d = RT contents) |
| switch (logicOp) |
| { |
| case LOGICOP_CLEAR: |
| result[0] = VIMMED1(0); |
| result[1] = VIMMED1(0); |
| result[2] = VIMMED1(0); |
| result[3] = VIMMED1(0); |
| break; |
| |
| case LOGICOP_NOR: |
| // ~(s | d) |
| result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); |
| result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); |
| result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); |
| result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); |
| break; |
| |
| case LOGICOP_AND_INVERTED: |
| // ~s & d |
| // todo: use avx andnot instr when I can find the intrinsic to call |
| result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); |
| result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); |
| result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); |
| result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); |
| break; |
| |
| case LOGICOP_COPY_INVERTED: |
| // ~s |
| result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF)); |
| result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF)); |
| result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF)); |
| result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF)); |
| break; |
| |
| case LOGICOP_AND_REVERSE: |
| // s & ~d |
| // todo: use avx andnot instr when I can find the intrinsic to call |
| result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); |
| result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); |
| result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); |
| result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); |
| break; |
| |
| case LOGICOP_INVERT: |
| // ~d |
| result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF)); |
| result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF)); |
| result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF)); |
| result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF)); |
| break; |
| |
| case LOGICOP_XOR: |
| // s ^ d |
| result[0] = XOR(src[0], dst[0]); |
| result[1] = XOR(src[1], dst[1]); |
| result[2] = XOR(src[2], dst[2]); |
| result[3] = XOR(src[3], dst[3]); |
| break; |
| |
| case LOGICOP_NAND: |
| // ~(s & d) |
| result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); |
| result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); |
| result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); |
| result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); |
| break; |
| |
| case LOGICOP_AND: |
| // s & d |
| result[0] = AND(src[0], dst[0]); |
| result[1] = AND(src[1], dst[1]); |
| result[2] = AND(src[2], dst[2]); |
| result[3] = AND(src[3], dst[3]); |
| break; |
| |
| case LOGICOP_EQUIV: |
| // ~(s ^ d) |
| result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF)); |
| result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF)); |
| result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF)); |
| result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF)); |
| break; |
| |
| case LOGICOP_NOOP: |
| result[0] = dst[0]; |
| result[1] = dst[1]; |
| result[2] = dst[2]; |
| result[3] = dst[3]; |
| break; |
| |
| case LOGICOP_OR_INVERTED: |
| // ~s | d |
| result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]); |
| result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]); |
| result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]); |
| result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]); |
| break; |
| |
| case LOGICOP_COPY: |
| result[0] = src[0]; |
| result[1] = src[1]; |
| result[2] = src[2]; |
| result[3] = src[3]; |
| break; |
| |
| case LOGICOP_OR_REVERSE: |
| // s | ~d |
| result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]); |
| result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]); |
| result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]); |
| result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]); |
| break; |
| |
| case LOGICOP_OR: |
| // s | d |
| result[0] = OR(src[0], dst[0]); |
| result[1] = OR(src[1], dst[1]); |
| result[2] = OR(src[2], dst[2]); |
| result[3] = OR(src[3], dst[3]); |
| break; |
| |
| case LOGICOP_SET: |
| result[0] = VIMMED1(0xFFFFFFFF); |
| result[1] = VIMMED1(0xFFFFFFFF); |
| result[2] = VIMMED1(0xFFFFFFFF); |
| result[3] = VIMMED1(0xFFFFFFFF); |
| break; |
| |
| default: |
| SWR_INVALID("Unsupported logic operation: %d", logicOp); |
| result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f); |
| break; |
| } |
| } |
| |
| void |
| AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask) |
| { |
| // load uint32_t reference |
| Value* pRef = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_alphaTestReference})); |
| |
| // load alpha |
| Value* pAlpha = LOAD(ppAlpha, {0, 0}); |
| |
| Value* pTest = nullptr; |
| if (state.alphaTestFormat == ALPHA_TEST_UNORM8) |
| { |
| // convert float alpha to unorm8 |
| Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f)); |
| pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty); |
| |
| // compare |
| switch (state.alphaTestFunction) |
| { |
| case ZFUNC_ALWAYS: |
| pTest = VIMMED1(true); |
| break; |
| case ZFUNC_NEVER: |
| pTest = VIMMED1(false); |
| break; |
| case ZFUNC_LT: |
| pTest = ICMP_ULT(pAlphaU8, pRef); |
| break; |
| case ZFUNC_EQ: |
| pTest = ICMP_EQ(pAlphaU8, pRef); |
| break; |
| case ZFUNC_LE: |
| pTest = ICMP_ULE(pAlphaU8, pRef); |
| break; |
| case ZFUNC_GT: |
| pTest = ICMP_UGT(pAlphaU8, pRef); |
| break; |
| case ZFUNC_NE: |
| pTest = ICMP_NE(pAlphaU8, pRef); |
| break; |
| case ZFUNC_GE: |
| pTest = ICMP_UGE(pAlphaU8, pRef); |
| break; |
| default: |
| SWR_INVALID("Invalid alpha test function"); |
| break; |
| } |
| } |
| else |
| { |
| // cast ref to float |
| pRef = BITCAST(pRef, mSimdFP32Ty); |
| |
| // compare |
| switch (state.alphaTestFunction) |
| { |
| case ZFUNC_ALWAYS: |
| pTest = VIMMED1(true); |
| break; |
| case ZFUNC_NEVER: |
| pTest = VIMMED1(false); |
| break; |
| case ZFUNC_LT: |
| pTest = FCMP_OLT(pAlpha, pRef); |
| break; |
| case ZFUNC_EQ: |
| pTest = FCMP_OEQ(pAlpha, pRef); |
| break; |
| case ZFUNC_LE: |
| pTest = FCMP_OLE(pAlpha, pRef); |
| break; |
| case ZFUNC_GT: |
| pTest = FCMP_OGT(pAlpha, pRef); |
| break; |
| case ZFUNC_NE: |
| pTest = FCMP_ONE(pAlpha, pRef); |
| break; |
| case ZFUNC_GE: |
| pTest = FCMP_OGE(pAlpha, pRef); |
| break; |
| default: |
| SWR_INVALID("Invalid alpha test function"); |
| break; |
| } |
| } |
| |
| // load current mask |
| Value* pMask = LOAD(ppMask); |
| |
| // convert to int1 mask |
| pMask = MASK(pMask); |
| |
| // and with alpha test result |
| pMask = AND(pMask, pTest); |
| |
| // convert back to vector mask |
| pMask = VMASK(pMask); |
| |
| // store new mask |
| STORE(pMask, ppMask); |
| } |
| |
| Function* Create(const BLEND_COMPILE_STATE& state) |
| { |
| std::stringstream fnName("BLND_", |
| std::ios_base::in | std::ios_base::out | std::ios_base::ate); |
| fnName << ComputeCRC(0, &state, sizeof(state)); |
| |
| // blend function signature |
| // typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*); |
| |
| std::vector<Type*> args{ |
| PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0) // SWR_BLEND_CONTEXT* |
| }; |
| |
| // std::vector<Type*> args{ |
| // PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0), // SWR_BLEND_CONTEXT* |
| //}; |
| |
| FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false); |
| Function* blendFunc = Function::Create( |
| fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule); |
| blendFunc->getParent()->setModuleIdentifier(blendFunc->getName()); |
| |
| BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc); |
| |
| IRB()->SetInsertPoint(entry); |
| |
| // arguments |
| auto argitr = blendFunc->arg_begin(); |
| Value* pBlendContext = &*argitr++; |
| pBlendContext->setName("pBlendContext"); |
| Value* pBlendState = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pBlendState}); |
| pBlendState->setName("pBlendState"); |
| Value* pSrc = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src}); |
| pSrc->setName("src"); |
| Value* pSrc1 = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src1}); |
| pSrc1->setName("src1"); |
| Value* pSrc0Alpha = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src0alpha}); |
| pSrc0Alpha->setName("src0alpha"); |
| Value* sampleNum = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_sampleNum}); |
| sampleNum->setName("sampleNum"); |
| Value* pDst = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pDst}); |
| pDst->setName("pDst"); |
| Value* pResult = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_result}); |
| pResult->setName("result"); |
| Value* ppoMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_oMask}); |
| ppoMask->setName("ppoMask"); |
| Value* ppMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pMask}); |
| ppMask->setName("pMask"); |
| |
| static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, |
| "Unsupported hot tile format"); |
| Value* dst[4]; |
| Value* constantColor[4]; |
| Value* src[4]; |
| Value* src1[4]; |
| Value* result[4]; |
| for (uint32_t i = 0; i < 4; ++i) |
| { |
| // load hot tile |
| dst[i] = LOAD(pDst, {0, i}); |
| |
| // load constant color |
| constantColor[i] = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_constantColor, i})); |
| |
| // load src |
| src[i] = LOAD(pSrc, {0, i}); |
| |
| // load src1 |
| src1[i] = LOAD(pSrc1, {0, i}); |
| } |
| Value* currentSampleMask = VIMMED1(-1); |
| if (state.desc.alphaToCoverageEnable) |
| { |
| Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f); |
| uint32_t bits = (1 << state.desc.numSamples) - 1; |
| currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits))); |
| currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty); |
| } |
| |
| // alpha test |
| if (state.desc.alphaTestEnable) |
| { |
| // Gather for archrast stats |
| STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested}); |
| AlphaTest(state, pBlendState, pSrc0Alpha, ppMask); |
| } |
| else |
| { |
| // Gather for archrast stats |
| STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested}); |
| } |
| |
| // color blend |
| if (state.blendState.blendEnable) |
| { |
| // Gather for archrast stats |
| STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended}); |
| |
| // clamp sources |
| Clamp(state.format, src); |
| Clamp(state.format, src1); |
| Clamp(state.format, dst); |
| Clamp(state.format, constantColor); |
| |
| // apply defaults to hottile contents to take into account missing components |
| ApplyDefaults(state.format, dst); |
| |
| // Force defaults for unused 'X' components |
| ApplyUnusedDefaults(state.format, dst); |
| |
| // Quantize low precision components |
| Quantize(state.format, dst); |
| |
| // special case clamping for R11G11B10_float which has no sign bit |
| if (state.format == R11G11B10_FLOAT) |
| { |
| dst[0] = VMAXPS(dst[0], VIMMED1(0.0f)); |
| dst[1] = VMAXPS(dst[1], VIMMED1(0.0f)); |
| dst[2] = VMAXPS(dst[2], VIMMED1(0.0f)); |
| dst[3] = VMAXPS(dst[3], VIMMED1(0.0f)); |
| } |
| |
| Value* srcFactor[4]; |
| Value* dstFactor[4]; |
| if (state.desc.independentAlphaBlendEnable) |
| { |
| GenerateBlendFactor<true, false>( |
| state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); |
| GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, |
| constantColor, |
| src, |
| src1, |
| dst, |
| srcFactor); |
| |
| GenerateBlendFactor<true, false>( |
| state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); |
| GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, |
| constantColor, |
| src, |
| src1, |
| dst, |
| dstFactor); |
| |
| BlendFunc<true, false>( |
| state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); |
| BlendFunc<false, true>( |
| state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result); |
| } |
| else |
| { |
| GenerateBlendFactor<true, true>( |
| state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor); |
| GenerateBlendFactor<true, true>( |
| state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor); |
| |
| BlendFunc<true, true>( |
| state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result); |
| } |
| |
| // store results out |
| for (uint32_t i = 0; i < 4; ++i) |
| { |
| STORE(result[i], pResult, {0, i}); |
| } |
| } |
| else |
| { |
| // Gather for archrast stats |
| STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended}); |
| } |
| |
| if (state.blendState.logicOpEnable) |
| { |
| const SWR_FORMAT_INFO& info = GetFormatInfo(state.format); |
| Value* vMask[4]; |
| float scale[4]; |
| |
| if (!state.blendState.blendEnable) |
| { |
| Clamp(state.format, src); |
| Clamp(state.format, dst); |
| } |
| |
| for (uint32_t i = 0; i < 4; i++) |
| { |
| if (info.type[i] == SWR_TYPE_UNUSED) |
| { |
| continue; |
| } |
| |
| if (info.bpc[i] >= 32) |
| { |
| vMask[i] = VIMMED1(0xFFFFFFFF); |
| scale[i] = 0xFFFFFFFF; |
| } |
| else |
| { |
| vMask[i] = VIMMED1((1 << info.bpc[i]) - 1); |
| if (info.type[i] == SWR_TYPE_SNORM) |
| scale[i] = (1 << (info.bpc[i] - 1)) - 1; |
| else |
| scale[i] = (1 << info.bpc[i]) - 1; |
| } |
| |
| switch (info.type[i]) |
| { |
| default: |
| SWR_INVALID("Unsupported type for logic op: %d", info.type[i]); |
| break; |
| |
| case SWR_TYPE_UNKNOWN: |
| case SWR_TYPE_UNUSED: |
| // fallthrough |
| |
| case SWR_TYPE_UINT: |
| case SWR_TYPE_SINT: |
| src[i] = BITCAST(src[i], mSimdInt32Ty); |
| dst[i] = BITCAST(dst[i], mSimdInt32Ty); |
| break; |
| case SWR_TYPE_SNORM: |
| src[i] = FP_TO_SI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty); |
| dst[i] = FP_TO_SI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty); |
| break; |
| case SWR_TYPE_UNORM: |
| src[i] = FP_TO_UI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty); |
| dst[i] = FP_TO_UI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty); |
| break; |
| } |
| } |
| |
| LogicOpFunc(state.blendState.logicOpFunc, src, dst, result); |
| |
| // store results out |
| for (uint32_t i = 0; i < 4; ++i) |
| { |
| if (info.type[i] == SWR_TYPE_UNUSED) |
| { |
| continue; |
| } |
| |
| // clear upper bits from PS output not in RT format after doing logic op |
| result[i] = AND(result[i], vMask[i]); |
| |
| switch (info.type[i]) |
| { |
| default: |
| SWR_INVALID("Unsupported type for logic op: %d", info.type[i]); |
| break; |
| |
| case SWR_TYPE_UNKNOWN: |
| case SWR_TYPE_UNUSED: |
| // fallthrough |
| |
| case SWR_TYPE_UINT: |
| case SWR_TYPE_SINT: |
| result[i] = BITCAST(result[i], mSimdFP32Ty); |
| break; |
| case SWR_TYPE_SNORM: |
| result[i] = SHL(result[i], C(32 - info.bpc[i])); |
| result[i] = ASHR(result[i], C(32 - info.bpc[i])); |
| result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i])); |
| break; |
| case SWR_TYPE_UNORM: |
| result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i])); |
| break; |
| } |
| |
| STORE(result[i], pResult, {0, i}); |
| } |
| } |
| |
| if (state.desc.oMaskEnable) |
| { |
| assert(!(state.desc.alphaToCoverageEnable)); |
| // load current mask |
| Value* oMask = LOAD(ppoMask); |
| currentSampleMask = AND(oMask, currentSampleMask); |
| } |
| |
| if (state.desc.sampleMaskEnable) |
| { |
| Value* sampleMask = LOAD(pBlendState, {0, SWR_BLEND_STATE_sampleMask}); |
| currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask); |
| } |
| |
| if (state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable || |
| state.desc.oMaskEnable) |
| { |
| // load coverage mask and mask off any lanes with no samples |
| Value* pMask = LOAD(ppMask); |
| Value* sampleMasked = SHL(C(1), sampleNum); |
| currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked)); |
| currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty); |
| Value* outputMask = AND(pMask, currentSampleMask); |
| // store new mask |
| STORE(outputMask, GEP(ppMask, C(0))); |
| } |
| |
| RET_VOID(); |
| |
| JitManager::DumpToFile(blendFunc, ""); |
| |
| ::FunctionPassManager passes(JM()->mpCurrentModule); |
| |
| passes.add(createBreakCriticalEdgesPass()); |
| passes.add(createCFGSimplificationPass()); |
| passes.add(createEarlyCSEPass()); |
| passes.add(createPromoteMemoryToRegisterPass()); |
| passes.add(createCFGSimplificationPass()); |
| passes.add(createEarlyCSEPass()); |
| passes.add(createInstructionCombiningPass()); |
| passes.add(createConstantPropagationPass()); |
| passes.add(createSCCPPass()); |
| passes.add(createAggressiveDCEPass()); |
| |
| passes.add(createLowerX86Pass(this)); |
| |
| passes.run(*blendFunc); |
| |
| JitManager::DumpToFile(blendFunc, "optimized"); |
| |
| return blendFunc; |
| } |
| }; |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief JITs from fetch shader IR |
| /// @param hJitMgr - JitManager handle |
| /// @param func - LLVM function IR |
| /// @return PFN_FETCH_FUNC - pointer to fetch code |
| PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc) |
| { |
| const llvm::Function* func = (const llvm::Function*)hFunc; |
| JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); |
| PFN_BLEND_JIT_FUNC pfnBlend; |
| pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str())); |
| // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot |
| // add new IR to the module |
| pJitMgr->mIsModuleFinalized = true; |
| |
| return pfnBlend; |
| } |
| |
| ////////////////////////////////////////////////////////////////////////// |
| /// @brief JIT compiles blend shader |
| /// @param hJitMgr - JitManager handle |
| /// @param state - blend state to build function from |
| extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, |
| const BLEND_COMPILE_STATE& state) |
| { |
| JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr); |
| |
| pJitMgr->SetupNewModule(); |
| |
| BlendJit theJit(pJitMgr); |
| HANDLE hFunc = theJit.Create(state); |
| |
| return JitBlendFunc(hJitMgr, hFunc); |
| } |