| /* |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #version 460 |
| #pragma shader_stage(compute) |
| #extension GL_GOOGLE_include_directive : require |
| |
| #include "common.comp" |
| #include "dct.glsl" |
| |
| layout (constant_id = 0) const bool interlaced = false; |
| |
| layout (set = 0, binding = 0) readonly buffer quant_idx_buf { |
| uint8_t quant_idx[]; |
| }; |
| layout (set = 0, binding = 1) readonly buffer qmat_buf { |
| uint8_t qmat[]; |
| }; |
| layout (set = 0, binding = 2) uniform uimage2D dst[]; |
| |
| layout (push_constant, scalar) uniform pushConstants { |
| u8buf slice_data; |
| uint bitstream_size; |
| |
| uint16_t width; |
| uint16_t height; |
| uint16_t mb_width; |
| uint16_t mb_height; |
| uint16_t slice_width; |
| uint16_t slice_height; |
| uint8_t log2_slice_width; |
| uint8_t log2_chroma_w; |
| uint8_t depth; |
| uint8_t alpha_info; |
| uint8_t bottom_field; |
| }; |
| |
| uint get_px(uint tex_idx, ivec2 pos) |
| { |
| if (interlaced) |
| pos = ivec2(pos.x, (pos.y << 1) + bottom_field); |
| return uint(imageLoad(dst[nonuniformEXT(tex_idx)], pos).x); |
| } |
| |
| void put_px(uint tex_idx, ivec2 pos, uint v) |
| { |
| if (interlaced) |
| pos = ivec2(pos.x, (pos.y << 1) + bottom_field); |
| imageStore(dst[nonuniformEXT(tex_idx)], pos, uvec4(v)); |
| } |
| |
| void main(void) |
| { |
| uvec3 gid = gl_GlobalInvocationID, lid = gl_LocalInvocationID; |
| uint comp = gid.z, block = (lid.y << 2) | (lid.x >> 3), idx = lid.x & 0x7; |
| uint chroma_shift = comp != 0 ? log2_chroma_w : 0; |
| bool act = gid.x < mb_width << (4 - chroma_shift); |
| |
| /** |
| * Normalize coefficients to [-1, 1] for increased precision during the iDCT. |
| * DCT coeffs have the range of a 12-bit signed integer (7.4 Inverse Transform). |
| */ |
| const float norm = 1.0f / (1 << 11); |
| |
| /* Coalesced load of DCT coeffs in shared memory, inverse quantization */ |
| if (act) { |
| /* Table 15 */ |
| uint8_t qidx = quant_idx[(gid.y >> 1) * mb_width + (gid.x >> (4 - chroma_shift))]; |
| int qscale = qidx > 128 ? (qidx - 96) << 2 : qidx, mat = int(gid.z != 0) << 6; |
| |
| [[unroll]] for (uint i = 0; i < 8; ++i) { |
| uint cidx = (i << 3) + idx; |
| int c = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) + i))), 16); |
| float v = float(c * qscale * int(qmat[mat + cidx])) * norm; |
| blocks[block][i * 9 + idx] = v * idct_scale[cidx]; |
| } |
| } |
| |
| /* Column-wise iDCT */ |
| idct8(block, idx, 9); |
| barrier(); |
| |
| /* Remap [-1, 1] to [0, 2] to remove a per-element addition in the output loop */ |
| blocks[block][idx * 9] += 1.0f; |
| |
| /* Row-wise iDCT */ |
| idct8(block, idx * 9, 1); |
| barrier(); |
| |
| float fact = 1 << (depth - 1); |
| int maxv = (1 << depth) - 1; |
| |
| /* 7.5.1 Color Component Samples. Rescale, clamp and write back to global memory */ |
| if (act) { |
| [[unroll]] for (uint i = 0; i < 8; ++i) { |
| float v = round(blocks[block][i * 9 + idx] * fact); |
| put_px(comp, ivec2(gid.x, (gid.y << 3) + i), clamp(int(v), 0, maxv)); |
| } |
| } |
| } |