blob: 1c9eb46a7afe165c0718ab5f67ce2207e47f460e [file] [log] [blame]
/*
* FFv1 codec
*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#define DECODE
#include "common.glsl"
#include "ffv1_common.glsl"
/* Read-only array of 2x32-bit entries. Nothing in the visible part of this
 * shader reads it.
 * NOTE(review): presumably one (offset, size) pair per slice, consumed by an
 * earlier setup pass - confirm against the host code. */
layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf {
u32vec2 slice_offsets[];
};
/* Write-only status words. main() stores the range coder overread of slice N
 * in element 2*N + 1; the even elements are not written by the code visible
 * here. */
layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf {
uint32_t slice_status[];
};
/* Images receiving the decoded samples, indexed by plane. In RGB builds
 * writeout_rgb() reads planes 0..3 back from here and writes the final pixels
 * to dst[]. */
layout (set = 1, binding = 4) uniform uimage2D dec[];
#ifndef GOLOMB
/* Adaptive range coder states, one byte each. decode_line() addresses them as
 * state_off + CONTEXT_SIZE*context + state index, loading them before and
 * storing them after every symbol. */
layout (set = 1, binding = 3, scalar) buffer slice_state_buf {
uint8_t slice_rc_state[];
};
/* Shorthand for reading one range coded bit with state number idx. */
#define READ(idx) get_rac_state(idx)
/* State index used for the sign bit of the symbol being decoded; written by
 * get_isymbol(). */
shared int sym_e;
/* rc_dec[i] is set by get_isymbol() when state i was read for the current
 * symbol; decode_line() clears the flags before each symbol and afterwards
 * stores back only the flagged states. */
shared bool rc_dec[CONTEXT_SIZE];
/*
 * Decodes one signed symbol with the range coder and returns it.
 *
 * decode_line() calls this from invocation 0 only. Every state that is read
 * gets flagged in rc_dec[], so that decode_line() can adapt and store it once
 * the symbol is complete. State usage:
 *   0       zero flag (the symbol is 0 when the bit is set)
 *   1..10   unary coded exponent
 *   11..21  sign
 *   22..31  mantissa bits
 *
 * NOTE(review): READ() is get_rac_state(), which lives outside this file. It
 * presumably leaves the decoded bit in rc_data[idx] and does not adapt
 * rc_state[idx] itself - confirm in the included range coder code.
 */
int get_isymbol(void)
{
sym_e = 0;
rc_dec[0] = true;
if (READ(0))
return 0;
/* Unary exponent: e ends up as 1 + the number of set bits read, at most 11 */
int e = 1;
for (; e < 11; e++) {
rc_dec[e] = true;
if (!READ(e))
break;
}
/* a starts as the implicit leading one of the magnitude */
int a = 1;
/* Sign state; chosen before the long-exponent path below changes e */
sym_e = e + 10;
rc_dec[sym_e] = true;
/* Long exponents: only taken for more than 10 bits and ten leading ones */
if (bits > 10 && e == 11) {
/* State 10 is reused for every further exponent bit. Its previous read
 * returned 1, so it is adapted by hand (the +256 half of zero_one_state)
 * before each new read. */
do {
rc_state[10] = zero_one_state[rc_state[10] + 256];
e++;
} while (READ(10));
/* The excess mantissa bits all go through state 31 */
a = READ(31) ? 0x3 : 0x2;
for (e -= 2; e >= 11; e--) {
/* Adapt state 31 with the bit it just produced before reading again */
rc_state[31] = zero_one_state[rc_state[31] +
(rc_data[31] ? 256 : 0)];
a <<= 1;
a |= int(READ(31));
}
/* The last read of state 31 is adapted by decode_line(); e is 10 here, so
 * the loop below continues with states 30..22 */
rc_dec[31] = true;
}
/* Remaining mantissa bits, one state per bit position, MSB first */
e += 20;
for (; e >= 22; e--) {
a <<= 1;
a |= int(READ(e));
rc_dec[e] = true;
}
/* A set sign bit negates the magnitude */
return READ(sym_e) ? -a : a;
}
/*
 * Reads one line of raw (PCM coded) samples of plane p and stores them in
 * dec[p].
 *
 * Each sample is assembled MSB first from equiprobable range coder bits,
 * starting with bit value (rct_offset >> 1). Only invocation 0 of the
 * workgroup does the work; every other invocation returns immediately.
 *
 * sp  base position of the slice within dec[p]
 * w   line width passed by the caller (reduced here for planes 1 and 2 in
 *     non-RGB builds)
 * y   line within the slice
 * p   plane index
 */
void decode_line_pcm(ivec2 sp, int w, int y, int p)
{
    if (gl_LocalInvocationID.x != 0)
        return;

#ifndef RGB
    /* Planes 1 and 2 are the subsampled ones */
    if (p == 1 || p == 2) {
        sp >>= chroma_shift;
        w = ceil_rshift(w, chroma_shift.x);
    }
#endif

    int x = 0;
    while (x < w) {
        uint sample_val = 0;
        uint mask = (rct_offset >> 1);
        while (mask > 0) {
            if (get_rac_equi())
                sample_val |= mask;
            mask >>= 1;
        }
        imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(sample_val));
        x++;
    }
}
/*
 * Decodes one line of plane p with the adaptive range coder.
 *
 * The whole workgroup has to call this together, as it contains barriers.
 * Invocation i handles state i of the current context (load before, adapt and
 * store after the symbol), while invocation 0 alone decodes the symbol and
 * writes the sample.
 *
 * sp              base position of the slice within dec[p]
 * w               line width passed by the caller (reduced here for planes 1
 *                 and 2 in non-RGB builds)
 * y               line within the slice
 * p               plane index
 * state_off       offset of this plane's states within slice_rc_state[]
 * quant_table_idx forwarded to get_pred() and used to index extend_lookup[]
 * run_index       not used by this variant; the Golomb variant has the same
 *                 parameter list
 */
void decode_line(ivec2 sp, int w,
int y, int p, uint state_off,
uint8_t quant_table_idx, int run_index)
{
#ifndef RGB
if (p > 0 && p < 3) {
w = ceil_rshift(w, chroma_shift.x);
sp >>= chroma_shift;
}
#endif
/* NOTE(review): defined elsewhere; presumably primes the line cache that
 * get_pred() reads from - confirm */
linecache_load(dec[p], sp, y, 0);
for (int x = 0; x < w; x++) {
/* pr[0] is the signed context, pr[1] the predicted sample value */
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
quant_table_idx, extend_lookup[quant_table_idx]);
/* This invocation's state byte within the selected context */
uint rc_off = state_off + CONTEXT_SIZE*abs(pr[0]) + gl_LocalInvocationID.x;
rc_dec[gl_LocalInvocationID.x] = false;
rc_state[gl_LocalInvocationID.x] = slice_rc_state[rc_off];
/* All states of the context must be loaded before decoding starts */
barrier();
if (gl_LocalInvocationID.x == 0) {
int diff = get_isymbol();
/* A negative context means the residual was coded with flipped sign */
if (pr[0] < 0)
diff = -diff;
uint v = zero_extend(pr[1] + diff, bits);
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
linecache_next(TYPE(v));
}
/* Image write now visible to other invocs */
barrier();
/* Adapt and store only the states that get_isymbol() actually read; the
 * upper half of zero_one_state is used when the decoded bit was set */
if (rc_dec[gl_LocalInvocationID.x])
slice_rc_state[rc_off] =
zero_one_state[rc_state[gl_LocalInvocationID.x] +
(rc_data[gl_LocalInvocationID.x] ? 256 : 0)];
}
}
#else /* GOLOMB */
/* Adaptive Golomb-Rice states, one VlcState per context. decode_line()
 * addresses them as state_off + context. */
layout (set = 1, binding = 3, scalar) buffer slice_state_buf {
VlcState slice_vlc_state[];
};
/* Bit reader for the Golomb-Rice coded part of the slice; set up by
 * golomb_init() and consumed by decode_line(). */
GetBitContext gb;
/*
 * Switches from the range coded slice header to the Golomb-Rice coded payload.
 *
 * For version 3 with micro_version > 1, and for any later version, one range
 * coder decision is consumed first and its result discarded. The bit reader is
 * then started ac_byte_count bytes after rc.bs_start and covers the data up to
 * rc.bs_end.
 * NOTE(review): the last init_get_bits() argument is presumably a length in
 * bytes - confirm against its definition.
 */
void golomb_init(void)
{
    bool skip_decision = (version > 3) ||
                         (version == 3 && micro_version > 1);
    if (skip_decision)
        get_rac_internal(rc.range * 129 >> 8);

    /* Bytes taken up by the range coded part */
    uint64_t ac_byte_count = rc.bs_off - rc.bs_start - 1;

    init_get_bits(gb,
                  u8buf(rc.bs_start + ac_byte_count),
                  int(rc.bs_end - rc.bs_start - ac_byte_count));
}
/*
 * Decodes one line of plane p from the Golomb-Rice coded bitstream in gb.
 *
 * sp              base position of the slice within dec[p]
 * w               line width passed by the caller (reduced here for planes 1
 *                 and 2 in non-RGB builds)
 * y               line within the slice
 * p               plane index
 * state_off       index of this plane's first entry in slice_vlc_state[]
 * quant_table_idx forwarded to get_pred() and used to index extend_lookup[]
 * run_index       index into log2_run[]; updated here and carried over to the
 *                 next call by the caller
 */
void decode_line(ivec2 sp, int w,
                 int y, int p, uint state_off,
                 uint8_t quant_table_idx, inout int run_index)
{
#ifndef RGB
    if (p > 0 && p < 3) {
        w = ceil_rshift(w, chroma_shift.x);
        sp >>= chroma_shift;
    }
#endif

    linecache_load(dec[p], sp, y, 0);

    /* Samples left in the current run */
    int run_count = 0;
    /* 0: not in a run, 1: in a run that may be extended by further run codes,
     * 2: in the final, explicitly sized part of a run */
    int run_mode = 0;

    for (int x = 0; x < w; x++) {
        int diff;

        /* pr[0] is the signed context, pr[1] the predicted sample value */
        ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
                            quant_table_idx, extend_lookup[quant_table_idx]);

        uint vlc_off = state_off + abs(pr[0]);

        /* Context 0 starts run mode */
        if (pr[0] == 0 && run_mode == 0)
            run_mode = 1;

        if (run_mode != 0) {
            if (run_count == 0 && run_mode == 1) {
                int tmp_idx = int(log2_run[run_index]);
                if (get_bit(gb)) {
                    /* Full-length run segment; more may follow */
                    run_count = 1 << tmp_idx;
                    if (x + run_count <= w)
                        run_index++;
                } else {
                    /* Shorter, explicitly coded remainder ends the run */
                    if (tmp_idx != 0) {
                        run_count = int(get_bits(gb, tmp_idx));
                    } else
                        run_count = 0;
                    if (run_index != 0)
                        run_index--;
                    run_mode = 2;
                }
            }

            run_count--;
            if (run_count < 0) {
                /* Run finished: the sample that ends it is coded explicitly.
                 * It cannot be 0, so non-negative values are stored minus 1. */
                run_mode = 0;
                run_count = 0;
                diff = read_vlc_symbol(gb, slice_vlc_state[vlc_off], bits);
                if (diff >= 0)
                    diff++;
            } else {
                /* Inside the run: the sample equals its prediction */
                diff = 0;
            }
        } else {
            diff = read_vlc_symbol(gb, slice_vlc_state[vlc_off], bits);
        }

        /* A negative context means the residual was coded with flipped sign */
        if (pr[0] < 0)
            diff = -diff;

        uint v = zero_extend(pr[1] + diff, bits);
        imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
        linecache_next(TYPE(v));
    }
}
#endif
#ifdef RGB
/*
 * Undoes the reversible colour transform for one pixel and reorders the
 * channels for the output format.
 *
 * pix       decoded values; .g carries the luma-like channel, .b and .r the
 *           two difference channels biased by rct_offset, .a is passed through
 * rct_coef  weights of the difference channels: .r applies to pix.r and .g to
 *           pix.b
 *
 * Returns the pixel with its components picked through fmt_lut[0..3].
 */
ivec4 transform_sample(ivec4 pix, ivec2 rct_coef)
{
    /* Remove the bias from both difference channels */
    int diff_b = pix.b - rct_offset;
    int diff_r = pix.r - rct_offset;

    /* Recover green, then rebuild blue and red relative to it */
    int green = pix.g - ((diff_b*rct_coef.g + diff_r*rct_coef.r) >> 2);
    ivec4 rgba = ivec4(diff_r + green, green, diff_b + green, pix.a);

    return ivec4(rgba[fmt_lut[0]], rgba[fmt_lut[1]],
                 rgba[fmt_lut[2]], rgba[fmt_lut[3]]);
}
/*
 * Converts one decoded line from the per-plane images in dec[] into output
 * pixels in dst[]. The pixels of the line are spread over all invocations of
 * the workgroup.
 *
 * sc         slice being decoded; supplies the output position and RCT weights
 * sp         base position of the line data within dec[]
 * w          number of pixels in the line
 * y          line within the slice
 * apply_rct  run transform_sample() on each pixel when true; otherwise only
 *            the fmt_lut channel reordering is applied
 */
void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
{
    /* Wait for the plane writes of this line before reading them back */
    memoryBarrierImage();
    barrier();

    uint x = gl_LocalInvocationID.x;
    while (x < w) {
        ivec2 src_pos = sp + LADDR(ivec2(x, y));
        ivec2 dst_pos = sc.slice_pos + ivec2(x, y);

        /* Planes are stored in the order G, B, R, A.
         * NOTE(review): .a stays unset without transparency but is still read
         * through fmt_lut below - confirm that no output format uses it. */
        ivec4 px;
        px.g = int(imageLoad(dec[0], src_pos)[0]);
        px.b = int(imageLoad(dec[1], src_pos)[0]);
        px.r = int(imageLoad(dec[2], src_pos)[0]);
        if (transparency)
            px.a = int(imageLoad(dec[3], src_pos)[0]);

        if (!apply_rct) {
            px = ivec4(px[fmt_lut[0]], px[fmt_lut[1]],
                       px[fmt_lut[2]], px[fmt_lut[3]]);
        } else {
            px = transform_sample(px, sc.slice_rct_coef);
        }

        imageStore(dst[0], dst_pos, px);
        if (planar_rgb) {
            /* Planar output: each remaining plane gets its own component */
            for (int c = 1; c < color_planes; c++)
                imageStore(dst[c], dst_pos, ivec4(px[c]));
        }

        x += gl_WorkGroupSize.x;
    }
}
#endif
/*
 * Decodes all planes of one slice.
 *
 * sc         context of the slice (position, dimensions, coding mode, tables)
 * slice_idx  index of the slice, used to locate its coder states
 *
 * Range coder builds handle PCM coded slices (slice_coding_mode == 1) first
 * and return early. Otherwise the planes are decoded line by line: RGB builds
 * interleave the planes per line and convert each finished line with
 * writeout_rgb(), other builds decode one plane after the other.
 */
void decode_slice(in SliceContext sc, uint slice_idx)
{
    ivec2 sp = sc.slice_pos;
    int w = sc.slice_dim.x;
    int slice_h = sc.slice_dim.y;

#ifdef RGB
    /* RGB lines are decoded at a per-workgroup row offset inside dec[];
     * writeout_rgb() moves them to their final position */
    sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
#endif

#ifndef GOLOMB
    /* PCM coding */
    if (sc.slice_coding_mode == 1) {
#ifdef RGB
        for (int line = 0; line < slice_h; line++) {
            for (int plane = 0; plane < color_planes; plane++)
                decode_line_pcm(sp, w, line, plane);
            writeout_rgb(sc, sp, w, line, false);
        }
#else
        for (int plane = 0; plane < planes; plane++) {
            /* Planes 1 and 2 are vertically subsampled */
            int lines = slice_h;
            if (plane == 1 || plane == 2)
                lines = ceil_rshift(lines, chroma_shift.y);

            for (int line = 0; line < lines; line++)
                decode_line_pcm(sp, w, line, plane);
        }
#endif
        return;
    }
#endif

    /* Planes 1 and 2 share both the quantization table and the coder states */
    u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
    u32vec4 slice_state_off = (slice_idx*codec_planes +
                               uvec4(0, 1, 1, 2))*plane_state_size;

#ifdef GOLOMB
    /* division by VLC_STATE_SIZE */
    slice_state_off >>= 3;
    golomb_init();
#endif

#ifdef RGB
    /* A single run index is shared by all planes and lines of the slice */
    int run_index = 0;
    for (int line = 0; line < slice_h; line++) {
        for (int plane = 0; plane < color_planes; plane++)
            decode_line(sp, w, line, plane,
                        slice_state_off[plane], quant_table_idx[plane],
                        run_index);
        writeout_rgb(sc, sp, w, line, true);
    }
#else
    for (int plane = 0; plane < planes; plane++) {
        int lines = slice_h;
        if (plane == 1 || plane == 2)
            lines = ceil_rshift(lines, chroma_shift.y);

        /* The run index restarts with every plane */
        int run_index = 0;
        for (int line = 0; line < lines; line++)
            decode_line(sp, w, line, plane,
                        slice_state_off[plane], quant_table_idx[plane],
                        run_index);
    }
#endif
}
/*
 * Entry point: one workgroup decodes one slice.
 *
 * Invocation 0 loads the slice's range coder context into rc before decoding
 * and, afterwards, reports how far the coder read past the end of the slice
 * data in slice_status[2*slice_idx + 1]. Overreads below MAX_OVERREAD bytes
 * are reported as 0.
 */
void main(void)
{
    uint slice_idx = gl_WorkGroupID.x + gl_WorkGroupID.y*gl_NumWorkGroups.x;
    bool first_invoc = (gl_LocalInvocationID.x == 0);

    if (first_invoc)
        rc = slice_ctx[slice_idx].c;
    /* Every invocation waits here until rc has been loaded */
    barrier();

    decode_slice(slice_ctx[slice_idx], slice_idx);

    if (!first_invoc)
        return;

    uint overread = 0;
    if (rc.bs_off >= (rc.bs_end + MAX_OVERREAD))
        overread = rc.bs_off - rc.bs_end;
    slice_status[2*slice_idx + 1] = overread;
}