| /* |
| * FFv1 codec |
| * |
| * Copyright (c) 2024 Lynne <dev@lynne.ee> |
| * |
| * This file is part of FFmpeg. |
| * |
| * FFmpeg is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * FFmpeg is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with FFmpeg; if not, write to the Free Software |
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| */ |
| |
| #ifndef GOLOMB |
| /* |
| * Symbol-state access used by put_symbol(). |
| * With CACHED_SYMBOL_READER, WRITE() hands put_rac_direct() an entry of the |
| * workgroup-shared state[] array; otherwise it hands put_rac() an address |
| * built from slice_state, state_off and the given offset. |
| * The non-cached WRITE() takes `state_off` from the scope it is expanded in. |
| */ |
| #ifdef CACHED_SYMBOL_READER |
| shared uint8_t state[CONTEXT_SIZE]; |
| #define WRITE(c, off, val) put_rac_direct(c, state[off], val) |
| #else |
| #define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val) |
| #endif |
| |
| /* |
| * Range-codes one signed integer through the WRITE() macro: a zero flag at |
| * state offset 0, then (for non-zero values) the exponent in unary, the bits |
| * below the leading one from high to low, and finally the sign. |
| * Note - only handles signed values. |
| */ |
| void put_symbol(inout RangeCoder c, uint state_off, int v) |
| { |
|     if (v == 0) { |
|         WRITE(c, 0, true); |
|         return; |
|     } |
|     WRITE(c, 0, false); |
| |
|     const int mag = abs(v); |
|     const int msb = findMSB(mag); |
| |
|     /* Exponent: msb "true" flags, then a single "false" terminator. |
|      * The state offset saturates at 1 + 9. */ |
|     for (int k = 0; k <= msb; k++) |
|         WRITE(c, 1 + min(k, 9), k < msb); |
| |
|     /* Bits below the leading one, most significant first */ |
|     for (int k = msb - 1; k >= 0; k--) |
|         WRITE(c, 22 + min(k, 9), ((mag >> k) & 1) != 0); |
| |
|     /* Sign flag, at state offset 11 + min(msb, 10) */ |
|     WRITE(c, 11 + min(msb, 10), v < 0); |
| } |
| |
| /* |
| * Writes one row of a plane as raw samples: the low `bits` bits of every |
| * sample are passed to put_rac_equi(), most significant bit first. |
| * For p == 1 and p == 2 (non-RGB builds) the row width and the origin `sp` |
| * are reduced by chroma_shift. |
| */ |
| void encode_line_pcm(inout SliceContext sc, readonly uimage2D img, |
|                      ivec2 sp, int y, int p, int comp, int bits) |
| { |
| #ifdef CACHED_SYMBOL_READER |
|     /* Only the first invocation emits PCM data */ |
|     if (gl_LocalInvocationID.x != 0) |
|         return; |
| #endif |
| |
|     int row_w = sc.slice_dim.x; |
| |
| #ifndef RGB |
|     if (p == 1 || p == 2) { |
|         row_w = ceil_rshift(row_w, chroma_shift.x); |
|         sp >>= chroma_shift; |
|     } |
| #endif |
| |
|     for (int col = 0; col < row_w; col++) { |
|         const uint texel = imageLoad(img, sp + LADDR(ivec2(col, y)))[comp]; |
| |
|         int bit = bits; |
|         while (bit-- > 0) |
|             put_rac_equi(sc.c, ((texel >> bit) & 1u) != 0u); |
|     } |
| } |
| |
| /* |
| * Range-coder variant (built when GOLOMB is not defined): codes one row of a |
| * plane by calling put_symbol() once per sample. |
| * |
| * sc              - slice context; supplies slice_dim and the range coder sc.c |
| * img             - image the samples are loaded from |
| * state_off       - base offset of the contexts used for this row |
| * sp              - origin added to LADDR(ivec2(x, y)) to form load coordinates |
| * y               - row to encode |
| * p               - plane index; 1 and 2 get the chroma-shifted width/origin |
| * comp            - component read from each loaded texel |
| * bits            - forwarded to fold() |
| * quant_table_idx - forwarded to get_pred() and used to index extend_lookup |
| * run_index       - not used by this variant |
| */ |
| void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off, |
| ivec2 sp, int y, int p, int comp, int bits, |
| uint8_t quant_table_idx, const int run_index) |
| { |
| int w = sc.slice_dim.x; |
| |
| #ifndef RGB |
| /* Planes 1 and 2 use the chroma-shifted width and origin */ |
| if (p > 0 && p < 3) { |
| w = ceil_rshift(w, chroma_shift.x); |
| sp >>= chroma_shift; |
| } |
| #endif |
| |
| for (int x = 0; x < w; x++) { |
| /* d[0] is used below as the context index; d[1] is subtracted from the |
| * loaded sample */ |
| ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w, |
| quant_table_idx, extend_lookup[quant_table_idx] > 0); |
| d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1]; |
| |
| /* Keep the context index non-negative by negating both entries */ |
| if (d[0] < 0) |
| d = -d; |
| |
| d[1] = fold(d[1], bits); |
| |
| uint context_off = state_off + CONTEXT_SIZE*d[0]; |
| #ifdef CACHED_SYMBOL_READER |
| /* Each invocation copies one byte of the selected context from slice_state |
| * into the shared state[] array. |
| * NOTE(review): presumably the workgroup has at most CONTEXT_SIZE |
| * invocations - confirm against the dispatch setup. */ |
| u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x); |
| state[gl_LocalInvocationID.x] = sb.v; |
| barrier(); |
| /* This condition guards the put_symbol() call that follows the #endif */ |
| if (gl_LocalInvocationID.x == 0) |
| #endif |
| |
| put_symbol(sc.c, context_off, d[1]); |
| |
| #ifdef CACHED_SYMBOL_READER |
| /* After invocation 0 is done, every invocation writes its byte back */ |
| barrier(); |
| sb.v = state[gl_LocalInvocationID.x]; |
| #endif |
| } |
| } |
| |
| #else /* GOLOMB */ |
| |
| /* |
| * Golomb variant: codes one row of a plane into the bit writer sc.pb. |
| * |
| * A sample whose context index is 0 switches the row into run mode, where |
| * consecutive zero residuals are only counted. The first non-zero residual |
| * flushes the count, leaves run mode and is then coded as a VLC symbol like |
| * any sample outside a run. A run still open at the end of the row is |
| * flushed as well. run_index is updated in place for the caller. |
| */ |
| void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off, |
|                  ivec2 sp, int y, int p, int comp, int bits, |
|                  uint8_t quant_table_idx, inout int run_index) |
| { |
|     int row_w = sc.slice_dim.x; |
| |
| #ifndef RGB |
|     if (p == 1 || p == 2) { |
|         row_w = ceil_rshift(row_w, chroma_shift.x); |
|         sp >>= chroma_shift; |
|     } |
| #endif |
| |
|     bool in_run = false; |
|     int run_len = 0; |
| |
|     for (int col = 0; col < row_w; col++) { |
|         ivec2 at = ivec2(col, y); |
|         ivec2 pred = get_pred(img, sp, at, comp, row_w, |
|                               quant_table_idx, extend_lookup[quant_table_idx] > 0); |
| |
|         int ctx = pred[0]; |
|         int diff = int(imageLoad(img, sp + LADDR(at))[comp]) - pred[1]; |
| |
|         /* Negative context index: negate both it and the residual */ |
|         if (ctx < 0) { |
|             ctx = -ctx; |
|             diff = -diff; |
|         } |
| |
|         diff = fold(diff, bits); |
| |
|         in_run = in_run || (ctx == 0); |
| |
|         /* Zero residual inside a run: just extend the run */ |
|         if (in_run && diff == 0) { |
|             run_len++; |
|             continue; |
|         } |
| |
|         if (in_run) { |
|             /* A very unlikely loop */ |
|             while (run_len >= (1 << log2_run[run_index])) { |
|                 run_len -= 1 << log2_run[run_index]; |
|                 run_index++; |
|                 put_bits(sc.pb, 1, 1); |
|             } |
| |
|             put_bits(sc.pb, 1 + log2_run[run_index], run_len); |
|             if (run_index != 0) |
|                 run_index--; |
| |
|             run_len = 0; |
|             in_run = false; |
| |
|             /* The residual is known to be non-zero here */ |
|             if (diff > 0) |
|                 diff--; |
|         } |
| |
|         VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*ctx); |
|         Symbol sym = get_vlc_symbol(sb, diff, bits); |
|         put_bits(sc.pb, sym.bits, sym.val); |
|     } |
| |
|     /* Flush a run that reaches the end of the row */ |
|     if (in_run) { |
|         while (run_len >= (1 << log2_run[run_index])) { |
|             run_len -= 1 << log2_run[run_index]; |
|             run_index++; |
|             put_bits(sc.pb, 1, 1); |
|         } |
| |
|         if (run_len > 0) |
|             put_bits(sc.pb, 1, 1); |
|     } |
| } |
| #endif |
| |
| #ifdef RGB |
| /* |
| * Loads the pixel at `pos` and returns its components reordered by fmt_lut. |
| * All four lanes come from src[0]; when planar_rgb is non-zero, lanes |
| * 1 .. 2 + transparency are replaced by the first lane of the matching |
| * src[] image. |
| */ |
| ivec4 load_components(ivec2 pos) |
| { |
|     ivec4 raw = ivec4(imageLoad(src[0], pos)); |
| |
|     if (planar_rgb != 0) { |
|         int plane = 1; |
|         while (plane < (3 + transparency)) { |
|             raw[plane] = int(imageLoad(src[plane], pos).x); |
|             plane++; |
|         } |
|     } |
| |
|     ivec4 ordered; |
|     ordered.x = raw[fmt_lut[0]]; |
|     ordered.y = raw[fmt_lut[1]]; |
|     ordered.z = raw[fmt_lut[2]]; |
|     ordered.w = raw[fmt_lut[3]]; |
|     return ordered; |
| } |
| |
| /* |
| * Applies the RCT step to `pix` in place: b and r become differences against |
| * g, g gains a quarter of their rct_coef-weighted sum, and rct_offset is |
| * then added to both differences. |
| */ |
| void transform_sample(inout ivec4 pix, ivec2 rct_coef) |
| { |
|     const int db = pix.b - pix.g; |
|     const int dr = pix.r - pix.g; |
| |
|     pix.g += (dr*rct_coef.x + db*rct_coef.y) >> 2; |
|     pix.b = db + rct_offset; |
|     pix.r = dr + rct_offset; |
| } |
| |
| /* |
| * Stages row `y` of the slice into the `tmp` image. Columns are split across |
| * the workgroup: each invocation starts at its own local index and advances |
| * by the workgroup width. Every pixel is fetched with load_components(), |
| * optionally passed through transform_sample(), and stored at |
| * sp + LADDR(ivec2(x, y)). |
| */ |
| void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) |
| { |
|     uint col = gl_LocalInvocationID.x; |
| |
|     while (col < w) { |
|         ivec2 cell = ivec2(col, y); |
|         ivec2 dst = sp + LADDR(cell); |
| |
|         ivec4 px = load_components(sc.slice_pos + cell); |
| |
|         if (expectEXT(apply_rct, true)) |
|             transform_sample(px, sc.slice_rct_coef); |
| |
|         imageStore(tmp, dst, px); |
| |
|         col += gl_WorkGroupSize.x; |
|     } |
| } |
| #endif |
| |
| /* |
| * Encodes every row of every component of one slice. |
| * |
| * Without GOLOMB, a slice whose slice_coding_mode is 1 is written as raw |
| * samples through encode_line_pcm(); all other slices go through |
| * encode_line(). In RGB builds each row is first staged into `tmp` by |
| * preload_rgb(), then components are coded in the order 1, 2, 0 (and 3 when |
| * transparency is 1). |
| * |
| * sc        - slice context (position, dimensions, coder state) |
| * slice_idx - index of the slice, used to locate its per-plane state |
| */ |
| void encode_slice(inout SliceContext sc, const uint slice_idx) |
| { |
|     ivec2 sp = sc.slice_pos; |
| |
| #ifndef RGB |
|     int bits = bits_per_raw_sample; |
| #else |
|     /* One extra bit unless slice_coding_mode is 1. This used to be written |
|      * as "bits = 9; if (bits != 8 || ...)", a guard that was always taken, |
|      * so the assignment is now unconditional. */ |
|     int bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1); |
| |
|     sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE; |
| #endif |
| |
|     /* NOTE(review): in the RGB paths below preload_rgb() stores to `tmp` from |
|      * every invocation and the encode_line*() calls read it back right away; |
|      * no barrier is visible in between here - confirm that this is safe for |
|      * workgroups with more than one invocation. */ |
| |
| #ifndef GOLOMB |
|     if (sc.slice_coding_mode == 1) { |
| #ifndef RGB |
|         for (int c = 0; c < components; c++) { |
| |
|             int h = sc.slice_dim.y; |
|             if (c > 0 && c < 3) |
|                 h = ceil_rshift(h, chroma_shift.y); |
| |
|             /* Takes into account dual-plane YUV formats */ |
|             int p = min(c, planes - 1); |
|             int comp = c - p; |
| |
|             for (int y = 0; y < h; y++) |
|                 encode_line_pcm(sc, src[p], sp, y, p, comp, bits); |
|         } |
| #else |
|         for (int y = 0; y < sc.slice_dim.y; y++) { |
|             /* Raw path: stage the row without applying the RCT */ |
|             preload_rgb(sc, sp, sc.slice_dim.x, y, false); |
| |
|             encode_line_pcm(sc, tmp, sp, y, 0, 1, bits); |
|             encode_line_pcm(sc, tmp, sp, y, 0, 2, bits); |
|             encode_line_pcm(sc, tmp, sp, y, 0, 0, bits); |
|             if (transparency == 1) |
|                 encode_line_pcm(sc, tmp, sp, y, 0, 3, bits); |
|         } |
| #endif |
|     } else |
| #endif |
|     { |
|         /* Components 1 and 2 share a quantization table and a state plane */ |
|         u8vec4 quant_table_idx = sc.quant_table_idx.xyyz; |
|         u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size; |
| |
| #ifndef RGB |
|         for (int c = 0; c < components; c++) { |
|             /* The run index starts over for every component */ |
|             int run_index = 0; |
| |
|             int h = sc.slice_dim.y; |
|             if (c > 0 && c < 3) |
|                 h = ceil_rshift(h, chroma_shift.y); |
| |
|             /* Takes into account dual-plane YUV formats */ |
|             int p = min(c, planes - 1); |
|             int comp = c - p; |
| |
|             for (int y = 0; y < h; y++) |
|                 encode_line(sc, src[p], slice_state_off[c], sp, y, p, |
|                             comp, bits, quant_table_idx[c], run_index); |
|         } |
| #else |
|         /* A single run index is carried across all rows and components */ |
|         int run_index = 0; |
|         for (int y = 0; y < sc.slice_dim.y; y++) { |
|             preload_rgb(sc, sp, sc.slice_dim.x, y, true); |
| |
|             encode_line(sc, tmp, slice_state_off[0], |
|                         sp, y, 0, 1, bits, quant_table_idx[0], run_index); |
|             encode_line(sc, tmp, slice_state_off[1], |
|                         sp, y, 0, 2, bits, quant_table_idx[1], run_index); |
|             encode_line(sc, tmp, slice_state_off[2], |
|                         sp, y, 0, 0, bits, quant_table_idx[2], run_index); |
|             if (transparency == 1) |
|                 encode_line(sc, tmp, slice_state_off[3], |
|                             sp, y, 0, 3, bits, quant_table_idx[3], run_index); |
|         } |
| #endif |
|     } |
| } |
| |
| /* |
| * Terminates the slice bitstream and appends its trailer: |
| *  - the payload length as three bytes, most significant first; |
| *  - when `ec` is non-zero, one zero byte followed by a CRC computed over |
| *    everything written so far, stored least significant byte first. |
| * The final length and the slice's offset from out_data are stored in |
| * slice_results[]. |
| */ |
| void finalize_slice(inout SliceContext sc, const uint slice_idx) |
| { |
| #ifdef CACHED_SYMBOL_READER |
|     /* Only the first invocation finalizes the slice */ |
|     if (gl_LocalInvocationID.x != 0) |
|         return; |
| #endif |
| |
| #ifdef GOLOMB |
|     const uint32_t payload_len = sc.hdr_len + flush_put_bits(sc.pb); |
| #else |
|     const uint32_t payload_len = rac_terminate(sc.c); |
| #endif |
| |
|     u8buf out_bs = u8buf(sc.c.bytestream_start); |
| |
|     /* Append slice length */ |
|     const u8vec4 len_bytes = unpack8(payload_len); |
|     out_bs[payload_len + 0].v = len_bytes.z; |
|     out_bs[payload_len + 1].v = len_bytes.y; |
|     out_bs[payload_len + 2].v = len_bytes.x; |
| |
|     uint32_t total_len = payload_len + 3; |
| |
|     /* Calculate and write CRC */ |
|     if (ec != 0) { |
|         out_bs[total_len].v = uint8_t(0); |
|         total_len += 1; |
| |
|         uint32_t crc = crcref; |
|         for (int k = 0; k < total_len; k++) { |
|             const uint32_t lut_idx = (crc & 0xFF) ^ uint32_t(out_bs[k].v); |
|             crc = crc_ieee[lut_idx] ^ (crc >> 8); |
|         } |
| |
|         if (crcref != 0x00000000) |
|             crc ^= 0x8CD88196; |
| |
|         const u8vec4 crc_bytes = unpack8(crc); |
|         out_bs[total_len + 0].v = crc_bytes.x; |
|         out_bs[total_len + 1].v = crc_bytes.y; |
|         out_bs[total_len + 2].v = crc_bytes.z; |
|         out_bs[total_len + 3].v = crc_bytes.w; |
|         total_len += 4; |
|     } |
| |
|     slice_results[slice_idx*2 + 0] = total_len; |
|     slice_results[slice_idx*2 + 1] = uint64_t(out_bs) - uint64_t(out_data); |
| } |
| |
| /* |
| * Entry point: each workgroup handles one slice, whose index is the |
| * workgroup's position in the dispatch grid flattened row by row. |
| * The slice is encoded first and finalized afterwards. |
| */ |
| void main(void) |
| { |
|     const uint row_base = gl_WorkGroupID.y*gl_NumWorkGroups.x; |
|     const uint slice_idx = row_base + gl_WorkGroupID.x; |
| |
|     encode_slice(slice_ctx[slice_idx], slice_idx); |
|     finalize_slice(slice_ctx[slice_idx], slice_idx); |
| } |