libavcodec/vulkan/dpx_unpack.comp.glsl - third_party/ffmpeg - Git at Google

 /*
  * Copyright (c) 2025 Lynne <dev@lynne.ee>
  *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
  * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */

 #version 460
 #pragma shader_stage(compute)
 #extension GL_GOOGLE_include_directive : require

 #include "common.comp"

 /* Specialization constant: when true, read_data() passes every 32-bit word
  * through reverse4() before returning it. */
 layout (constant_id = 0) const bool big_endian = false;
 /* Specialization constant: when true, main() decodes with
  * parse_packed10_in_32(); otherwise it uses parse_packed_in_32(). */
 layout (constant_id = 1) const bool packed_10bit = false;

 /* Destination images. main() writes either dst[0] only, or one image per
  * component. */
 layout (set = 0, binding = 0) uniform writeonly uimage2D dst[];
 /* Source data, addressed as an array of 32-bit words through read_data(). */
 layout (set = 0, binding = 1, scalar) readonly buffer data_buf {
     uint32_t data[];
 };

 /* Per-dispatch parameters. */
 layout (push_constant, scalar) uniform pushConstants {
     int bits_per_comp; /* bit width of one component: shift step and mask width in parse_packed_in_32() */
     int nb_comp;       /* components per pixel; also the number of per-component stores in main() */
     int nb_images;     /* when 1, main() does a single store to dst[0] */
     int stride;        /* hidden inside the parse_* functions by their own `stride` parameter */
     int need_align;    /* added to the per-line bit count as (need_align << 3) */
     int padded_10bit;  /* not referenced by the code visible in this file */
     int shift;         /* not referenced by the code visible in this file */
 };

 /*
  * Fetch the 32-bit word at index `off` of the data buffer.
  * When the big_endian specialization constant is set, the word is passed
  * through reverse4() first; otherwise it is returned unchanged.
  */
 uint32_t read_data(uint off)
 {
     uint32_t word = data[off];
     return big_endian ? reverse4(word) : word;
 }

 /*
  * Unpack three 10-bit components stored in one 32-bit word.
  *
  * The word is fetched at index pos.y*stride + pos.x, i.e. one word per pixel
  * with `stride` words per row.
  *
  * Returns the components taken from bits 31..22, 21..12 and 11..2 of the
  * word, in that order; the two lowest bits are ignored and the fourth
  * component is always 0.
  */
 i16vec4 parse_packed10_in_32(ivec2 pos, int stride)
 {
     const uint32_t word = read_data(pos.y*stride + pos.x);

     /* Equivalent to rotating the word left by 10 three times and taking the
      * low 10 bits after each rotation. */
     return i16vec4(int16_t((word >> 22) & 0x3FF),
                    int16_t((word >> 12) & 0x3FF),
                    int16_t((word >>  2) & 0x3FF),
                    int16_t(0));
 }

 /*
  * Unpack up to four bits_per_comp-wide components of the pixel at `pos` from
  * a bit-packed stream of 32-bit words.
  *
  * pos    - pixel coordinate; pos.y selects the line, pos.x the pixel in it.
  * stride - number of pixels per line (main() passes the image width).
  *
  * Returns four consecutive bits_per_comp-wide fields, lowest bits first,
  * each masked to bits_per_comp bits.
  */
 i16vec4 parse_packed_in_32(ivec2 pos, int stride)
 {
     /* Size of one line in bits. */
     uint row_bits = stride*bits_per_comp*nb_comp;
     /* NOTE(review): this adds the remainder modulo 32 rather than rounding
      * up to a multiple of 32 - confirm this matches the intended layout. */
     row_bits += row_bits & 31;
     row_bits += need_align << 3;

     const uint row_start = pos.y*row_bits;
     const uint px_start = pos.x*bits_per_comp*nb_comp;

     /* Index of the first word, and the bit offset inside it. The bit offset
      * is derived from the in-line position only. */
     const uint word_idx = (row_start + px_start) >> 5;
     const uint bit_idx = px_start & 0x1f;

     /* Two adjacent words form a 64-bit window, with the first word in the
      * low half. */
     const uint32_t lo = read_data(word_idx + 0);
     const uint32_t hi = read_data(word_idx + 1);
     const uint64_t window = ((uint64_t(hi) << 32) | lo) >> bit_idx;

     const int16_t comp_mask = int16_t((1 << bits_per_comp) - 1);
     const i16vec4 fields = i16vec4(window,
                                    window >> (bits_per_comp*1),
                                    window >> (bits_per_comp*2),
                                    window >> (bits_per_comp*3));
     return fields & comp_mask;
 }

 /*
  * Entry point: each invocation decodes the pixel at gl_GlobalInvocationID.xy.
  *
  * Invocations that fail the IS_WITHIN test against the size of dst[0] do
  * nothing. With a single image, the decoded vector is stored to dst[0].
  * Otherwise component i is broadcast and stored to dst[fmt_lut[i]] for the
  * first nb_comp components.
  */
 void main(void)
 {
     const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
     const ivec2 size = imageSize(dst[0]);
     if (!IS_WITHIN(pos, size))
         return;

     /* The image width doubles as the per-line stride for both decoders. */
     const i16vec4 p = packed_10bit ? parse_packed10_in_32(pos, size.x)
                                    : parse_packed_in_32(pos, size.x);

     if (nb_images == 1) {
         imageStore(dst[0], pos, p);
         return;
     }

     /* Component -> destination image index: 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3. */
     const ivec4 fmt_lut = ivec4(2, 0, 1, 3);
     for (uint i = 0; i < nb_comp; i++) {
         imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i]));
     }
 }
	/*
	* Copyright (c) 2025 Lynne <dev@lynne.ee>
	*
	* This file is part of FFmpeg.
	*
	* FFmpeg is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2.1 of the License, or (at your option) any later version.
	*
	* FFmpeg is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with FFmpeg; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*/

	#version 460
	#pragma shader_stage(compute)
	#extension GL_GOOGLE_include_directive : require

	#include "common.comp"

	/* Specialization constant: when true, read_data() passes every 32-bit word
	 * through reverse4() before returning it. */
	layout (constant_id = 0) const bool big_endian = false;
	/* Specialization constant: when true, main() decodes with
	 * parse_packed10_in_32(); otherwise it uses parse_packed_in_32(). */
	layout (constant_id = 1) const bool packed_10bit = false;

	/* Destination images. main() writes either dst[0] only, or one image per
	 * component. */
	layout (set = 0, binding = 0) uniform writeonly uimage2D dst[];
	/* Source data, addressed as an array of 32-bit words through read_data(). */
	layout (set = 0, binding = 1, scalar) readonly buffer data_buf {
	uint32_t data[];
	};

	/* Per-dispatch parameters. */
	layout (push_constant, scalar) uniform pushConstants {
	int bits_per_comp; /* bit width of one component: shift step and mask width in parse_packed_in_32() */
	int nb_comp; /* components per pixel; also the number of per-component stores in main() */
	int nb_images; /* when 1, main() does a single store to dst[0] */
	int stride; /* hidden inside the parse_* functions by their own `stride` parameter */
	int need_align; /* added to the per-line bit count as (need_align << 3) */
	int padded_10bit; /* not referenced by the code visible in this file */
	int shift; /* not referenced by the code visible in this file */
	};

	/*
	 * Fetch the 32-bit word at index `off` of the data buffer.
	 * When the big_endian specialization constant is set, the word is passed
	 * through reverse4() first; otherwise it is returned unchanged.
	 */
	uint32_t read_data(uint off)
	{
	    uint32_t word = data[off];
	    return big_endian ? reverse4(word) : word;
	}

	/*
	 * Unpack three 10-bit components stored in one 32-bit word.
	 *
	 * The word is fetched at index pos.y*stride + pos.x, i.e. one word per pixel
	 * with `stride` words per row. Each step rotates the word left by 10 bits and
	 * takes the low 10 bits, so the components come from bits 31..22, 21..12 and
	 * 11..2 in that order. The fourth component is always 0.
	 */
	i16vec4 parse_packed10_in_32(ivec2 pos, int stride)
	{
	    uint32_t d = read_data(pos.y*stride + pos.x);
	    i16vec4 v;
	    /* Shifts bind tighter than &, which binds tighter than |, so each line
	     * is (d << 10) | ((d >> 22) & 0x3FFFFF): a rotate-left by 10. */
	    d = d << 10 | d >> 22 & 0x3FFFFF;
	    v[0] = int16_t(d & 0x3FF);
	    d = d << 10 | d >> 22 & 0x3FFFFF;
	    v[1] = int16_t(d & 0x3FF);
	    d = d << 10 | d >> 22 & 0x3FFFFF;
	    v[2] = int16_t(d & 0x3FF);
	    v[3] = int16_t(0);
	    return v;
	}

	/*
	 * Unpack up to four bits_per_comp-wide components of the pixel at `pos` from
	 * a bit-packed stream of 32-bit words.
	 *
	 * pos    - pixel coordinate; pos.y selects the line, pos.x the pixel in it.
	 * stride - number of pixels per line (main() passes the image width).
	 *
	 * Returns four consecutive bits_per_comp-wide fields, lowest bits first,
	 * each masked to bits_per_comp bits.
	 */
	i16vec4 parse_packed_in_32(ivec2 pos, int stride)
	{
	    /* Size of one line in bits. */
	    uint line_size = stride*bits_per_comp*nb_comp;
	    /* NOTE(review): this adds the remainder modulo 32 rather than rounding
	     * up to a multiple of 32 - confirm this matches the intended layout. */
	    line_size += line_size & 31;
	    line_size += need_align << 3;

	    uint line_off = pos.y*line_size;
	    uint pix_off = pos.x*bits_per_comp*nb_comp;

	    /* Index of the first word, and the bit offset inside it. The bit offset
	     * is derived from the in-line position only. */
	    uint off = (line_off + pix_off) >> 5;
	    uint bit = pix_off & 0x1f;

	    uint32_t d0 = read_data(off + 0);
	    uint32_t d1 = read_data(off + 1);

	    /* Two adjacent words form a 64-bit window, with d0 in the low half. */
	    uint64_t combined = (uint64_t(d1) << 32) | d0;
	    combined >>= bit;

	    return i16vec4(combined,
	                   combined >> (bits_per_comp*1),
	                   combined >> (bits_per_comp*2),
	                   combined >> (bits_per_comp*3)) &
	           int16_t((1 << bits_per_comp) - 1);
	}

	/*
	 * Entry point: each invocation decodes the pixel at gl_GlobalInvocationID.xy.
	 *
	 * Invocations that fail the IS_WITHIN test against the size of dst[0] do
	 * nothing. With a single image, the decoded vector is stored to dst[0].
	 * Otherwise component i is broadcast and stored to dst[fmt_lut[i]] for the
	 * first nb_comp components.
	 */
	void main(void)
	{
	    const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
	    const ivec2 size = imageSize(dst[0]);
	    if (!IS_WITHIN(pos, size))
	        return;

	    /* The image width doubles as the per-line stride for both decoders. */
	    const i16vec4 p = packed_10bit ? parse_packed10_in_32(pos, size.x)
	                                   : parse_packed_in_32(pos, size.x);

	    if (nb_images == 1) {
	        imageStore(dst[0], pos, p);
	        return;
	    }

	    /* Component -> destination image index: 0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3. */
	    const ivec4 fmt_lut = ivec4(2, 0, 1, 3);
	    for (uint i = 0; i < nb_comp; i++) {
	        imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i]));
	    }
	}