blob: 50e91e23f9b88d0ab9d1f62c20d6e3f618476473 [file] [log] [blame]
// Copyright 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// For implementation details, please refer to:
// https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_texture_compression_astc_hdr.txt
// Please refer to this document for operator precedence (slightly different from C):
// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#operators
#version 450
// Shader-wide setup for the ASTC block decoder: integer precision, workgroup
// size, push constants and the source/destination image bindings.
// NOTE(review): `${type}` and `${UnquantTables}` look like placeholders that are
// substituted before compilation (e.g. 1DArray / 2DArray / 3D, matching the
// getPos*/block_y_size_* helpers in this file) — confirm against the build step.
precision highp int;
// One invocation per workgroup; main() decodes a single compressed block per
// invocation.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(push_constant) uniform ImageFormatBlock {
// Block footprint in texels (x = width, y = height).
uvec2 blockSize;
// Not referenced by the code visible in this file.
uint compFormat;
// Added to the z coordinate of the invocation in main().
uint baseLayer;
// Not referenced by the code visible in this file.
uint sRGB;
// Non-zero makes select_partition() double the texel coordinates.
uint smallBlock;
}
u_pushConstant;
// Source: each rgba32ui texel is read as one 128-bit compressed block.
layout(binding = 0, rgba32ui) readonly uniform uimage${type} u_image0;
// Destination: decoded texels, stored as 8-bit unsigned RGBA.
layout(binding = 1, rgba8ui) writeonly uniform uimage${type} u_image1;
// NOTE(review): presumably expands to the kUnquant*WeightMap / kUnquant*ColorMap
// tables (and their *BitIdx offset tables) used by astc_decode_block — confirm.
${UnquantTables}
// HDR CEM: 2, 3, 7, 11, 14, 15
// Indexed by the 4-bit color endpoint mode; true marks the HDR modes.
// astc_decode_block() writes the magenta error color for texels whose
// partition uses one of these modes.
const bool kHDRCEM[16] = {
false, false, true, true, false, false, false, true,
false, false, false, true, false, false, true, true,
};
// Encoding table for C.2.12
// Trit (base-3 digit) unpacking table. decode_trit() gathers 8 packed bits
// (2 + 2 + 1 + 2 + 1) into an index and reads the five trit values of the
// group from the selected row.
const uint kTritEncodings[256][5] = {
{0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
{0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
{0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
{0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
{0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
{0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
{0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
{0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
{0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
{0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
{0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
{0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
{0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
{0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
{0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
{0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
{0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
{0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
{0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
{0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
{0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
{0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
{0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
{0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
{0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
{0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
{0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
{0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
{0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
{0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
{0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
{0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
{0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
{0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
{0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
{0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
{0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
{0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
{0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
{0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
{0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
{0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
{0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
{0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
{0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
{0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
{0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
{0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
{0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
{0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
{0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
{0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
{0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
{0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
{0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
{0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
{0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
{0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
{0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
{0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
{0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
{0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
{0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
};
// Quint (base-5 digit) unpacking table. decode_quint() gathers 7 packed bits
// (3 + 2 + 2) into an index and reads the three quint values of the group
// from the selected row.
const uint kQuintEncodings[128][3] = {
{0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0},
{4, 4, 0}, {4, 4, 4}, {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0},
{4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4}, {0, 2, 0}, {1, 2, 0},
{2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
{0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0},
{4, 4, 3}, {4, 4, 4}, {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1},
{4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4}, {0, 1, 1}, {1, 1, 1},
{2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
{0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1},
{4, 2, 4}, {2, 4, 4}, {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
{4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4}, {0, 0, 2}, {1, 0, 2},
{2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
{0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2},
{2, 1, 4}, {3, 1, 4}, {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2},
{4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4}, {0, 3, 2}, {1, 3, 2},
{2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
{0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3},
{0, 0, 4}, {1, 0, 4}, {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3},
{4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4}, {0, 2, 3}, {1, 2, 3},
{2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
{0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3},
{0, 3, 4}, {1, 3, 4}};
// Number of rows in kRQuantParamTable.
const int kRQuantParamTableLength = 19;
// T, Q, B values in Table c.2.16, including binaries, in reversed order
// Each row is {trits, quints, bits}. astc_decode_block() walks the table from
// the top and keeps the first row whose encoded size fits the bits available
// for color endpoints, so rows are ordered from the widest range to the
// narrowest. The trailing comment on each row is the largest value of that
// range. The commented-out rows (ranges 4 and 2) are deliberately excluded.
const uint kRQuantParamTable[kRQuantParamTableLength][3] = {
{0, 0, 8}, // 255
{1, 0, 6}, // 191
{0, 1, 5}, // 159
{0, 0, 7}, // 127
{1, 0, 5}, // 95
{0, 1, 4}, // 79
{0, 0, 6}, // 63
{1, 0, 4}, // 47
{0, 1, 3}, // 39
{0, 0, 5}, // 31
{1, 0, 3}, // 23
{0, 1, 2}, // 19
{0, 0, 4}, // 15
{1, 0, 2}, // 11
{0, 1, 1}, // 9
{0, 0, 3}, // 7
{1, 0, 1}, // 5
//{0, 1, 0}, // 4
{0, 0, 2}, // 3
//{1, 0, 0}, // 2
{0, 0, 1}, // 1
};
// Returns bit number `bit` of `u` (0 = least significant) as 0 or 1.
uint bit(uint u, int bit) {
    uint shifted = u >> bit;
    return shifted & 1u;
}
// Extracts `bitCount` bits starting at bit `bitStart` from the 128-bit value
// stored in `u`, where u[3] holds bits 0..31 and u[0] holds bits 96..127.
// The first extracted bit ends up in bit 0 of the result.
//
// bitCount must be at most 32 and bitStart + bitCount must not exceed 128;
// reads past the end of the block are the caller's responsibility (see
// bits128fillZeros). A zero-width read returns 0.
uint bits128(uvec4 u, uint bitStart, uint bitCount) {
    // A zero-width read would otherwise take the cross-word path for
    // bitStart == 0 (bitStart + bitCount - 1 wraps around) and shift by 32,
    // which is undefined in GLSL.
    if (bitCount == 0u) {
        return 0u;
    }
    uint firstIdx = bitStart / 32;
    uint firstOffset = bitStart % 32;
    // `1 << 32` is undefined, so build the all-ones mask explicitly.
    uint bitMask = (bitCount >= 32u) ? 0xFFFFFFFFu : ((1u << bitCount) - 1u);
    uint lastIdx = (bitStart + bitCount - 1u) / 32;
    uint low = u[3 - firstIdx] >> firstOffset;
    if (firstIdx == lastIdx) {
        // The whole field lives in a single 32-bit word.
        return low & bitMask;
    }
    // The field straddles two words: the low part comes from the current word
    // and the remaining high part from the next, more significant word.
    uint firstCount = 32 - firstOffset;
    uint high = u[3 - firstIdx - 1] << firstCount;
    uint combined = high | (low & ((1u << firstCount) - 1u));
    return combined & bitMask;
}
// Reads up to `bitCount` bits starting at `bitStart`, but never past `bitEnd`
// (exclusive). Bits that would lie at or beyond `bitEnd` read as zero, so a
// start position at or after `bitEnd` yields 0.
uint bits128fillZeros(uvec4 u, uint bitStart, uint bitEnd, uint bitCount) {
    if (bitStart < bitEnd) {
        uint available = bitEnd - bitStart;
        return bits128(u, bitStart, min(available, bitCount));
    }
    return 0;
}
// Number of bits needed to store `num_vals` values whose encoding uses
// `trits` (0 or 1) trit digits, `quints` (0 or 1) quint digits and `bits`
// plain bits per value. See section C.2.22 for the formula used here.
uint get_bit_count(uint num_vals, uint trits, uint quints, uint bits) {
    // Plain bits first, then the rounded-up share of the packed digits:
    // five trits take 8 bits, three quints take 7 bits.
    uint total = num_vals * bits;
    total += (num_vals * 8 * trits + 4) / 5;
    total += (num_vals * 7 * quints + 2) / 3;
    return total;
}
// Describes one packed group for the given encoding.
//   pack       - number of values stored in a group (5 for trits, 3 for
//                quints, 1 for plain bits).
//   packedSize - number of bits one group occupies.
void get_pack_size(uint trits,
                   uint quints,
                   uint bits,
                   out uint pack,
                   out uint packedSize) {
    // Plain binary values are the fallback; trits take priority over quints.
    pack = 1;
    packedSize = bits;
    if (trits == 1) {
        pack = 5;
        packedSize = 5 * bits + 8;
    } else if (quints == 1) {
        pack = 3;
        packedSize = 3 * bits + 7;
    }
}
// Decodes one group of five trit-encoded values.
//   data  - 128-bit source, addressed as in bits128().
//   start - bit position of the group.
//   end   - exclusive bit limit; bits at or past it read as zero.
//   n     - number of plain low bits carried by each value.
// Returns the five values, each formed as (trit << n) | low bits.
uint[5] decode_trit(uvec4 data, uint start, uint end, uint n) {
    const int kTritCount = 5;
    // Width of the packed-trit slice that follows each value's low bits.
    const int kSliceWidth[5] = {2, 2, 1, 2, 1};
    uint lowBits[kTritCount];
    uint packed = 0;
    uint packedShift = 0;
    uint cursor = start;
    // The stream alternates: n low bits, then a slice of the packed trits.
    for (int idx = 0; idx < kTritCount; ++idx) {
        lowBits[idx] = bits128fillZeros(data, cursor, end, n);
        cursor += n;
        uint slice = bits128fillZeros(data, cursor, end, kSliceWidth[idx]);
        cursor += kSliceWidth[idx];
        packed |= slice << packedShift;
        packedShift += kSliceWidth[idx];
    }
    // The 8 gathered bits select the row holding the five trits.
    uint[kTritCount] values;
    for (int idx = 0; idx < kTritCount; ++idx) {
        values[idx] = (kTritEncodings[packed][idx] << n) | lowBits[idx];
    }
    return values;
}
// Decodes one group of three quint-encoded values.
//   data  - 128-bit source, addressed as in bits128().
//   start - bit position of the group.
//   end   - exclusive bit limit; bits at or past it read as zero.
//   n     - number of plain low bits carried by each value.
// Returns the three values, each formed as (quint << n) | low bits.
uint[3] decode_quint(uvec4 data, uint start, uint end, uint n) {
    const int kNumVals = 3;
    // Width of the packed-quint slice that follows each value's low bits.
    const int kInterleavedBits[3] = {3, 2, 2};
    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    // The stream alternates: n low bits, then a slice of the packed quints.
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;
        uint encoded_bits =
            bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }
    // The 7 gathered bits select the row holding the three quints.
    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        result[i] = kQuintEncodings[encoded][i] << n | m[i];
    }
    return result;
}
// Number of raw endpoint values consumed by color endpoint mode `cem`:
// modes come in groups of four, and each group needs two more values than
// the previous one (2, 4, 6, 8).
uint get_v_count(uint cem) {
    uint modeClass = cem / 4;
    return 2 * (modeClass + 1);
}
// Color endpoint modes, numbered by their 4-bit CEM value. They are used as
// the case labels in decode_ldr_for_mode(); the kHDR* values are the same
// indices that kHDRCEM marks as true and have no decoder in this file.
const uint kLDRLumaDirect = 0;
const uint kLDRLumaBaseOffset = 1;
const uint kHDRLumaLargeRange = 2;
const uint kHDRLumaSmallRange = 3;
const uint kLDRLumaAlphaDirect = 4;
const uint kLDRLumaAlphaBaseOffset = 5;
const uint kLDRRGBBaseScale = 6;
const uint kHDRRGBBaseScale = 7;
const uint kLDRRGBDirect = 8;
const uint kLDRRGBBaseOffset = 9;
const uint kLDRRGBBaseScaleTwoA = 10;
const uint kHDRRGBDirect = 11;
const uint kLDRRGBADirect = 12;
const uint kLDRRGBABaseOffset = 13;
const uint kHDRRGBDirectLDRAlpha = 14;
const uint kHDRRGBDirectHDRAlpha = 15;
// Exchanges the contents of the two vectors.
void swap(inout ivec4 v1, inout ivec4 v2) {
    ivec4 saved = v2;
    v2 = v1;
    v1 = saved;
}
// Moves the top bit (0x80) of `a` into the top bit of `b`, after shifting `b`
// right by one. What remains of `a` is shifted right by one, reduced to 6
// bits and sign-extended, giving a value in [-32, 31].
void bit_transfer_signed(inout int a, inout int b) {
    b = (b >> 1) | (a & 0x80);
    a = (a >> 1) & 0x3F;
    // Bit 5 is the sign bit of the 6-bit quantity.
    if ((a & 0x20) != 0) {
        a -= 0x40;
    }
}
// Replaces red and green by their average with blue; blue and alpha are left
// untouched.
void blue_contract(inout ivec4 val) {
    val.rg = (val.rg + val.bb) / 2;
}
// Turns the raw endpoint values of one partition into a pair of RGBA
// endpoint colors for the LDR color endpoint modes.
//   vals      - unquantized endpoint values for the whole block.
//   start_idx - index of this partition's first value in `vals`.
//   mode      - color endpoint mode (one of the k* constants above).
//   c1, c2    - receive the low and high endpoint.
// Modes without a case below (the HDR ones) yield two all-zero endpoints.
void decode_ldr_for_mode(const uint[40] vals,
                         uint start_idx,
                         uint mode,
                         out uvec4 c1,
                         out uvec4 c2) {
    // Eight values are always fetched; each mode uses only a prefix of them.
    int v0 = int(vals[start_idx]);
    int v1 = int(vals[start_idx + 1]);
    int v2 = int(vals[start_idx + 2]);
    int v3 = int(vals[start_idx + 3]);
    int v4 = int(vals[start_idx + 4]);
    int v5 = int(vals[start_idx + 5]);
    int v6 = int(vals[start_idx + 6]);
    int v7 = int(vals[start_idx + 7]);
    ivec4 lo;
    ivec4 hi;
    switch (mode) {
        case kLDRLumaDirect: {
            lo = ivec4(ivec3(v0), 255);
            hi = ivec4(ivec3(v1), 255);
            break;
        }
        case kLDRLumaBaseOffset: {
            int base = (v0 >> 2) | (v1 & 0xC0);
            int top = min(base + (v1 & 0x3F), 0xFF);
            lo = ivec4(ivec3(base), 255);
            hi = ivec4(ivec3(top), 255);
            break;
        }
        case kLDRLumaAlphaDirect: {
            lo = ivec4(ivec3(v0), v2);
            hi = ivec4(ivec3(v1), v3);
            break;
        }
        case kLDRLumaAlphaBaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            lo = clamp(ivec4(ivec3(v0), v2), 0, 255);
            hi = clamp(ivec4(ivec3(v0 + v1), v2 + v3), 0, 255);
            break;
        }
        case kLDRRGBBaseScale: {
            // The low endpoint is the high one scaled by v3 / 256.
            hi = ivec4(v0, v1, v2, 255);
            lo = ivec4((hi.rgb * v3) >> 8, 255);
            break;
        }
        case kLDRRGBDirect: {
            lo = ivec4(v0, v2, v4, 255);
            hi = ivec4(v1, v3, v5, 255);
            int sumLo = v0 + v2 + v4;
            int sumHi = v1 + v3 + v5;
            // A smaller "high" sum signals swapped, blue-contracted endpoints.
            if (sumHi < sumLo) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            break;
        }
        case kLDRRGBBaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);
            lo = ivec4(v0, v2, v4, 255);
            hi = lo + ivec4(v1, v3, v5, 0);
            // A negative offset sum signals swapped, blue-contracted endpoints.
            if (v1 + v3 + v5 < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
            break;
        }
        case kLDRRGBBaseScaleTwoA: {
            // Same as base+scale, with explicit alpha for each endpoint.
            ivec3 base = ivec3(v0, v1, v2);
            hi = ivec4(base, v5);
            lo = ivec4((base * v3) >> 8, v4);
            break;
        }
        case kLDRRGBADirect: {
            lo = ivec4(v0, v2, v4, v6);
            hi = ivec4(v1, v3, v5, v7);
            uint sumLo = uint(v0 + v2 + v4);
            uint sumHi = uint(v1 + v3 + v5);
            if (sumHi < sumLo) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            break;
        }
        case kLDRRGBABaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);
            bit_transfer_signed(v7, v6);
            lo = ivec4(v0, v2, v4, v6);
            hi = lo + ivec4(v1, v3, v5, v7);
            if (v1 + v3 + v5 < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
            break;
        }
        default:
            // Unimplemented color encoding.
            // TODO(google): Is this the correct error handling?
            lo = ivec4(0, 0, 0, 0);
            hi = lo;
    }
    c1 = uvec4(lo);
    c2 = uvec4(hi);
}
// Bit-mixing hash used by select_partition() to derive pseudo-random seeds
// from the partition index. All arithmetic wraps modulo 2^32.
uint hash52(uint p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    p = p ^ (p >> 17);
    return p;
}
// Returns the partition (0..3) that texel (x, y) belongs to.
//   seed           - partition index taken from the block.
//   x, y           - texel coordinates inside the block.
//   partitioncount - number of partitions in the block (1..4).
// Coordinates are doubled when u_pushConstant.smallBlock is non-zero. The z
// coordinate is fixed at 0 here, so its coefficients never contribute.
uint select_partition(uint seed, uint x, uint y, uint partitioncount) {
    if (partitioncount == 1) {
        return 0u;
    }
    uint z = 0;
    if (u_pushConstant.smallBlock != 0) {
        x <<= 1;
        y <<= 1;
    }
    seed += (partitioncount - 1) * 1024;
    uint rnum = hash52(seed);

    // Twelve 4-bit seeds cut from the hash, grouped by the coordinate they
    // multiply. Component order is (a, b, c, d), the four scores below.
    uvec4 xCoef = (uvec4(rnum) >> uvec4(0, 8, 16, 24)) & 0xFu;   // seeds 1, 3, 5, 7
    uvec4 yCoef = (uvec4(rnum) >> uvec4(4, 12, 20, 28)) & 0xFu;  // seeds 2, 4, 6, 8
    uvec4 zCoef = uvec4(rnum >> 26,                              // seed 11
                        (rnum >> 30) | (rnum << 2),              // seed 12
                        rnum >> 18,                              // seed 9
                        rnum >> 22) & 0xFu;                      // seed 10
    xCoef *= xCoef;
    yCoef *= yCoef;
    zCoef *= zCoef;

    // Shift amounts depend on the low seed bits and on the partition count.
    uint sh1, sh2, sh3;
    if ((seed & 1) != 0) {
        sh1 = ((seed & 2) != 0 ? 4 : 5);
        sh2 = (partitioncount == 3 ? 6 : 5);
    } else {
        sh1 = (partitioncount == 3 ? 6 : 5);
        sh2 = ((seed & 2) != 0 ? 4 : 5);
    }
    sh3 = ((seed & 0x10) != 0) ? sh1 : sh2;
    xCoef >>= sh1;
    yCoef >>= sh2;
    zCoef >>= sh3;

    // One 6-bit score per candidate partition.
    uvec4 score = xCoef * x + yCoef * y + zCoef * z +
                  (uvec4(rnum) >> uvec4(14, 10, 6, 2));
    score &= 0x3Fu;
    // Partitions that do not exist can never win.
    if (partitioncount < 4) {
        score.w = 0u;
    }
    if (partitioncount < 3) {
        score.z = 0u;
    }
    // Highest score wins; ties go to the lowest partition number.
    if (score.x >= score.y && score.x >= score.z && score.x >= score.w) {
        return 0u;
    }
    if (score.y >= score.z && score.y >= score.w) {
        return 1u;
    }
    if (score.z >= score.w) {
        return 2u;
    }
    return 3u;
}
// Builds a decoded block in which every texel inside the
// u_pushConstant.blockSize footprint is `color`. Texels are stored row-major
// with a row stride of blockSize.x; entries beyond the footprint are left
// unwritten.
uvec4[144] single_color_block(uvec4 color) {
    uvec4 texels[144];
    uint next = 0;
    for (int row = 0; row < u_pushConstant.blockSize.y; row++) {
        for (int col = 0; col < u_pushConstant.blockSize.x; col++) {
            texels[next] = color;
            next++;
        }
    }
    return texels;
}
// Block returned for invalid or unsupported encodings: opaque magenta.
uvec4[144] error_color_block() {
    const uvec4 kErrorColor = uvec4(0xff, 0, 0xff, 0xff);
    return single_color_block(kErrorColor);
}
// Decodes one 128-bit ASTC block into RGBA texels.
//   u - the compressed block, with u[3] holding bits 0..31 and u[0] holding
//       bits 96..127 (the layout bits128() expects).
// Returns the texels row-major with a row stride of
// u_pushConstant.blockSize.x; only the blockSize.x * blockSize.y footprint is
// written. Reserved or otherwise rejected encodings return
// error_color_block() (magenta), and texels whose partition uses an HDR
// endpoint mode are written as magenta individually.
uvec4[144] astc_decode_block(const uvec4 u) {
// d: dual-plane flag (bit 10 of the block mode).
uint d;
// hdr: bit 9 of the block mode; below it only selects which weight-range
// table is used. NOTE(review): the ASTC spec appears to call this the
// high-precision "H" bit rather than an HDR flag — confirm the naming.
uint hdr;
uint b;
uint a;
// r: 3-bit weight range selector.
uint r;
// width/height: dimensions of the weight grid (not of the texel block).
uint width;
uint height;
// cem: color endpoint mode of each partition.
uvec4 cem;
uint weightGrid[120];
// The block mode and partition fields all live in the lowest 32 bits.
const uint u3 = u[3];
const uint b87 = u3 >> 7 & 3;
const uint b65 = u3 >> 5 & 3;
const uint b32 = u3 >> 2 & 3;
a = b65;
b = b87;
d = bit(u3, 10);
hdr = bit(u3, 9);
// --- Block mode: weight grid size and weight range ---
if ((u3 & 3) == 0) {
// Bits 1..0 are zero: the range selector sits in bits 3..2 and bit 4.
r = b32 << 1 | bit(u3, 4);
if (b87 == 0) {
width = 12;
height = a + 2;
} else if (b87 == 1) {
width = a + 2;
height = 12;
} else if (b87 == 3) {
if (b65 == 0) {
width = 6;
height = 10;
} else if (b65 == 1) {
width = 10;
height = 6;
} else if ((u3 & 0xDFF) == 0xDFC) {
// Void-extent
// In void extent, the last 12 bits should be
// 1 1 D 1 1 1 1 1 1 1 0 0
// Where D is the HDR bit
// The constant color is taken from the high byte of each 16-bit channel
// stored in the top 64 bits of the block.
uvec4 color = uvec4(u[1] >> 8 & 0xff, u[1] >> 24 & 0xff,
u[0] >> 8 & 0xff, u[0] >> 24 & 0xff);
return single_color_block(color);
} else { // reserved
return error_color_block();
}
} else { // b87 == 2
// In this layout bits 10..9 carry B instead of the d / hdr flags.
b = u3 >> 9 & 3;
width = a + 6;
height = b + 6;
d = 0;
hdr = 0;
}
} else {
// Bits 1..0 are non-zero: they hold the upper bits of the range selector
// and bits 3..2 pick the grid-size formula.
r = (u3 & 3) << 1 | bit(u3, 4);
if (b32 == 0) {
width = b + 4;
height = a + 2;
} else if (b32 == 1) {
width = b + 8;
height = a + 2;
} else if (b32 == 2) {
width = a + 2;
height = b + 8;
} else if (bit(u3, 8) == 0) {
width = a + 2;
height = (b & 1) + 6;
} else {
width = (b & 1) + 2;
height = a + 2;
}
}
// The weight grid may not be larger than the texel block.
if (width > u_pushConstant.blockSize.x ||
height > u_pushConstant.blockSize.y) {
return error_color_block();
}
// Decode weight
uint trits = 0;
uint quints = 0;
uint bits = 0;
// Dual-plane blocks store two weights per grid point.
const uint weightCounts = height * width * (d + 1);
const int kMaxNumWeights = 64;
if (kMaxNumWeights < weightCounts) {
return error_color_block();
}
{
// Map the range selector to (trits, quints, bits); r values 0 and 1 are
// rejected in both tables.
if (hdr == 0) {
switch (r) {
case 2:
bits = 1;
break;
case 3:
trits = 1;
break;
case 4:
bits = 2;
break;
case 5:
quints = 1;
break;
case 6:
trits = 1;
bits = 1;
break;
case 7:
bits = 3;
break;
default:
return error_color_block();
}
} else {
switch (r) {
case 2:
bits = 1;
quints = 1;
break;
case 3:
trits = 1;
bits = 2;
break;
case 4:
bits = 4;
break;
case 5:
quints = 1;
bits = 2;
break;
case 6:
trits = 1;
bits = 3;
break;
case 7:
bits = 5;
break;
default:
return error_color_block();
}
}
uint packedSize = 0;
uint pack = 0;
get_pack_size(trits, quints, bits, pack, packedSize);
uint srcIdx = 0;
uint dstIdx = 0;
// Weights are stored bit-reversed starting from the top of the block, so
// reverse each word here and the word order below; after that the weight
// stream starts at bit 0 of uReversed.
uvec4 uReversed = bitfieldReverse(u);
const uint weightBitCount =
get_bit_count(weightCounts, trits, quints, bits);
const int kWeightGridMinBitLength = 24;
const int kWeightGridMaxBitLength = 96;
if (weightBitCount < kWeightGridMinBitLength ||
weightBitCount > kWeightGridMaxBitLength) {
return error_color_block();
}
uReversed =
uvec4(uReversed[3], uReversed[2], uReversed[1], uReversed[0]);
// Unquantization of plain-bit weights by bit replication, indexed by
// bits - 1: result = raw * mul | raw >> mov.
const uint kUnquantBinMulTable[] = {0x3f, 0x15, 0x9, 0x4, 0x2, 0x1};
const uint kUnquantBinMovTable[] = {0x8, 0x8, 0x8, 0x2, 0x4, 0x8};
// One packed group is consumed per iteration. Every unquantized weight
// above 32 is bumped by one.
while (dstIdx < weightCounts) {
if (trits == 1) {
uint decoded[5] =
decode_trit(uReversed, srcIdx, weightBitCount, bits);
// uint decoded[5] = {0, 0, 0, 0, 0};
for (int i = 0; i < 5; i++) {
weightGrid[dstIdx] = kUnquantTritWeightMap
[kUnquantTritWeightMapBitIdx[bits] + decoded[i]];
if (weightGrid[dstIdx] > 32) {
weightGrid[dstIdx] += 1;
}
dstIdx++;
if (dstIdx >= weightCounts) {
break;
}
}
} else if (quints == 1) {
uint decoded[3] =
decode_quint(uReversed, srcIdx, weightBitCount, bits);
for (int i = 0; i < 3; i++) {
// TODO: handle overflow in the last
weightGrid[dstIdx] = kUnquantQuintWeightMap
[kUnquantQuintWeightMapBitIdx[bits] + decoded[i]];
if (weightGrid[dstIdx] > 32) {
weightGrid[dstIdx] += 1;
}
dstIdx++;
if (dstIdx >= weightCounts) {
break;
}
}
} else {
uint decodedRaw = bits128(uReversed, srcIdx, packedSize);
uint decoded = decodedRaw * kUnquantBinMulTable[bits - 1] |
decodedRaw >> kUnquantBinMovTable[bits - 1];
weightGrid[dstIdx] = decoded;
if (weightGrid[dstIdx] > 32) {
weightGrid[dstIdx] += 1;
}
dstIdx++;
}
srcIdx += packedSize;
}
}
// --- Partition count and color endpoint modes ---
uint partitionCount = (u3 >> 11 & 3) + 1;
// Dual plane together with four partitions is rejected.
if (d == 1 && partitionCount == 4) {
return error_color_block();
}
// First bit (counted from bit 0) occupied by the weight data.
const uint weightStart =
128 - get_bit_count(weightCounts, trits, quints, bits);
// Position of the 2-bit selector naming the channel that uses the second
// weight plane; only read when d == 1.
uint dualPlaneStart = 0;
// Decode cem mode
if (partitionCount == 1) {
// Single-partition mode
cem[0] = u3 >> 13 & 0xf;
dualPlaneStart = weightStart - d * 2;
} else {
// Multi-partition mode
// Calculate CEM for all 4 partitions, even when partitionCount < 4
uint partMode = u3 >> 23 & 3;
// Number of additional mode bits stored just below the weights, indexed by
// partitionCount - 1.
const uint kExtraMBitsTable[4] = {0, 2, 5, 8};
const uint extraMBitCount =
(partMode == 0) ? 0 : kExtraMBitsTable[partitionCount - 1];
const uint extraMStart = weightStart - extraMBitCount;
dualPlaneStart = extraMStart - d * 2;
if (partMode == 0) {
// All partitions share one endpoint mode.
uint cem_all = u3 >> 25 & 0xf;
cem = uvec4(cem_all, cem_all, cem_all, cem_all);
} else {
// Each partition's mode is built from a shared base class, a per-partition
// class bit (bits 25..28) and two per-partition mode bits gathered in m.
uint cemBase = partMode - 1;
uvec4 cemHigh = cemBase + uvec4(bit(u3, 25), bit(u3, 26),
bit(u3, 27), bit(u3, 28));
const uint extraM = bits128(u, extraMStart, extraMBitCount);
// How many of the mode bits are stored in bits 27..28 of the low word,
// indexed by partitionCount - 1; the rest come from extraM.
const uint kMainMBitsTable[4] = {0, 2, 1, 0};
const uint mainMBitCount = kMainMBitsTable[partitionCount - 1];
const uint m = extraM << mainMBitCount |
((u3 >> 27 & 3) >> (2 - mainMBitCount));
cem = cemHigh << 2 |
uvec4(m & 3, m >> 2 & 3, m >> 4 & 3, m >> 6 & 3);
}
}
// Decode end points
uvec4 endPoints[4][2];
{
// Total number of raw endpoint values over all partitions.
uint totalV = 0;
for (uint part = 0; part < partitionCount; part++) {
totalV += get_v_count(cem[part]);
}
// Endpoint data starts after the header: bit 17 for a single partition,
// bit 29 otherwise.
const uint epStart = (partitionCount == 1) ? 17 : 29;
const uint totalAvailBits = dualPlaneStart - epStart;
// The subtraction is unsigned, so a negative size shows up as a huge value.
if (totalAvailBits >= 128) {
// overflowed
return error_color_block();
}
uint epQuints = 0;
uint epTrits = 0;
uint epBits = 0;
uint i;
// Pick the first (widest) encoding whose size fits the available bits.
for (i = 0; i < kRQuantParamTableLength; i++) {
epTrits = kRQuantParamTable[i][0];
epQuints = kRQuantParamTable[i][1];
epBits = kRQuantParamTable[i][2];
if (get_bit_count(totalV, epTrits, epQuints, epBits) <=
totalAvailBits) {
break;
}
}
if (i >= kRQuantParamTableLength) {
return error_color_block();
}
const uint epBitCount =
get_bit_count(totalV, epTrits, epQuints, epBits);
const uint epEnd = epStart + epBitCount;
uint packedSize = 0;
uint pack = 0;
get_pack_size(epTrits, epQuints, epBits, pack, packedSize);
// Decode end point parameters into buffer
uint vBuffer[40];
uint srcIdx = epStart;
uint dstIdx = 0;
// Unquantization of plain-bit endpoint values to 8 bits by bit
// replication, indexed by epBits - 1: result = raw * mul | raw >> mov.
const uint kUnquantBinMulTable[8] = {0xff, 0x55, 0x24, 0x11,
0x8, 0x4, 0x2, 0x1};
const uint kUnquantBinMovTable[8] = {8, 8, 1, 8, 2, 4, 6, 8};
// One packed group is consumed per iteration.
while (dstIdx < totalV) {
if (epTrits == 1) {
uint decoded[5] = decode_trit(u, srcIdx, epEnd, epBits);
for (int i = 0; i < 5; i++) {
vBuffer[dstIdx] = kUnquantTritColorMap
[kUnquantTritColorMapBitIdx[epBits] + decoded[i]];
dstIdx++;
if (dstIdx >= totalV) {
break;
}
}
} else if (epQuints == 1) {
uint decoded[3] = decode_quint(u, srcIdx, epEnd, epBits);
for (int i = 0; i < 3; i++) {
vBuffer[dstIdx] = kUnquantQuintColorMap
[kUnquantQuintColorMapBitIdx[epBits] + decoded[i]];
dstIdx++;
if (dstIdx >= totalV) {
break;
}
}
} else {
uint src = bits128(u, srcIdx, packedSize);
uint decoded = src * kUnquantBinMulTable[epBits - 1] |
src >> kUnquantBinMovTable[epBits - 1];
vBuffer[dstIdx] = decoded;
dstIdx++;
}
srcIdx += packedSize;
}
// Turn each partition's slice of vBuffer into its two endpoint colors.
uint bufferIdx = 0;
for (uint part = 0; part < partitionCount; part++) {
// TODO: HDR support
decode_ldr_for_mode(vBuffer, bufferIdx, cem[part],
endPoints[part][0], endPoints[part][1]);
bufferIdx += get_v_count(cem[part]);
}
}
// --- Per-texel weight interpolation and color blending ---
uvec4 ret[144];
{
// Scale factor that maps texel coordinates onto the weight grid.
uvec2 dst = (1024 + u_pushConstant.blockSize / 2) /
(u_pushConstant.blockSize - 1);
// Stride between consecutive grid points in weightGrid (2 when dual plane).
uint dd = d + 1;
for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
uint part =
select_partition(u3 >> 13 & 1023, w, h, partitionCount);
if (kHDRCEM[cem[part]]) {
// HDR not supported
ret[h * u_pushConstant.blockSize.x + w] = uvec4(0xff, 0,
0xff, 0xff);
continue;
}
// Calculate weight
uvec2 st = uvec2(w, h);
uvec2 cst = dst * st;
// gst is the grid position in 1/16 steps: jst is the integer grid cell,
// fst the fractional part.
uvec2 gst = (cst * (uvec2(width, height) - 1) + 32) >> 6;
uvec2 jst = gst >> 4;
uvec2 fst = gst & 0xf;
uint v0 = jst.x + jst.y * width;
// The four surrounding grid points; .x is the first-plane weight and .y
// the second-plane weight (only meaningful when d == 1).
uvec2 p00 = uvec2(weightGrid[v0 * dd], weightGrid[v0 * dd + 1]);
uvec2 p01 = uvec2(weightGrid[(v0 + 1) * dd],
weightGrid[(v0 + 1) * dd + 1]);
uvec2 p10 = uvec2(weightGrid[(v0 + width) * dd],
weightGrid[(v0 + width) * dd + 1]);
uvec2 p11 = uvec2(weightGrid[(v0 + width + 1) * dd],
weightGrid[(v0 + width + 1) * dd + 1]);
// Bilinear blend factors; the four of them sum to 16.
uint w11 = (fst.x * fst.y + 8) >> 4;
uint w10 = fst.y - w11;
uint w01 = fst.x - w11;
uint w00 = 16 - fst.x - fst.y + w11;
uvec2 i = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >>
4;
// Blend the two endpoints with the 0..64 weight.
uvec4 c0 = endPoints[part][0];
uvec4 c1 = endPoints[part][1];
uvec4 c = (c0 * (64 - i[0]) + c1 * i[0] + 32) / 64;
if (d == 1) {
// The selected channel is re-blended with the second-plane weight.
uint ccs = bits128(u, dualPlaneStart, 2);
c[ccs] = (c0[ccs] * (64 - i[1]) + c1[ccs] * i[1] + 32) / 64;
}
ret[h * u_pushConstant.blockSize.x + w] = c;
}
}
}
return ret;
}
// Image coordinate for a 1D array image: x within the layer, z as the layer.
ivec2 getPos1DArray(ivec3 pos) {
    return pos.xz;
}
// Image coordinate for a 2D array image: the position is used unchanged.
ivec3 getPos2DArray(ivec3 pos) {
    ivec3 coord = pos;
    return coord;
}
// Image coordinate for a 3D image: the position is used unchanged.
ivec3 getPos3D(ivec3 pos) {
    ivec3 coord = pos;
    return coord;
}
// Number of decoded rows main() writes per block for 1D array images: one.
uint block_y_size_1DArray() {
    return 1u;
}
// Number of decoded rows main() writes per block for 2D array images: the
// full block height.
uint block_y_size_2DArray() {
    uint rows = u_pushConstant.blockSize.y;
    return rows;
}
// Number of decoded rows main() writes per block for 3D images: the full
// block height.
uint block_y_size_3D() {
    uint rows = u_pushConstant.blockSize.y;
    return rows;
}
// Reverses the byte order of each 32-bit component.
uvec4 flip32(uvec4 a) {
    uvec4 byte0ToTop = a << 24;
    uvec4 byte1Up = (a << 8) & 0xff0000u;
    uvec4 byte2Down = (a >> 8) & 0xff00u;
    uvec4 byte3ToBottom = a >> 24;
    return byte0ToTop | byte1Up | byte2Down | byte3ToBottom;
}
// Entry point: each invocation loads one compressed block from u_image0,
// decodes it and stores the resulting texels into u_image1.
void main(void) {
    // The invocation id is the block coordinate; z is offset by baseLayer.
    ivec3 blockPos = ivec3(gl_GlobalInvocationID.xyz);
    blockPos.z += int(u_pushConstant.baseLayer);

    // astc_decode_block() expects the lowest 32 bits in component 3, so the
    // loaded components are reversed.
    uvec4 loaded = uvec4(imageLoad(u_image0, getPos${type}(blockPos)));
    uvec4[144] texels = astc_decode_block(loaded.wzyx);

    uint rows = block_y_size_${type}();
    for (uint row = 0; row < rows; row++) {
        uint rowBase = row * u_pushConstant.blockSize.x;
        for (uint col = 0; col < u_pushConstant.blockSize.x; col++) {
            ivec3 texelPos = ivec3(
                blockPos.xy * u_pushConstant.blockSize + ivec2(col, row),
                blockPos.z);
            imageStore(u_image1, getPos${type}(texelPos),
                       texels[rowBase + col]);
        }
    }
}