| #version 450 core | |
| #extension GL_AMD_gpu_shader_half_float: enable | |
| #extension GL_ARB_gpu_shader_int64: enable | |
| // Shader entry point; its body is empty. | |
| void main() {} | |
| // Half float literals | |
| // Forms half-float constants from literals using both the lowercase "hf" | |
| // and the uppercase "HF" suffix, then combines them in a local vector. | |
| void literal() | |
| { | |
| // Scalar constant, lowercase suffix | |
| const float16_t tiny = 0.000001hf; | |
| // Vector constant, uppercase suffix | |
| const f16vec2 pairConst = f16vec2(-0.25HF, 0.03HF); | |
| f16vec2 acc; | |
| acc.x = tiny; | |
| acc += pairConst; | |
| } | |
| // Block memory layout | |
| // Aggregate of one half-float scalar, one 2-component and one 3-component | |
| // half-float vector. Used as a member type (single and arrayed) by the | |
| // blocks B1 and B2. Member comments give the alignment and byte range of | |
| // each field relative to the start of the struct. | |
| // NOTE(review): "rule N" presumably refers to the numbered std140/std430 | |
| // layout rules of the OpenGL specification - confirm. | |
| struct S | |
| { | |
| float16_t x; // rule 1: align = 2, takes offsets 0-1 | |
| f16vec2 y; // rule 2: align = 4, takes offsets 4-7 | |
| f16vec3 z; // rule 3: align = 8, takes offsets 8-13 | |
| }; | |
| // Uniform block declared with std140 layout and column-major matrices. | |
| // It contains a scalar, vectors, a scalar array, a matrix, a matrix array, | |
| // a struct and a struct array; each member comment records the alignment | |
| // and the byte range the member is expected to occupy. | |
| // NOTE(review): "rule N" presumably refers to the numbered std140 layout | |
| // rules of the OpenGL specification - confirm. | |
| layout(column_major, std140) uniform B1 | |
| { | |
| float16_t a; // rule 1: align = 2, takes offsets 0-1 | |
| f16vec2 b; // rule 2: align = 4, takes offsets 4-7 | |
| f16vec3 c; // rule 3: align = 8, takes offsets 8-13 | |
| float16_t d[2]; // rule 4: align = 16, array stride = 16, | |
| // takes offsets 16-47 | |
| f16mat2x3 e; // rule 5: align = 16, matrix stride = 16, | |
| // takes offsets 48-79 | |
| f16mat2x3 f[2]; // rule 6: align = 16, matrix stride = 16, | |
| // array stride = 32, f[0] takes | |
| // offsets 80-111, f[1] takes offsets | |
| // 112-143 | |
| S g; // rule 9: align = 16, g.x takes offsets | |
| // 144-145, g.y takes offsets 148-151, | |
| // g.z takes offsets 152-157 | |
| S h[2]; // rule 10: align = 16, array stride = 16, h[0] | |
| // takes offsets 160-175, h[1] takes | |
| // offsets 176-191 | |
| }; | |
| // Shader storage block declared with std430 layout and row-major matrices. | |
| // It mirrors the member kinds of B1 (scalar, vectors, scalar array, matrix, | |
| // matrix array, struct, struct array); each member comment records the | |
| // alignment and the byte range the member is expected to occupy. | |
| // NOTE(review): "rule N" presumably refers to the numbered std140/std430 | |
| // layout rules of the OpenGL specification - confirm. | |
| layout(row_major, std430) buffer B2 | |
| { | |
| float16_t o; // rule 1: align = 2, takes offsets 0-1 | |
| f16vec2 p; // rule 2: align = 4, takes offsets 4-7 | |
| f16vec3 q; // rule 3: align = 8, takes offsets 8-13 | |
| float16_t r[2]; // rule 4: align = 2, array stride = 2, takes | |
| // offsets 14-17 | |
| f16mat2x3 s; // rule 7: align = 4, matrix stride = 4, takes | |
| // offsets 20-31 | |
| f16mat2x3 t[2]; // rule 8: align = 4, matrix stride = 4, array | |
| // stride = 12, t[0] takes offsets | |
| // 32-43, t[1] takes offsets 44-55 | |
| S u; // rule 9: align = 8, u.x takes offsets | |
| // 56-57, u.y takes offsets 60-63, u.z | |
| // takes offsets 64-69 | |
| S v[2]; // rule 10: align = 8, array stride = 16, v[0] | |
| // takes offsets 72-87, v[1] takes | |
| // offsets 88-103 | |
| }; | |
| // Specialization constant | |
| // One specialization constant per floating-point width (ids 100, 101, 102). | |
| layout(constant_id = 100) const float16_t sf16 = 0.125hf; | |
| layout(constant_id = 101) const float sf = 0.25; | |
| layout(constant_id = 102) const double sd = 0.5lf; | |
| // Constants derived from the specialization constants through explicit | |
| // constructor conversions: half -> float, half -> double, float -> half, | |
| // double -> half. | |
| const float f16_to_f = float(sf16); | |
| // Converted with double() so the expression performs the float16 -> double | |
| // conversion its name describes, rather than float16 -> float followed by | |
| // an implicit widening. | |
| const double f16_to_d = double(sf16); | |
| const float16_t f_to_f16 = float16_t(sf); | |
| const float16_t d_to_f16 = float16_t(sd); | |
| // Applies arithmetic, relational and vector/matrix operators to half-float | |
| // scalar, vector and matrix operands. The operands are declared without | |
| // initializers; only the operator forms matter here. | |
| void operators() | |
| { | |
| float16_t s; | |
| f16vec2 v; | |
| f16mat2x2 m; | |
| bool cmp; | |
| // Arithmetic: compound assignment on the vector | |
| v += v; | |
| v -= v; | |
| v *= v; | |
| v /= v; | |
| // Arithmetic: postfix on the vector, prefix on the matrix | |
| v++; | |
| v--; | |
| ++m; | |
| --m; | |
| // Arithmetic: unary negation | |
| v = -v; | |
| m = -m; | |
| // Arithmetic: binary operators on scalar components | |
| s = v.x + v.y; | |
| s = v.x - v.y; | |
| s = v.x * v.y; | |
| s = v.x / v.y; | |
| // Relational: scalar comparisons | |
| cmp = (v.x != s); | |
| cmp = (v.y == s); | |
| cmp = (v.x > s); | |
| cmp = (v.y < s); | |
| cmp = (v.x >= s); | |
| cmp = (v.y <= s); | |
| // Vector/matrix operations: scale by scalar, then linear-algebra products | |
| v = v * s; | |
| m = m * s; | |
| v = m * v; | |
| v = v * m; | |
| m = m * m; | |
| } | |
| // Converts between f16vec3 and each other 3-component vector type (bool, | |
| // float, double, int, uint, int64, uint64) in both directions using | |
| // explicit constructors. Every pair below is "other -> float16" followed by | |
| // "float16 -> other". | |
| void typeCast() | |
| { | |
| bvec3 bv; | |
| vec3 fv; | |
| dvec3 dv; | |
| ivec3 iv; | |
| uvec3 uv; | |
| i64vec3 i64v; | |
| u64vec3 u64v; | |
| f16vec3 f16v; | |
| f16v = f16vec3(bv); // bool -> float16 | |
| bv = bvec3(f16v); // float16 -> bool | |
| f16v = f16vec3(fv); // float -> float16 | |
| fv = vec3(f16v); // float16 -> float | |
| f16v = f16vec3(dv); // double -> float16 | |
| // The source operand must be the half-float vector; constructing dvec3 | |
| // from dv itself would not involve float16 at all. | |
| dv = dvec3(f16v); // float16 -> double | |
| f16v = f16vec3(iv); // int -> float16 | |
| iv = ivec3(f16v); // float16 -> int | |
| f16v = f16vec3(uv); // uint -> float16 | |
| uv = uvec3(f16v); // float16 -> uint | |
| f16v = f16vec3(i64v); // int64 -> float16 | |
| i64v = i64vec3(f16v); // float16 -> int64 | |
| f16v = f16vec3(u64v); // uint64 -> float16 | |
| u64v = u64vec3(f16v); // float16 -> uint64 | |
| } | |
| // Calls the angle and trigonometry built-ins with f16vec4 arguments. | |
| void builtinAngleTrigFuncs() | |
| { | |
| f16vec4 src; | |
| f16vec4 dst; | |
| // Angle unit conversion | |
| dst = radians(src); | |
| dst = degrees(src); | |
| // Circular functions and their inverses | |
| dst = sin(src); | |
| dst = cos(src); | |
| dst = tan(src); | |
| dst = asin(src); | |
| dst = acos(src); | |
| // Both atan overloads: two-argument first, then one-argument | |
| dst = atan(src, dst); | |
| dst = atan(src); | |
| // Hyperbolic functions and their inverses | |
| dst = sinh(src); | |
| dst = cosh(src); | |
| dst = tanh(src); | |
| dst = asinh(src); | |
| dst = acosh(src); | |
| dst = atanh(src); | |
| } | |
| // Calls the exponential built-ins with f16vec2 arguments. | |
| void builtinExpFuncs() | |
| { | |
| f16vec2 src; | |
| f16vec2 dst; | |
| // Power (the only two-argument call in this group) | |
| dst = pow(src, dst); | |
| // Natural and base-2 exponentials / logarithms | |
| dst = exp(src); | |
| dst = log(src); | |
| dst = exp2(src); | |
| dst = log2(src); | |
| // Square root and its reciprocal | |
| dst = sqrt(src); | |
| dst = inversesqrt(src); | |
| } | |
| // Calls the "common" built-ins with half-float arguments, covering the | |
| // vector/vector, vector/scalar and boolean-selector overloads used below. | |
| void builtinCommonFuncs() | |
| { | |
| f16vec3 va; | |
| f16vec3 vb; | |
| f16vec3 vc; | |
| float16_t s; | |
| bool flag; | |
| bvec3 flags; | |
| ivec3 iexp; | |
| // Sign and rounding | |
| vb = abs(va); | |
| vb = sign(va); | |
| vb = floor(va); | |
| vb = trunc(va); | |
| vb = round(va); | |
| vb = roundEven(va); | |
| vb = ceil(va); | |
| vb = fract(va); | |
| // Modulus; modf writes through its second argument | |
| vb = mod(va, vb); | |
| vb = mod(va, s); | |
| vc = modf(va, vb); | |
| // min / max / clamp with vector and scalar bounds | |
| vc = min(va, vb); | |
| vc = min(va, s); | |
| vc = max(va, vb); | |
| vc = max(va, s); | |
| vc = clamp(va, s, vb.x); | |
| vc = clamp(va, vb, f16vec3(s)); | |
| // mix with scalar, vector and boolean-vector selectors | |
| vc = mix(va, vb, s); | |
| vc = mix(va, vb, vc); | |
| vc = mix(va, vb, flags); | |
| // step / smoothstep with vector and scalar edges | |
| vc = step(va, vb); | |
| vc = step(s, vc); | |
| vc = smoothstep(va, vb, vc); | |
| vc = smoothstep(s, va.x, vb); | |
| // Classification | |
| flag = isnan(s); | |
| flags = isinf(va); | |
| // Fused multiply-add and exponent manipulation | |
| vc = fma(va, vb, vc); | |
| vb = frexp(va, iexp); | |
| vb = ldexp(va, iexp); | |
| } | |
| // Packs an f16vec2 into a uint with packFloat2x16 and unpacks it again | |
| // with unpackFloat2x16. | |
| void builtinPackUnpackFuncs() | |
| { | |
| uint packed; | |
| f16vec2 halves; | |
| packed = packFloat2x16(halves); | |
| halves = unpackFloat2x16(packed); | |
| } | |
| // Calls the geometric built-ins with f16vec3 arguments. | |
| void builtinGeometryFuncs() | |
| { | |
| float16_t s; | |
| f16vec3 va; | |
| f16vec3 vb; | |
| f16vec3 vc; | |
| // Scalar-valued results | |
| s = length(va); | |
| s = distance(va, vb); | |
| s = dot(va, vb); | |
| // Vector-valued results | |
| vc = cross(va, vb); | |
| vb = normalize(va); | |
| vc = faceforward(va, vb, vc); | |
| vc = reflect(va, vb); | |
| vc = refract(va, vb, s); | |
| } | |
| // Calls the matrix built-ins with half-float matrices of several shapes. | |
| void builtinMatrixFuncs() | |
| { | |
| f16mat2x3 m23a; | |
| f16mat2x3 m23b; | |
| f16mat2x3 m23c; | |
| f16mat3x2 m32; | |
| f16mat3 m3; | |
| f16mat4 m4dst; | |
| f16mat4 m4src; | |
| f16vec3 col; | |
| f16vec2 row; | |
| float16_t det; | |
| // Component-wise product of two 2x3 matrices | |
| m23c = matrixCompMult(m23a, m23b); | |
| // Outer product of a 3-vector and a 2-vector, stored as 2x3 | |
| m23a = outerProduct(col, row); | |
| // Transpose of a 2x3 matrix, stored as 3x2 | |
| m32 = transpose(m23a); | |
| // Square-matrix operations | |
| det = determinant(m3); | |
| m4dst = inverse(m4src); | |
| } | |
| // Calls the component-wise vector relational built-ins on two f16vec3 | |
| // operands; each call yields a bvec3. | |
| void builtinVecRelFuncs() | |
| { | |
| f16vec3 lhs; | |
| f16vec3 rhs; | |
| bvec3 result; | |
| // Ordering comparisons | |
| result = lessThan(lhs, rhs); | |
| result = lessThanEqual(lhs, rhs); | |
| result = greaterThan(lhs, rhs); | |
| result = greaterThanEqual(lhs, rhs); | |
| // Equality comparisons | |
| result = equal(lhs, rhs); | |
| result = notEqual(lhs, rhs); | |
| } | |
| // Half-float shader input; it is the operand of the derivative and | |
| // interpolation calls in builtinFragProcFuncs(). | |
| in f16vec3 if16v; | |
| // Calls the fragment-processing built-ins (derivatives and interpolation) | |
| // on the half-float input if16v, using scalar, 2-component and 3-component | |
| // operands. | |
| void builtinFragProcFuncs() | |
| { | |
| f16vec3 f16v; | |
| // Derivative: each precision variant is called for both x and y | |
| f16v.x = dFdx(if16v.x); | |
| f16v.y = dFdy(if16v.y); | |
| f16v.xy = dFdxFine(if16v.xy); | |
| f16v.xy = dFdyFine(if16v.xy); | |
| f16v = dFdxCoarse(if16v); | |
| // y-direction coarse derivative, completing the x/y pair above | |
| f16v = dFdyCoarse(if16v); | |
| f16v.x = fwidth(if16v.x); | |
| f16v.xy = fwidthFine(if16v.xy); | |
| f16v = fwidthCoarse(if16v); | |
| // Interpolation | |
| f16v.x = interpolateAtCentroid(if16v.x); | |
| f16v.xy = interpolateAtSample(if16v.xy, 1); | |
| f16v = interpolateAtOffset(if16v, f16vec2(0.5hf)); | |
| } | |