| /* |
| * Copyright © 2018 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "isl/isl.h" |
| |
| #include "brw_nir.h" |
| #include "compiler/nir/nir_builder.h" |
| #include "compiler/nir/nir_format_convert.h" |
| |
| /* The higher compiler layers use the GL enums for image formats even if |
| * they come in from SPIR-V or Vulkan. We need to turn them into an ISL |
| * enum before we can use them. |
| */ |
| static enum isl_format |
| isl_format_for_gl_format(uint32_t gl_format) |
| { |
| switch (gl_format) { |
| case GL_R8: return ISL_FORMAT_R8_UNORM; |
| case GL_R8_SNORM: return ISL_FORMAT_R8_SNORM; |
| case GL_R8UI: return ISL_FORMAT_R8_UINT; |
| case GL_R8I: return ISL_FORMAT_R8_SINT; |
| case GL_RG8: return ISL_FORMAT_R8G8_UNORM; |
| case GL_RG8_SNORM: return ISL_FORMAT_R8G8_SNORM; |
| case GL_RG8UI: return ISL_FORMAT_R8G8_UINT; |
| case GL_RG8I: return ISL_FORMAT_R8G8_SINT; |
| case GL_RGBA8: return ISL_FORMAT_R8G8B8A8_UNORM; |
| case GL_RGBA8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM; |
| case GL_RGBA8UI: return ISL_FORMAT_R8G8B8A8_UINT; |
| case GL_RGBA8I: return ISL_FORMAT_R8G8B8A8_SINT; |
| case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT; |
| case GL_RGB10_A2: return ISL_FORMAT_R10G10B10A2_UNORM; |
| case GL_RGB10_A2UI: return ISL_FORMAT_R10G10B10A2_UINT; |
| case GL_R16: return ISL_FORMAT_R16_UNORM; |
| case GL_R16_SNORM: return ISL_FORMAT_R16_SNORM; |
| case GL_R16F: return ISL_FORMAT_R16_FLOAT; |
| case GL_R16UI: return ISL_FORMAT_R16_UINT; |
| case GL_R16I: return ISL_FORMAT_R16_SINT; |
| case GL_RG16: return ISL_FORMAT_R16G16_UNORM; |
| case GL_RG16_SNORM: return ISL_FORMAT_R16G16_SNORM; |
| case GL_RG16F: return ISL_FORMAT_R16G16_FLOAT; |
| case GL_RG16UI: return ISL_FORMAT_R16G16_UINT; |
| case GL_RG16I: return ISL_FORMAT_R16G16_SINT; |
| case GL_RGBA16: return ISL_FORMAT_R16G16B16A16_UNORM; |
| case GL_RGBA16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM; |
| case GL_RGBA16F: return ISL_FORMAT_R16G16B16A16_FLOAT; |
| case GL_RGBA16UI: return ISL_FORMAT_R16G16B16A16_UINT; |
| case GL_RGBA16I: return ISL_FORMAT_R16G16B16A16_SINT; |
| case GL_R32F: return ISL_FORMAT_R32_FLOAT; |
| case GL_R32UI: return ISL_FORMAT_R32_UINT; |
| case GL_R32I: return ISL_FORMAT_R32_SINT; |
| case GL_RG32F: return ISL_FORMAT_R32G32_FLOAT; |
| case GL_RG32UI: return ISL_FORMAT_R32G32_UINT; |
| case GL_RG32I: return ISL_FORMAT_R32G32_SINT; |
| case GL_RGBA32F: return ISL_FORMAT_R32G32B32A32_FLOAT; |
| case GL_RGBA32UI: return ISL_FORMAT_R32G32B32A32_UINT; |
| case GL_RGBA32I: return ISL_FORMAT_R32G32B32A32_SINT; |
| case GL_NONE: return ISL_FORMAT_UNSUPPORTED; |
| default: |
| assert(!"Invalid image format"); |
| return ISL_FORMAT_UNSUPPORTED; |
| } |
| } |
| |
| static nir_ssa_def * |
| _load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset) |
| { |
| nir_intrinsic_instr *load = |
| nir_intrinsic_instr_create(b->shader, |
| nir_intrinsic_image_deref_load_param_intel); |
| load->src[0] = nir_src_for_ssa(&deref->dest.ssa); |
| nir_intrinsic_set_base(load, offset / 4); |
| |
| switch (offset) { |
| case BRW_IMAGE_PARAM_OFFSET_OFFSET: |
| case BRW_IMAGE_PARAM_SWIZZLING_OFFSET: |
| load->num_components = 2; |
| break; |
| case BRW_IMAGE_PARAM_TILING_OFFSET: |
| case BRW_IMAGE_PARAM_SIZE_OFFSET: |
| load->num_components = 3; |
| break; |
| case BRW_IMAGE_PARAM_STRIDE_OFFSET: |
| load->num_components = 4; |
| break; |
| default: |
| unreachable("Invalid param offset"); |
| } |
| nir_ssa_dest_init(&load->instr, &load->dest, |
| load->num_components, 32, NULL); |
| |
| nir_builder_instr_insert(b, &load->instr); |
| return &load->dest.ssa; |
| } |
| |
| #define load_image_param(b, d, o) \ |
| _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET) |
| |
| static nir_ssa_def * |
| image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref, |
| nir_ssa_def *coord) |
| { |
| nir_ssa_def *size = load_image_param(b, deref, SIZE); |
| nir_ssa_def *cmp = nir_ilt(b, coord, size); |
| |
| unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type); |
| nir_ssa_def *in_bounds = nir_imm_true(b); |
| for (unsigned i = 0; i < coord_comps; i++) |
| in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i)); |
| |
| return in_bounds; |
| } |
| |
| /** Calculate the offset in memory of the texel given by \p coord. |
| * |
| * This is meant to be used with untyped surface messages to access a tiled |
| * surface, what involves taking into account the tiling and swizzling modes |
| * of the surface manually so it will hopefully not happen very often. |
| * |
| * The tiling algorithm implemented here matches either the X or Y tiling |
| * layouts supported by the hardware depending on the tiling coefficients |
| * passed to the program as uniforms. See Volume 1 Part 2 Section 4.5 |
| * "Address Tiling Function" of the IVB PRM for an in-depth explanation of |
| * the hardware tiling format. |
| */ |
| static nir_ssa_def * |
| image_address(nir_builder *b, const struct gen_device_info *devinfo, |
| nir_deref_instr *deref, nir_ssa_def *coord) |
| { |
| if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D && |
| glsl_sampler_type_is_array(deref->type)) { |
| /* It's easier if 1D arrays are treated like 2D arrays */ |
| coord = nir_vec3(b, nir_channel(b, coord, 0), |
| nir_imm_int(b, 0), |
| nir_channel(b, coord, 1)); |
| } else { |
| unsigned dims = glsl_get_sampler_coordinate_components(deref->type); |
| coord = nir_channels(b, coord, (1 << dims) - 1); |
| } |
| |
| nir_ssa_def *offset = load_image_param(b, deref, OFFSET); |
| nir_ssa_def *tiling = load_image_param(b, deref, TILING); |
| nir_ssa_def *stride = load_image_param(b, deref, STRIDE); |
| |
| /* Shift the coordinates by the fixed surface offset. It may be non-zero |
| * if the image is a single slice of a higher-dimensional surface, or if a |
| * non-zero mipmap level of the surface is bound to the pipeline. The |
| * offset needs to be applied here rather than at surface state set-up time |
| * because the desired slice-level may start mid-tile, so simply shifting |
| * the surface base address wouldn't give a well-formed tiled surface in |
| * the general case. |
| */ |
| nir_ssa_def *xypos = (coord->num_components == 1) ? |
| nir_vec2(b, coord, nir_imm_int(b, 0)) : |
| nir_channels(b, coord, 0x3); |
| xypos = nir_iadd(b, xypos, offset); |
| |
| /* The layout of 3-D textures in memory is sort-of like a tiling |
| * format. At each miplevel, the slices are arranged in rows of |
| * 2^level slices per row. The slice row is stored in tmp.y and |
| * the slice within the row is stored in tmp.x. |
| * |
| * The layout of 2-D array textures and cubemaps is much simpler: |
| * Depending on whether the ARYSPC_LOD0 layout is in use it will be |
| * stored in memory as an array of slices, each one being a 2-D |
| * arrangement of miplevels, or as a 2D arrangement of miplevels, |
| * each one being an array of slices. In either case the separation |
| * between slices of the same LOD is equal to the qpitch value |
| * provided as stride.w. |
| * |
| * This code can be made to handle either 2D arrays and 3D textures |
| * by passing in the miplevel as tile.z for 3-D textures and 0 in |
| * tile.z for 2-D array textures. |
| * |
| * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface |
| * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion |
| * of the hardware 3D texture and 2D array layouts. |
| */ |
| if (coord->num_components > 2) { |
| /* Decompose z into a major (tmp.y) and a minor (tmp.x) |
| * index. |
| */ |
| nir_ssa_def *z = nir_channel(b, coord, 2); |
| nir_ssa_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0), |
| nir_channel(b, tiling, 2)); |
| nir_ssa_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2)); |
| |
| /* Take into account the horizontal (tmp.x) and vertical (tmp.y) |
| * slice offset. |
| */ |
| xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y), |
| nir_channels(b, stride, 0xc))); |
| } |
| |
| nir_ssa_def *addr; |
| if (coord->num_components > 1) { |
| /* Calculate the major/minor x and y indices. In order to |
| * accommodate both X and Y tiling, the Y-major tiling format is |
| * treated as being a bunch of narrow X-tiles placed next to each |
| * other. This means that the tile width for Y-tiling is actually |
| * the width of one sub-column of the Y-major tile where each 4K |
| * tile has 8 512B sub-columns. |
| * |
| * The major Y value is the row of tiles in which the pixel lives. |
| * The major X value is the tile sub-column in which the pixel |
| * lives; for X tiling, this is the same as the tile column, for Y |
| * tiling, each tile has 8 sub-columns. The minor X and Y indices |
| * are the position within the sub-column. |
| */ |
| |
| /* Calculate the minor x and y indices. */ |
| nir_ssa_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0), |
| nir_channels(b, tiling, 0x3)); |
| nir_ssa_def *major = nir_ushr(b, xypos, nir_channels(b, tiling, 0x3)); |
| |
| /* Calculate the texel index from the start of the tile row and the |
| * vertical coordinate of the row. |
| * Equivalent to: |
| * tmp.x = (major.x << tile.y << tile.x) + |
| * (minor.y << tile.x) + minor.x |
| * tmp.y = major.y << tile.y |
| */ |
| nir_ssa_def *idx_x, *idx_y; |
| idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1)); |
| idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1)); |
| idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0)); |
| idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0)); |
| idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1)); |
| |
| /* Add it to the start of the tile row. */ |
| nir_ssa_def *idx; |
| idx = nir_imul(b, idx_y, nir_channel(b, stride, 1)); |
| idx = nir_iadd(b, idx, idx_x); |
| |
| /* Multiply by the Bpp value. */ |
| addr = nir_imul(b, idx, nir_channel(b, stride, 0)); |
| |
| if (devinfo->gen < 8 && !devinfo->is_baytrail) { |
| /* Take into account the two dynamically specified shifts. Both are |
| * used to implement swizzling of X-tiled surfaces. For Y-tiled |
| * surfaces only one bit needs to be XOR-ed with bit 6 of the memory |
| * address, so a swz value of 0xff (actually interpreted as 31 by the |
| * hardware) will be provided to cause the relevant bit of tmp.y to |
| * be zero and turn the first XOR into the identity. For linear |
| * surfaces or platforms lacking address swizzling both shifts will |
| * be 0xff causing the relevant bits of both tmp.x and .y to be zero, |
| * what effectively disables swizzling. |
| */ |
| nir_ssa_def *swizzle = load_image_param(b, deref, SWIZZLING); |
| nir_ssa_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0)); |
| nir_ssa_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1)); |
| |
| /* XOR tmp.x and tmp.y with bit 6 of the memory address. */ |
| nir_ssa_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1), |
| nir_imm_int(b, 1 << 6)); |
| addr = nir_ixor(b, addr, bit); |
| } |
| } else { |
| /* Multiply by the Bpp/stride value. Note that the addr.y may be |
| * non-zero even if the image is one-dimensional because a vertical |
| * offset may have been applied above to select a non-zero slice or |
| * level of a higher-dimensional texture. |
| */ |
| nir_ssa_def *idx; |
| idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1)); |
| idx = nir_iadd(b, nir_channel(b, xypos, 0), idx); |
| addr = nir_imul(b, idx, nir_channel(b, stride, 0)); |
| } |
| |
| return addr; |
| } |
| |
| struct format_info { |
| const struct isl_format_layout *fmtl; |
| unsigned chans; |
| unsigned bits[4]; |
| }; |
| |
| static struct format_info |
| get_format_info(enum isl_format fmt) |
| { |
| const struct isl_format_layout *fmtl = isl_format_get_layout(fmt); |
| |
| return (struct format_info) { |
| .fmtl = fmtl, |
| .chans = isl_format_get_num_channels(fmt), |
| .bits = { |
| fmtl->channels.r.bits, |
| fmtl->channels.g.bits, |
| fmtl->channels.b.bits, |
| fmtl->channels.a.bits |
| }, |
| }; |
| } |
| |
| static nir_ssa_def * |
| convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo, |
| nir_ssa_def *color, |
| enum isl_format image_fmt, enum isl_format lower_fmt, |
| unsigned dest_components) |
| { |
| if (image_fmt == lower_fmt) |
| goto expand_vec; |
| |
| if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) { |
| assert(lower_fmt == ISL_FORMAT_R32_UINT); |
| color = nir_format_unpack_11f11f10f(b, color); |
| goto expand_vec; |
| } |
| |
| struct format_info image = get_format_info(image_fmt); |
| struct format_info lower = get_format_info(lower_fmt); |
| |
| const bool needs_sign_extension = |
| isl_format_has_snorm_channel(image_fmt) || |
| isl_format_has_sint_channel(image_fmt); |
| |
| /* We only check the red channel to detect if we need to pack/unpack */ |
| assert(image.bits[0] != lower.bits[0] || |
| memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0); |
| |
| if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) { |
| if (needs_sign_extension) |
| color = nir_format_unpack_sint(b, color, image.bits, image.chans); |
| else |
| color = nir_format_unpack_uint(b, color, image.bits, image.chans); |
| } else { |
| /* All these formats are homogeneous */ |
| for (unsigned i = 1; i < image.chans; i++) |
| assert(image.bits[i] == image.bits[0]); |
| |
| /* On IVB, we rely on the undocumented behavior that typed reads from |
| * surfaces of the unsupported R8 and R16 formats return useful data in |
| * their least significant bits. However, the data in the high bits is |
| * garbage so we have to discard it. |
| */ |
| if (devinfo->gen == 7 && !devinfo->is_haswell && |
| (lower_fmt == ISL_FORMAT_R16_UINT || |
| lower_fmt == ISL_FORMAT_R8_UINT)) |
| color = nir_format_mask_uvec(b, color, lower.bits); |
| |
| if (image.bits[0] != lower.bits[0]) { |
| color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0], |
| image.bits[0]); |
| } |
| |
| if (needs_sign_extension) |
| color = nir_format_sign_extend_ivec(b, color, image.bits); |
| } |
| |
| switch (image.fmtl->channels.r.type) { |
| case ISL_UNORM: |
| assert(isl_format_has_uint_channel(lower_fmt)); |
| color = nir_format_unorm_to_float(b, color, image.bits); |
| break; |
| |
| case ISL_SNORM: |
| assert(isl_format_has_uint_channel(lower_fmt)); |
| color = nir_format_snorm_to_float(b, color, image.bits); |
| break; |
| |
| case ISL_SFLOAT: |
| if (image.bits[0] == 16) |
| color = nir_unpack_half_2x16_split_x(b, color); |
| break; |
| |
| case ISL_UINT: |
| case ISL_SINT: |
| break; |
| |
| default: |
| unreachable("Invalid image channel type"); |
| } |
| |
| expand_vec: |
| assert(dest_components == 1 || dest_components == 4); |
| assert(color->num_components <= dest_components); |
| if (color->num_components == dest_components) |
| return color; |
| |
| nir_ssa_def *comps[4]; |
| for (unsigned i = 0; i < color->num_components; i++) |
| comps[i] = nir_channel(b, color, i); |
| |
| for (unsigned i = color->num_components; i < 3; i++) |
| comps[i] = nir_imm_int(b, 0); |
| |
| if (color->num_components < 4) { |
| if (isl_format_has_int_channel(image_fmt)) |
| comps[3] = nir_imm_int(b, 1); |
| else |
| comps[3] = nir_imm_float(b, 1); |
| } |
| |
| return nir_vec(b, comps, dest_components); |
| } |
| |
| static bool |
| lower_image_load_instr(nir_builder *b, |
| const struct gen_device_info *devinfo, |
| nir_intrinsic_instr *intrin) |
| { |
| nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); |
| nir_variable *var = nir_deref_instr_get_variable(deref); |
| const enum isl_format image_fmt = |
| isl_format_for_gl_format(var->data.image.format); |
| |
| if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) { |
| const enum isl_format lower_fmt = |
| isl_lower_storage_image_format(devinfo, image_fmt); |
| const unsigned dest_components = intrin->num_components; |
| |
| /* Use an undef to hold the uses of the load while we do the color |
| * conversion. |
| */ |
| nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32); |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder)); |
| |
| intrin->num_components = isl_format_get_num_channels(lower_fmt); |
| intrin->dest.ssa.num_components = intrin->num_components; |
| |
| b->cursor = nir_after_instr(&intrin->instr); |
| |
| nir_ssa_def *color = convert_color_for_load(b, devinfo, |
| &intrin->dest.ssa, |
| image_fmt, lower_fmt, |
| dest_components); |
| |
| nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color)); |
| nir_instr_remove(placeholder->parent_instr); |
| } else { |
| const struct isl_format_layout *image_fmtl = |
| isl_format_get_layout(image_fmt); |
| /* We have a matching typed format for everything 32b and below */ |
| assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128); |
| enum isl_format raw_fmt = (image_fmtl->bpb == 64) ? |
| ISL_FORMAT_R32G32_UINT : |
| ISL_FORMAT_R32G32B32A32_UINT; |
| const unsigned dest_components = intrin->num_components; |
| |
| b->cursor = nir_instr_remove(&intrin->instr); |
| |
| nir_ssa_def *coord = intrin->src[1].ssa; |
| |
| nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord); |
| if (devinfo->gen == 7 && !devinfo->is_haswell) { |
| /* Check whether the first stride component (i.e. the Bpp value) |
| * is greater than four, what on Gen7 indicates that a surface of |
| * type RAW has been bound for untyped access. Reading or writing |
| * to a surface of type other than RAW using untyped surface |
| * messages causes a hang on IVB and VLV. |
| */ |
| nir_ssa_def *stride = load_image_param(b, deref, STRIDE); |
| nir_ssa_def *is_raw = |
| nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0)); |
| do_load = nir_iand(b, do_load, is_raw); |
| } |
| nir_push_if(b, do_load); |
| |
| nir_ssa_def *addr = image_address(b, devinfo, deref, coord); |
| nir_intrinsic_instr *load = |
| nir_intrinsic_instr_create(b->shader, |
| nir_intrinsic_image_deref_load_raw_intel); |
| load->src[0] = nir_src_for_ssa(&deref->dest.ssa); |
| load->src[1] = nir_src_for_ssa(addr); |
| load->num_components = image_fmtl->bpb / 32; |
| nir_ssa_dest_init(&load->instr, &load->dest, |
| load->num_components, 32, NULL); |
| nir_builder_instr_insert(b, &load->instr); |
| |
| nir_push_else(b, NULL); |
| |
| nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32); |
| |
| nir_pop_if(b, NULL); |
| |
| nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero); |
| |
| nir_ssa_def *color = convert_color_for_load(b, devinfo, value, |
| image_fmt, raw_fmt, |
| dest_components); |
| |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color)); |
| } |
| |
| return true; |
| } |
| |
| static nir_ssa_def * |
| convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo, |
| nir_ssa_def *color, |
| enum isl_format image_fmt, enum isl_format lower_fmt) |
| { |
| struct format_info image = get_format_info(image_fmt); |
| struct format_info lower = get_format_info(lower_fmt); |
| |
| color = nir_channels(b, color, (1 << image.chans) - 1); |
| |
| if (image_fmt == lower_fmt) |
| return color; |
| |
| if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) { |
| assert(lower_fmt == ISL_FORMAT_R32_UINT); |
| return nir_format_pack_11f11f10f(b, color); |
| } |
| |
| switch (image.fmtl->channels.r.type) { |
| case ISL_UNORM: |
| assert(isl_format_has_uint_channel(lower_fmt)); |
| color = nir_format_float_to_unorm(b, color, image.bits); |
| break; |
| |
| case ISL_SNORM: |
| assert(isl_format_has_uint_channel(lower_fmt)); |
| color = nir_format_float_to_snorm(b, color, image.bits); |
| break; |
| |
| case ISL_SFLOAT: |
| if (image.bits[0] == 16) |
| color = nir_format_float_to_half(b, color); |
| break; |
| |
| case ISL_UINT: |
| color = nir_format_clamp_uint(b, color, image.bits); |
| break; |
| |
| case ISL_SINT: |
| color = nir_format_clamp_sint(b, color, image.bits); |
| break; |
| |
| default: |
| unreachable("Invalid image channel type"); |
| } |
| |
| if (image.bits[0] < 32 && |
| (isl_format_has_snorm_channel(image_fmt) || |
| isl_format_has_sint_channel(image_fmt))) |
| color = nir_format_mask_uvec(b, color, image.bits); |
| |
| if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) { |
| color = nir_format_pack_uint(b, color, image.bits, image.chans); |
| } else { |
| /* All these formats are homogeneous */ |
| for (unsigned i = 1; i < image.chans; i++) |
| assert(image.bits[i] == image.bits[0]); |
| |
| if (image.bits[0] != lower.bits[0]) { |
| color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0], |
| lower.bits[0]); |
| } |
| } |
| |
| return color; |
| } |
| |
| static bool |
| lower_image_store_instr(nir_builder *b, |
| const struct gen_device_info *devinfo, |
| nir_intrinsic_instr *intrin) |
| { |
| nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); |
| nir_variable *var = nir_deref_instr_get_variable(deref); |
| |
| /* For write-only surfaces, we trust that the hardware can just do the |
| * conversion for us. |
| */ |
| if (var->data.image.access & ACCESS_NON_READABLE) |
| return false; |
| |
| const enum isl_format image_fmt = |
| isl_format_for_gl_format(var->data.image.format); |
| |
| if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) { |
| const enum isl_format lower_fmt = |
| isl_lower_storage_image_format(devinfo, image_fmt); |
| |
| /* Color conversion goes before the store */ |
| b->cursor = nir_before_instr(&intrin->instr); |
| |
| nir_ssa_def *color = convert_color_for_store(b, devinfo, |
| intrin->src[3].ssa, |
| image_fmt, lower_fmt); |
| intrin->num_components = isl_format_get_num_channels(lower_fmt); |
| nir_instr_rewrite_src(&intrin->instr, &intrin->src[3], |
| nir_src_for_ssa(color)); |
| } else { |
| const struct isl_format_layout *image_fmtl = |
| isl_format_get_layout(image_fmt); |
| /* We have a matching typed format for everything 32b and below */ |
| assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128); |
| enum isl_format raw_fmt = (image_fmtl->bpb == 64) ? |
| ISL_FORMAT_R32G32_UINT : |
| ISL_FORMAT_R32G32B32A32_UINT; |
| |
| b->cursor = nir_instr_remove(&intrin->instr); |
| |
| nir_ssa_def *coord = intrin->src[1].ssa; |
| |
| nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord); |
| if (devinfo->gen == 7 && !devinfo->is_haswell) { |
| /* Check whether the first stride component (i.e. the Bpp value) |
| * is greater than four, what on Gen7 indicates that a surface of |
| * type RAW has been bound for untyped access. Reading or writing |
| * to a surface of type other than RAW using untyped surface |
| * messages causes a hang on IVB and VLV. |
| */ |
| nir_ssa_def *stride = load_image_param(b, deref, STRIDE); |
| nir_ssa_def *is_raw = |
| nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0)); |
| do_store = nir_iand(b, do_store, is_raw); |
| } |
| nir_push_if(b, do_store); |
| |
| nir_ssa_def *addr = image_address(b, devinfo, deref, coord); |
| nir_ssa_def *color = convert_color_for_store(b, devinfo, |
| intrin->src[3].ssa, |
| image_fmt, raw_fmt); |
| |
| nir_intrinsic_instr *store = |
| nir_intrinsic_instr_create(b->shader, |
| nir_intrinsic_image_deref_store_raw_intel); |
| store->src[0] = nir_src_for_ssa(&deref->dest.ssa); |
| store->src[1] = nir_src_for_ssa(addr); |
| store->src[2] = nir_src_for_ssa(color); |
| store->num_components = image_fmtl->bpb / 32; |
| nir_builder_instr_insert(b, &store->instr); |
| |
| nir_pop_if(b, NULL); |
| } |
| |
| return true; |
| } |
| |
| static bool |
| lower_image_atomic_instr(nir_builder *b, |
| const struct gen_device_info *devinfo, |
| nir_intrinsic_instr *intrin) |
| { |
| if (devinfo->is_haswell || devinfo->gen >= 8) |
| return false; |
| |
| nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); |
| |
| b->cursor = nir_instr_remove(&intrin->instr); |
| |
| /* Use an undef to hold the uses of the load conversion. */ |
| nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32); |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder)); |
| |
| /* Check the first component of the size field to find out if the |
| * image is bound. Necessary on IVB for typed atomics because |
| * they don't seem to respect null surfaces and will happily |
| * corrupt or read random memory when no image is bound. |
| */ |
| nir_ssa_def *size = load_image_param(b, deref, SIZE); |
| nir_ssa_def *zero = nir_imm_int(b, 0); |
| nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero)); |
| |
| nir_builder_instr_insert(b, &intrin->instr); |
| |
| nir_pop_if(b, NULL); |
| |
| nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero); |
| nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result)); |
| |
| return true; |
| } |
| |
| static bool |
| lower_image_size_instr(nir_builder *b, |
| const struct gen_device_info *devinfo, |
| nir_intrinsic_instr *intrin) |
| { |
| nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); |
| nir_variable *var = nir_deref_instr_get_variable(deref); |
| |
| /* For write-only images, we have an actual image surface so we fall back |
| * and let the back-end emit a TXS for this. |
| */ |
| if (var->data.image.access & ACCESS_NON_READABLE) |
| return false; |
| |
| /* If we have a matching typed format, then we have an actual image surface |
| * so we fall back and let the back-end emit a TXS for this. |
| */ |
| const enum isl_format image_fmt = |
| isl_format_for_gl_format(var->data.image.format); |
| if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) |
| return false; |
| |
| b->cursor = nir_instr_remove(&intrin->instr); |
| |
| nir_ssa_def *size = load_image_param(b, deref, SIZE); |
| |
| nir_ssa_def *comps[4] = { NULL, NULL, NULL, NULL }; |
| |
| enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type); |
| unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type); |
| for (unsigned c = 0; c < coord_comps; c++) { |
| if (c == 2 && dim == GLSL_SAMPLER_DIM_CUBE) { |
| comps[2] = nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6)); |
| } else { |
| comps[c] = nir_channel(b, size, c); |
| } |
| } |
| |
| for (unsigned c = coord_comps; c < intrin->dest.ssa.num_components; ++c) |
| comps[c] = nir_imm_int(b, 1); |
| |
| nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components); |
| nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec)); |
| |
| return true; |
| } |
| |
| bool |
| brw_nir_lower_image_load_store(nir_shader *shader, |
| const struct gen_device_info *devinfo) |
| { |
| bool progress = false; |
| |
| nir_foreach_function(function, shader) { |
| if (function->impl == NULL) |
| continue; |
| |
| nir_foreach_block_safe(block, function->impl) { |
| nir_builder b; |
| nir_builder_init(&b, function->impl); |
| |
| nir_foreach_instr_safe(instr, block) { |
| if (instr->type != nir_instr_type_intrinsic) |
| continue; |
| |
| nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); |
| switch (intrin->intrinsic) { |
| case nir_intrinsic_image_deref_load: |
| if (lower_image_load_instr(&b, devinfo, intrin)) |
| progress = true; |
| break; |
| |
| case nir_intrinsic_image_deref_store: |
| if (lower_image_store_instr(&b, devinfo, intrin)) |
| progress = true; |
| break; |
| |
| case nir_intrinsic_image_deref_atomic_add: |
| case nir_intrinsic_image_deref_atomic_min: |
| case nir_intrinsic_image_deref_atomic_max: |
| case nir_intrinsic_image_deref_atomic_and: |
| case nir_intrinsic_image_deref_atomic_or: |
| case nir_intrinsic_image_deref_atomic_xor: |
| case nir_intrinsic_image_deref_atomic_exchange: |
| case nir_intrinsic_image_deref_atomic_comp_swap: |
| if (lower_image_atomic_instr(&b, devinfo, intrin)) |
| progress = true; |
| break; |
| |
| case nir_intrinsic_image_deref_size: |
| if (lower_image_size_instr(&b, devinfo, intrin)) |
| progress = true; |
| break; |
| |
| default: |
| /* Nothing to do */ |
| break; |
| } |
| } |
| } |
| |
| if (progress) |
| nir_metadata_preserve(function->impl, nir_metadata_none); |
| } |
| |
| return progress; |
| } |