blob: 663c9674c494dd45500dc6504ae8e53f0fa7a19e [file] [log] [blame]
/*
* Copyright © 2015 Connor Abbott
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_fs_builder.h"
using namespace brw;
/**
 * Tell whether \p inst can write a destination whose type differs from its
 * execution type without help from a separate conversion MOV.
 *
 * SEL is the only opcode singled out here: it is accepted only when its
 * destination type already equals its execution type.  MOV and
 * MOV_INDIRECT are accepted unconditionally.
 *
 * FIXME: Every other opcode is also accepted, on the assumption that
 * opcodes not explicitly mentioned above just work with arbitrary
 * conversions.
 */
static bool
supports_type_conversion(const fs_inst *inst) {
   if (inst->opcode == BRW_OPCODE_SEL)
      return inst->dst.type == get_exec_type(inst);

   return true;
}
/**
 * Lower type conversions that an instruction cannot perform directly on its
 * original destination.
 *
 * Two situations are rewritten; in both the instruction is redirected to a
 * freshly allocated temporary and a MOV from that temporary to the original
 * destination is inserted right after it.  Any saturate modifier is moved
 * from the instruction onto the inserted MOV.
 *
 *  - The instruction does not support converting to its destination type
 *    (see supports_type_conversion()): it writes a temporary of its
 *    execution type instead.
 *
 *  - The instruction has a 64-bit execution type and a narrower destination
 *    type: it writes a strided view of a 64-bit temporary instead.
 *
 * \return true if any instruction was rewritten, in which case the live
 *         intervals have been invalidated.
 */
bool
fs_visitor::lower_conversions()
{
   bool made_progress = false;

   foreach_block_and_inst(block, fs_inst, inst, cfg) {
      const fs_builder ibld(this, block, inst);

      /* Remember the original destination and saturate flag before the
       * instruction is modified; the MOV emitted below needs both.
       */
      fs_reg orig_dst = inst->dst;
      const bool orig_saturate = inst->saturate;

      if (!supports_type_conversion(inst)) {
         /* Have the instruction produce its result in a temporary of its
          * own execution type.
          */
         fs_reg exec_temp = ibld.vgrf(get_exec_type(inst));

         assert(inst->size_written ==
                inst->dst.component_size(inst->exec_size));

         inst->dst = exec_temp;
         inst->saturate = false;

         /* The destination now has the execution type rather than the
          * original destination type, so recompute size_written from the
          * new destination.
          */
         inst->size_written = inst->dst.component_size(inst->exec_size);

         /* Now do the conversion to the original destination's type.  The
          * MOV is placed right after this instruction, so the next
          * iteration will lower it in turn if it is a d2f conversion.
          */
         ibld.at(block, inst->next).MOV(orig_dst, exec_temp)->saturate =
            orig_saturate;

         made_progress = true;
      } else if (get_exec_type_size(inst) == 8 &&
                 type_sz(orig_dst.type) < 8) {
         /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
          * Single Precision Float":
          *
          *    The upper Dword of every Qword will be written with undefined
          *    value when converting DF to F.
          *
          * So we need to allocate a temporary that's two registers, and then
          * do a strided MOV to get the lower DWord of every Qword that has
          * the result.
          */
         fs_reg wide_temp = ibld.vgrf(get_exec_type(inst));
         fs_reg strided_view = subscript(wide_temp, orig_dst.type, 0);

         assert(inst->size_written ==
                inst->dst.component_size(inst->exec_size));

         inst->saturate = false;
         inst->dst = strided_view;

         /* As it is a strided destination, we write n times more, n being
          * the size ratio between the source and destination types.  Update
          * size_written accordingly.
          */
         inst->size_written = inst->dst.component_size(inst->exec_size);

         ibld.at(block, inst->next).MOV(orig_dst, strided_view)->saturate =
            orig_saturate;

         made_progress = true;
      }
   }

   if (made_progress)
      invalidate_live_intervals();

   return made_progress;
}