| /* |
| * Copyright © 2013 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /** |
| * \file opt_cse.cpp |
| * |
| * constant subexpression elimination at the GLSL IR level. |
| * |
| * Compare to brw_fs_cse.cpp for a more complete CSE implementation. This one |
| * is generic and handles texture operations, but it's rather simple currently |
| * and doesn't support modification of variables in the available expressions |
| * list, so it can't do variables other than uniforms or shader inputs. |
| */ |
| |
| #include "ir.h" |
| #include "ir_visitor.h" |
| #include "ir_rvalue_visitor.h" |
| #include "ir_basic_block.h" |
| #include "ir_optimization.h" |
| #include "ir_builder.h" |
| #include "glsl_types.h" |
| |
| using namespace ir_builder; |
| |
| static bool debug = false; |
| |
| namespace { |
| |
| /** |
| * This is the record of an available expression for common subexpression |
| * elimination. |
| */ |
| class ae_entry : public exec_node |
| { |
| public: |
| ae_entry(ir_instruction *base_ir, ir_rvalue **val) |
| : val(val), base_ir(base_ir) |
| { |
| assert(val); |
| assert(*val); |
| assert(base_ir); |
| |
| var = NULL; |
| } |
| |
| void init(ir_instruction *base_ir, ir_rvalue **val) |
| { |
| this->val = val; |
| this->base_ir = base_ir; |
| this->var = NULL; |
| |
| assert(val); |
| assert(*val); |
| assert(base_ir); |
| } |
| |
| /** |
| * The pointer to the expression that we might be able to reuse |
| * |
| * Note the double pointer -- this is the place in the base_ir expression |
| * tree that we would rewrite to move the expression out to a new variable |
| * assignment. |
| */ |
| ir_rvalue **val; |
| |
| /** |
| * Root instruction in the basic block where the expression appeared. |
| * |
| * This is used so that we can insert the new variable declaration into the |
| * instruction stream (since *val is just somewhere in base_ir's expression |
| * tree). |
| */ |
| ir_instruction *base_ir; |
| |
| /** |
| * The variable that the expression has been stored in, if it's been CSEd |
| * once already. |
| */ |
| ir_variable *var; |
| }; |
| |
| class cse_visitor : public ir_rvalue_visitor { |
| public: |
| cse_visitor(exec_list *validate_instructions) |
| : validate_instructions(validate_instructions) |
| { |
| progress = false; |
| mem_ctx = ralloc_context(NULL); |
| this->ae = new(mem_ctx) exec_list; |
| } |
| ~cse_visitor() |
| { |
| ralloc_free(mem_ctx); |
| } |
| |
| virtual ir_visitor_status visit_enter(ir_function_signature *ir); |
| virtual ir_visitor_status visit_enter(ir_loop *ir); |
| virtual ir_visitor_status visit_enter(ir_if *ir); |
| virtual ir_visitor_status visit_enter(ir_call *ir); |
| virtual void handle_rvalue(ir_rvalue **rvalue); |
| |
| bool progress; |
| |
| private: |
| void *mem_ctx; |
| |
| ir_rvalue *try_cse(ir_rvalue *rvalue); |
| void add_to_ae(ir_rvalue **rvalue); |
| |
| /** |
| * Move all nodes from the ae list to the free list |
| */ |
| void empty_ae_list(); |
| |
| /** |
| * Get and initialize a new ae_entry |
| * |
| * This will either come from the free list or be freshly allocated. |
| */ |
| ae_entry *get_ae_entry(ir_rvalue **rvalue); |
| |
| /** List of ae_entry: The available expressions to reuse */ |
| exec_list *ae; |
| |
| /** |
| * The whole shader, so that we can validate_ir_tree in debug mode. |
| * |
| * This proved quite useful when trying to get the tree manipulation |
| * right. |
| */ |
| exec_list *validate_instructions; |
| |
| /** |
| * List of available-for-use ae_entry objects. |
| */ |
| exec_list free_ae_entries; |
| }; |
| |
| /** |
| * Visitor to walk an expression tree to check that all variables referenced |
| * are constants. |
| */ |
| class is_cse_candidate_visitor : public ir_hierarchical_visitor |
| { |
| public: |
| |
| is_cse_candidate_visitor() |
| : ok(true) |
| { |
| } |
| |
| virtual ir_visitor_status visit(ir_dereference_variable *ir); |
| |
| bool ok; |
| }; |
| |
| |
| class contains_rvalue_visitor : public ir_rvalue_visitor |
| { |
| public: |
| |
| contains_rvalue_visitor(ir_rvalue *val) |
| : val(val) |
| { |
| found = false; |
| } |
| |
| virtual void handle_rvalue(ir_rvalue **rvalue); |
| |
| bool found; |
| |
| private: |
| ir_rvalue *val; |
| }; |
| |
| } /* unnamed namespace */ |
| |
| static void |
| dump_ae(exec_list *ae) |
| { |
| int i = 0; |
| |
| printf("CSE: AE contents:\n"); |
| foreach_in_list(ae_entry, entry, ae) { |
| printf("CSE: AE %2d (%p): ", i, entry); |
| (*entry->val)->print(); |
| printf("\n"); |
| |
| if (entry->var) |
| printf("CSE: in var %p:\n", entry->var); |
| |
| i++; |
| } |
| } |
| |
| ir_visitor_status |
| is_cse_candidate_visitor::visit(ir_dereference_variable *ir) |
| { |
| /* Currently, since we don't handle kills of the ae based on variables |
| * getting assigned, we can only handle constant variables. |
| */ |
| if (ir->var->data.read_only) { |
| return visit_continue; |
| } else { |
| if (debug) |
| printf("CSE: non-candidate: var %s is not read only\n", ir->var->name); |
| ok = false; |
| return visit_stop; |
| } |
| } |
| |
| void |
| contains_rvalue_visitor::handle_rvalue(ir_rvalue **rvalue) |
| { |
| if (*rvalue == val) |
| found = true; |
| } |
| |
| static bool |
| contains_rvalue(ir_rvalue *haystack, ir_rvalue *needle) |
| { |
| contains_rvalue_visitor v(needle); |
| haystack->accept(&v); |
| return v.found; |
| } |
| |
| static bool |
| is_cse_candidate(ir_rvalue *ir) |
| { |
| /* Our temporary variable assignment generation isn't ready to handle |
| * anything bigger than a vector. |
| */ |
| if (!ir->type->is_vector() && !ir->type->is_scalar()) { |
| if (debug) |
| printf("CSE: non-candidate: not a vector/scalar\n"); |
| return false; |
| } |
| |
| /* Only handle expressions and textures currently. We may want to extend |
| * to variable-index array dereferences at some point. |
| */ |
| switch (ir->ir_type) { |
| case ir_type_expression: |
| case ir_type_texture: |
| break; |
| default: |
| if (debug) |
| printf("CSE: non-candidate: not an expression/texture\n"); |
| return false; |
| } |
| |
| is_cse_candidate_visitor v; |
| |
| ir->accept(&v); |
| |
| return v.ok; |
| } |
| |
| /** |
| * Tries to find and return a reference to a previous computation of a given |
| * expression. |
| * |
| * Walk the list of available expressions checking if any of them match the |
| * rvalue, and if so, move the previous copy of the expression to a temporary |
| * and return a reference of the temporary. |
| */ |
| ir_rvalue * |
| cse_visitor::try_cse(ir_rvalue *rvalue) |
| { |
| foreach_in_list(ae_entry, entry, ae) { |
| if (debug) { |
| printf("Comparing to AE %p: ", entry); |
| (*entry->val)->print(); |
| printf("\n"); |
| } |
| |
| if (!rvalue->equals(*entry->val)) |
| continue; |
| |
| if (debug) { |
| printf("CSE: Replacing: "); |
| (*entry->val)->print(); |
| printf("\n"); |
| printf("CSE: with: "); |
| rvalue->print(); |
| printf("\n"); |
| } |
| |
| if (!entry->var) { |
| ir_instruction *base_ir = entry->base_ir; |
| |
| ir_variable *var = new(rvalue) ir_variable(rvalue->type, |
| "cse", |
| ir_var_temporary); |
| |
| /* Write the previous expression result into a new variable. */ |
| base_ir->insert_before(var); |
| ir_assignment *assignment = assign(var, *entry->val); |
| base_ir->insert_before(assignment); |
| |
| /* Replace the expression in the original tree with a deref of the |
| * variable, but keep tracking the expression for further reuse. |
| */ |
| *entry->val = new(rvalue) ir_dereference_variable(var); |
| entry->val = &assignment->rhs; |
| |
| entry->var = var; |
| |
| /* Update the base_irs in the AE list. We have to be sure that |
| * they're correct -- expressions from our base_ir that weren't moved |
| * need to stay in this base_ir (so that later consumption of them |
| * puts new variables between our new variable and our base_ir), but |
| * expressions from our base_ir that we *did* move need base_ir |
| * updated so that any further elimination from inside gets its new |
| * assignments put before our new assignment. |
| */ |
| foreach_in_list(ae_entry, fixup_entry, ae) { |
| if (contains_rvalue(assignment->rhs, *fixup_entry->val)) |
| fixup_entry->base_ir = assignment; |
| } |
| |
| if (debug) |
| dump_ae(ae); |
| } |
| |
| /* Replace the expression in our current tree with the variable. */ |
| return new(rvalue) ir_dereference_variable(entry->var); |
| } |
| |
| return NULL; |
| } |
| |
| void |
| cse_visitor::empty_ae_list() |
| { |
| free_ae_entries.append_list(ae); |
| } |
| |
| ae_entry * |
| cse_visitor::get_ae_entry(ir_rvalue **rvalue) |
| { |
| ae_entry *entry = (ae_entry *) free_ae_entries.pop_head(); |
| if (entry) { |
| entry->init(base_ir, rvalue); |
| } else { |
| entry = new(mem_ctx) ae_entry(base_ir, rvalue); |
| } |
| |
| return entry; |
| } |
| |
| /** Add the rvalue to the list of available expressions for CSE. */ |
| void |
| cse_visitor::add_to_ae(ir_rvalue **rvalue) |
| { |
| if (debug) { |
| printf("CSE: Add to AE: "); |
| (*rvalue)->print(); |
| printf("\n"); |
| } |
| |
| ae->push_tail(get_ae_entry(rvalue)); |
| |
| if (debug) |
| dump_ae(ae); |
| } |
| |
| void |
| cse_visitor::handle_rvalue(ir_rvalue **rvalue) |
| { |
| if (!*rvalue) |
| return; |
| |
| if (debug) { |
| printf("CSE: handle_rvalue "); |
| (*rvalue)->print(); |
| printf("\n"); |
| } |
| |
| if (!is_cse_candidate(*rvalue)) |
| return; |
| |
| ir_rvalue *new_rvalue = try_cse(*rvalue); |
| if (new_rvalue) { |
| *rvalue = new_rvalue; |
| progress = true; |
| |
| if (debug) |
| validate_ir_tree(validate_instructions); |
| } else { |
| add_to_ae(rvalue); |
| } |
| } |
| |
| ir_visitor_status |
| cse_visitor::visit_enter(ir_if *ir) |
| { |
| handle_rvalue(&ir->condition); |
| |
| empty_ae_list(); |
| visit_list_elements(this, &ir->then_instructions); |
| |
| empty_ae_list(); |
| visit_list_elements(this, &ir->else_instructions); |
| |
| empty_ae_list(); |
| return visit_continue_with_parent; |
| } |
| |
| ir_visitor_status |
| cse_visitor::visit_enter(ir_function_signature *ir) |
| { |
| empty_ae_list(); |
| visit_list_elements(this, &ir->body); |
| |
| empty_ae_list(); |
| return visit_continue_with_parent; |
| } |
| |
| ir_visitor_status |
| cse_visitor::visit_enter(ir_loop *ir) |
| { |
| empty_ae_list(); |
| visit_list_elements(this, &ir->body_instructions); |
| |
| empty_ae_list(); |
| return visit_continue_with_parent; |
| } |
| |
| ir_visitor_status |
| cse_visitor::visit_enter(ir_call *) |
| { |
| /* Because call is an exec_list of ir_rvalues, handle_rvalue gets passed a |
| * pointer to the (ir_rvalue *) on the stack. Since we save those pointers |
| * in the AE list, we can't let handle_rvalue get called. |
| */ |
| return visit_continue_with_parent; |
| } |
| |
| /** |
| * Does a (uniform-value) constant subexpression elimination pass on the code |
| * present in the instruction stream. |
| */ |
| bool |
| do_cse(exec_list *instructions) |
| { |
| cse_visitor v(instructions); |
| |
| visit_list_elements(&v, instructions); |
| |
| return v.progress; |
| } |