| /* |
| * Copyright © 2022 Valve Corporation |
| * |
| * SPDX-License-Identifier: MIT |
| */ |
| #include "helpers.h" |
| |
| using namespace aco; |
| |
| BEGIN_TEST(insert_waitcnt.ds_ordered_count) |
| if (!setup_cs(NULL, GFX10_3)) |
| return; |
| |
| Operand def0(PhysReg(256), v1); |
| Operand def1(PhysReg(257), v1); |
| Operand def2(PhysReg(258), v1); |
| Operand gds_base(PhysReg(259), v1); |
| Operand chan_counter(PhysReg(260), v1); |
| Operand m(m0, s1); |
| |
| Instruction* ds_instr; |
| //>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile |
| //! s_waitcnt lgkmcnt(0) |
| ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true); |
| ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); |
| |
| //! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw |
| ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true); |
| ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw); |
| |
| //! s_waitcnt lgkmcnt(0) |
| //! ds_ordered_count %0:v[2], %0:v[3], %0:m0 offset0:3840 gds storage:gds semantics:volatile |
| ds_instr = bld.ds(aco_opcode::ds_ordered_count, def2, gds_base, m, 3840u, 0u, true); |
| ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.clause) |
| if (!setup_cs(NULL, GFX11)) |
| return; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Definition def_v5(PhysReg(261), v1); |
| Definition def_v6(PhysReg(262), v1); |
| Definition def_v7(PhysReg(263), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand op_v4(PhysReg(260), v1); |
| Operand op_v5(PhysReg(261), v1); |
| Operand op_v6(PhysReg(262), v1); |
| Operand op_v7(PhysReg(263), v1); |
| Operand desc0(PhysReg(0), s4); |
| |
| //>> p_unit_test 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); |
| |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //! v1: %0:v[6] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //! v1: %0:v[7] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v0, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v6, desc0, op_v0, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v7, desc0, op_v0, Operand::zero(), 0, false); |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[4], 0 |
| //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[5], 0 |
| //! v1: %0:v[6] = buffer_load_dword %0:s[0-3], %0:v[6], 0 |
| //! v1: %0:v[7] = buffer_load_dword %0:s[0-3], %0:v[7], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v4, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v5, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v6, desc0, op_v6, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v7, desc0, op_v7, Operand::zero(), 0, false); |
| //! s_waitcnt vmcnt(0) |
| //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[4] |
| //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[5] |
| //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[6] |
| //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[7] |
| bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v4, 0, false); |
| bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v5, 0, false); |
| bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v6, 0, false); |
| bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v7, 0, false); |
| |
| //>> p_unit_test 1 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| |
| //! s4: %0:s[4-7] = s_load_dwordx4 %0:s[0-1], 0 |
| bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg(4), s4), Operand(PhysReg(0), s2), |
| Operand::zero()); |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| //! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //! v1: %0:v[5] = buffer_load_dword %0:s[4-7], %0:v[4], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v5, Operand(PhysReg(4), s4), op_v4, Operand::zero(), |
| 0, false); |
| |
| //>> p_unit_test 2 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| //! v_nop |
| bld.vop1(aco_opcode::v_nop); |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[4], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v4, Operand::zero(), 0, false); |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.vmem) |
| if (!setup_cs(NULL, GFX10)) |
| return; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc0(PhysReg(0), s4); |
| |
| //>> BB0 |
| //! /* logical preds: / linear preds: / kind: top-level, */ |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| |
| //>> BB1 |
| //! /* logical preds: / linear preds: / kind: */ |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| bld.reset(program->create_and_insert_block()); |
| bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); |
| |
| bld.reset(program->create_and_insert_block()); |
| program->blocks[2].linear_preds.push_back(0); |
| program->blocks[2].linear_preds.push_back(1); |
| program->blocks[2].logical_preds.push_back(0); |
| program->blocks[2].logical_preds.push_back(1); |
| |
| //>> BB2 |
| //! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */ |
| //! s_waitcnt lgkmcnt(0) |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.lds) |
| if (!setup_cs(NULL, GFX10)) |
| return; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc0(PhysReg(0), s4); |
| |
| //>> BB0 |
| //! /* logical preds: / linear preds: / kind: top-level, */ |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); |
| |
| //>> BB1 |
| //! /* logical preds: / linear preds: / kind: */ |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| bld.reset(program->create_and_insert_block()); |
| bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); |
| |
| bld.reset(program->create_and_insert_block()); |
| program->blocks[2].linear_preds.push_back(0); |
| program->blocks[2].linear_preds.push_back(1); |
| program->blocks[2].logical_preds.push_back(0); |
| program->blocks[2].logical_preds.push_back(1); |
| |
| //>> BB2 |
| //! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */ |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.vmem_types) |
| for (amd_gfx_level gfx : {GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| //>> p_unit_test 0 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| //>> p_unit_test 1 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 2 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); |
| |
| //>> p_unit_test 3 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //~gfx12! s_wait_samplecnt imm:0 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 4 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_samplecnt imm:0 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| //>> p_unit_test 5 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_samplecnt imm:0 |
| //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); |
| |
| //>> p_unit_test 6 |
| //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d |
| //~gfx12! s_wait_bvhcnt imm:0 |
| //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); |
| |
| //>> p_unit_test 7 |
| //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_bvhcnt imm:0 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7)); |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| //>> p_unit_test 8 |
| //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_bvhcnt imm:0 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8)); |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> BB9 |
| //! /* logical preds: / linear preds: / kind: */ |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| //>> BB10 |
| //! /* logical preds: / linear preds: / kind: */ |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| bld.reset(program->create_and_insert_block()); |
| program->blocks[11].linear_preds.push_back(9); |
| program->blocks[11].linear_preds.push_back(10); |
| program->blocks[11].logical_preds.push_back(9); |
| program->blocks[11].logical_preds.push_back(10); |
| |
| //>> BB11 |
| //! /* logical preds: BB9, BB10, / linear preds: BB9, BB10, / kind: uniform, */ |
| //! p_unit_test 9 |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| //>> BB12 |
| //! /* logical preds: / linear preds: / kind: */ |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> BB13 |
| //! /* logical preds: / linear preds: / kind: */ |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| bld.reset(program->create_and_insert_block()); |
| program->blocks[14].linear_preds.push_back(12); |
| program->blocks[14].linear_preds.push_back(13); |
| program->blocks[14].logical_preds.push_back(12); |
| program->blocks[14].logical_preds.push_back(13); |
| |
| //>> BB14 |
| //! /* logical preds: BB12, BB13, / linear preds: BB12, BB13, / kind: uniform, */ |
| //! p_unit_test 10 |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //~gfx12! s_wait_samplecnt imm:0 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.point_sample_accel) |
| for (radeon_family family : {CHIP_GFX1150, CHIP_GFX1153}) { |
| if (!setup_cs(NULL, GFX11_5, family, family == CHIP_GFX1153 ? "_3" : "_0")) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| /* image_sample has point sample acceleration, but image_sample_b does not. Both are VMEM |
| * sample instructions. */ |
| |
| //>> p_unit_test 0 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //~gfx11_5_0! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 1 |
| //! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //~gfx11_5_0! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 2 |
| //! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 3 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //~gfx11_5_0! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 4 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0); |
| |
| //>> p_unit_test 5 |
| //! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //! v1: %0:v[4] = image_sample_b %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_sample_b, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| |
| //>> p_unit_test 5 |
| //! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d |
| //! v1: %0:v[4] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0); |
| bld.mimg(aco_opcode::image_load, def_v4, desc_s8, Operand(s4), Operand(v1), op_v0); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.vmem) |
| if (!setup_cs(NULL, GFX12)) |
| return; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Definition def_v5(PhysReg(261), v1); |
| Definition def_v6(PhysReg(262), v1); |
| Definition def_v7(PhysReg(263), v1); |
| Definition def_v8(PhysReg(264), v1); |
| Definition def_v9(PhysReg(265), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand op_v4(PhysReg(260), v1); |
| Operand op_v5(PhysReg(261), v1); |
| Operand op_v6(PhysReg(262), v1); |
| Operand op_v7(PhysReg(263), v1); |
| Operand op_v8(PhysReg(264), v1); |
| Operand op_v9(PhysReg(265), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //! v1: %0:v[5] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //! v1: %0:v[6] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d unrm r128 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| bld.mimg(aco_opcode::image_sample, def_v5, desc_s8, desc_s4, Operand(v1), op_v0); |
| Instruction* instr = |
| bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v6, desc_s4, Operand(s4), Operand(v1), |
| Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))) |
| .instr; |
| instr->mimg().unrm = true; |
| instr->mimg().r128 = true; |
| |
| //! v1: %0:v[7] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d |
| //! v1: %0:v[8] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| //! v1: %0:v[9] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d unrm r128 |
| bld.mimg(aco_opcode::image_load, def_v7, desc_s8, Operand(s4), Operand(v1), op_v0, 0x1); |
| bld.mimg(aco_opcode::image_sample, def_v8, desc_s8, desc_s4, Operand(v1), op_v0); |
| instr = bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v9, desc_s4, Operand(s4), |
| Operand(v1), Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))) |
| .instr; |
| instr->mimg().unrm = true; |
| instr->mimg().r128 = true; |
| |
| //! s_wait_loadcnt imm:1 |
| //! p_unit_test 0, %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4); |
| //! s_wait_samplecnt imm:1 |
| //! p_unit_test 1, %0:v[5] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_v5); |
| //! s_wait_bvhcnt imm:1 |
| //! p_unit_test 2, %0:v[6] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_v6); |
| //! s_wait_loadcnt imm:0 |
| //! p_unit_test 3, %0:v[7] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_v7); |
| //! s_wait_samplecnt imm:0 |
| //! p_unit_test 4, %0:v[8] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4), op_v8); |
| //! s_wait_bvhcnt imm:0 |
| //! p_unit_test 5, %0:v[9] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5), op_v9); |
| |
| /* Despite not using a sampler, this uses samplecnt. */ |
| //! v1: %0:v[5] = image_msaa_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d |
| //! s_wait_samplecnt imm:0 |
| //! p_unit_test 6, %0:v[5] |
| bld.mimg(aco_opcode::image_msaa_load, def_v5, desc_s8, Operand(s4), Operand(v1), op_v0); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6), op_v5); |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.lds_smem) |
| for (amd_gfx_level gfx : {GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Definition def_v5(PhysReg(261), v1); |
| Definition def_s4(PhysReg(4), s1); |
| Definition def_s5(PhysReg(5), s1); |
| Operand op_s0(PhysReg(0), s1); |
| Operand op_s4(PhysReg(4), s1); |
| Operand op_s5(PhysReg(5), s1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand op_v4(PhysReg(260), v1); |
| Operand op_v5(PhysReg(261), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| |
| //>> v1: %0:v[4] = ds_read_b32 %0:v[0] |
| //! s1: %0:s[4] = s_buffer_load_dword %0:s[0-3], %0:s[0] |
| //! v1: %0:v[5] = ds_read_b32 %0:v[0] |
| //! s1: %0:s[5] = s_buffer_load_dword %0:s[0-3], %0:s[0] |
| bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); |
| bld.smem(aco_opcode::s_buffer_load_dword, def_s4, desc_s4, op_s0); |
| bld.ds(aco_opcode::ds_read_b32, def_v5, op_v0); |
| bld.smem(aco_opcode::s_buffer_load_dword, def_s5, desc_s4, op_s0); |
| |
| //~gfx11! s_waitcnt lgkmcnt(1) |
| //~gfx12! s_wait_dscnt imm:1 |
| //! p_unit_test 0, %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4); |
| //~gfx11! s_waitcnt lgkmcnt(0) |
| //~gfx12! s_wait_kmcnt imm:0 |
| //! p_unit_test 1, %0:s[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_s4); |
| //~gfx12! s_wait_dscnt imm:0 |
| //! p_unit_test 2, %0:v[5] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_v5); |
| //! p_unit_test 3, %0:s[5] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_s5); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.sendmsg_smem) |
| for (amd_gfx_level gfx : {GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_s4(PhysReg(4), s1); |
| Definition def_s5(PhysReg(5), s1); |
| Definition def_s6(PhysReg(6), s1); |
| Definition def_s7(PhysReg(7), s1); |
| Operand op_s0(PhysReg(0), s1); |
| Operand op_s4(PhysReg(4), s1); |
| Operand op_s5(PhysReg(5), s1); |
| Operand op_s6(PhysReg(6), s1); |
| Operand op_s7(PhysReg(7), s1); |
| Operand desc_s4(PhysReg(0), s4); |
| |
| //>> s1: %0:s[4] = s_sendmsg_rtn_b32 3 sendmsg(rtn_get_realtime) |
| //! s1: %0:s[5] = s_buffer_load_dword %0:s[0-3], %0:s[0] |
| //! s1: %0:s[6] = s_sendmsg_rtn_b32 3 sendmsg(rtn_get_realtime) |
| //! s1: %0:s[7] = s_buffer_load_dword %0:s[0-3], %0:s[0] |
| bld.sop1(aco_opcode::s_sendmsg_rtn_b32, def_s4, Operand::c32(sendmsg_rtn_get_realtime)); |
| bld.smem(aco_opcode::s_buffer_load_dword, def_s5, desc_s4, op_s0); |
| bld.sop1(aco_opcode::s_sendmsg_rtn_b32, def_s6, Operand::c32(sendmsg_rtn_get_realtime)); |
| bld.smem(aco_opcode::s_buffer_load_dword, def_s7, desc_s4, op_s0); |
| |
| //~gfx12! s_wait_kmcnt imm:1 |
| //~gfx11! s_waitcnt lgkmcnt(1) |
| //! p_unit_test 0, %0:s[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_s4); |
| //~gfx12! s_wait_kmcnt imm:0 |
| //~gfx11! s_waitcnt lgkmcnt(0) |
| //! p_unit_test 1, %0:s[5] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_s5); |
| //! p_unit_test 2, %0:s[6] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_s6); |
| //! p_unit_test 3, %0:s[7] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_s7); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.vmem_ds) |
| if (!setup_cs(NULL, GFX12)) |
| return; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Definition def_v5(PhysReg(261), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand op_v1(PhysReg(257), v1); |
| Operand op_v4(PhysReg(260), v1); |
| Operand op_v5(PhysReg(261), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| |
| program->workgroup_size = 128; |
| program->wgp_mode = true; |
| |
| //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| //! v1: %0:v[5] = ds_read_b32 %0:v[0] |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| bld.ds(aco_opcode::ds_read_b32, def_v5, op_v0); |
| |
| //! s_wait_loadcnt_dscnt dscnt(0) loadcnt(0) |
| //! p_unit_test 0, %0:v[4], %0:v[5] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4, op_v5); |
| |
| //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[1] storage:buffer |
| //! v1: %0:v[5] = ds_write_b32 %0:v[0], %0:v[1] storage:shared |
| Instruction* instr = |
| bld.mubuf(aco_opcode::buffer_store_dword, desc_s4, op_v0, Operand::zero(), op_v1, 0, false) |
| .instr; |
| instr->mubuf().sync = memory_sync_info(storage_buffer); |
| instr = bld.ds(aco_opcode::ds_write_b32, def_v5, op_v0, op_v1).instr; |
| instr->ds().sync = memory_sync_info(storage_shared); |
| |
| //! s_wait_storecnt_dscnt dscnt(0) storecnt(0) |
| bld.barrier(aco_opcode::p_barrier, |
| memory_sync_info(storage_buffer | storage_shared, semantic_acqrel, scope_workgroup)); |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.vmem_ds_valu) |
| for (amd_gfx_level gfx : {GFX10_3, GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| |
| emit_divergent_if_else( |
| program.get(), bld, Operand::c64(1), |
| [&]() |
| { |
| //>> p_unit_test 1 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| }, |
| [&]() |
| { |
| //>> p_unit_test 2 |
| //~gfx11! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! v1: %0:v[4] = v_mov_b32 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.vop1(aco_opcode::v_mov_b32, def_v4, Operand::zero()); |
| }); |
| //>> p_unit_test 3 |
| //~gfx(10_3|11)! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! p_unit_test %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| emit_divergent_if_else( |
| program.get(), bld, Operand::c64(1), |
| [&]() |
| { |
| //>> p_unit_test 4 |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); |
| }, |
| [&]() |
| { |
| //>> p_unit_test 5 |
| //~gfx11! s_waitcnt lgkmcnt(0) |
| //~gfx12! s_wait_dscnt imm:0 |
| //! v1: %0:v[4] = v_mov_b32 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.vop1(aco_opcode::v_mov_b32, def_v4, Operand::zero()); |
| }); |
| //>> p_unit_test 6 |
| //~gfx(10_3|11)! s_waitcnt lgkmcnt(0) |
| //~gfx12! s_wait_dscnt imm:0 |
| //! p_unit_test %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.vmem_different_halves) |
| if (!setup_cs(NULL, GFX12)) |
| return; |
| |
| Definition def_v4_lo(PhysReg(260), v2b); |
| Definition def_v4_hi(PhysReg(260).advance(2), v2b); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| //>> p_unit_test 0 |
| //! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0 |
| //! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); |
| bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| |
| //>> p_unit_test 1 |
| //! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0 |
| //! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| |
| //>> p_unit_test 2 |
| //! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0 |
| //! s_wait_loadcnt imm:0 |
| //! v2b: %0:v[4][0:16] = buffer_load_short_d16 %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| bld.mubuf(aco_opcode::buffer_load_short_d16, def_v4_lo, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| |
| //>> p_unit_test 3 |
| //! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0 |
| //! s_wait_loadcnt imm:0 |
| //! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| |
| //>> p_unit_test 4 |
| //! v2b: %0:v[4][0:16] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d d16 |
| //! s_wait_samplecnt imm:0 |
| //! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| Instruction* instr = |
| bld.mimg(aco_opcode::image_sample, def_v4_lo, desc_s8, desc_s4, Operand(v1), op_v0); |
| instr->mimg().dmask = 0x1; |
| instr->mimg().d16 = true; |
| bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| |
| //>> p_unit_test 5 |
| //! v2b: %0:v[4][16:32] = buffer_load_short_d16_hi %0:s[0-3], %0:v[0], 0 |
| //! s_wait_loadcnt imm:0 |
| //! v2b: %0:v[4][0:16] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d d16 |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.mubuf(aco_opcode::buffer_load_short_d16_hi, def_v4_hi, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| instr = bld.mimg(aco_opcode::image_sample, def_v4_lo, desc_s8, desc_s4, Operand(v1), op_v0); |
| instr->mimg().dmask = 0x1; |
| instr->mimg().d16 = true; |
| |
| finish_waitcnt_test(); |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.waw.vmem_different_lanes) |
| for (amd_gfx_level gfx : {GFX10_3, GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| emit_divergent_if_else( |
| program.get(), bld, Operand::c64(1), |
| [&]() |
| { |
| //>> p_unit_test 1 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| }, |
| [&]() |
| { |
| //>> p_unit_test 2 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| }); |
| //>> p_unit_test 3 |
| //~gfx(10_3|11)! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! p_unit_test %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| emit_divergent_if_else( |
| program.get(), bld, Operand::c64(1), |
| [&]() |
| { |
| //>> p_unit_test 4 |
| //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| }, |
| [&]() |
| { |
| //>> p_unit_test 5 |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); |
| }); |
| //>> p_unit_test 6 |
| //~gfx(10_3|11)! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //~gfx12! s_wait_samplecnt imm:0 |
| //! p_unit_test %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.divergent_branch.inc_counter) |
| for (amd_gfx_level gfx : {GFX10_3, GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Definition def_v5(PhysReg(261), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| emit_divergent_if_else( |
| program.get(), bld, Operand::c64(1), |
| [&]() |
| { |
| //>> p_unit_test 1 |
| //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| }, |
| [&]() |
| { |
| //>> p_unit_test 2 |
| //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc_s4, op_v0, Operand::zero(), 0, |
| false); |
| }); |
| //>> p_unit_test 3 |
| //~gfx(10_3|11)! s_waitcnt vmcnt(1) |
| //~gfx12! s_wait_loadcnt imm:1 |
| //! p_unit_test %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.divergent_branch.no_skip) |
| for (amd_gfx_level gfx : {GFX10_3, GFX11, GFX12}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition def_v4(PhysReg(260), v1); |
| Operand op_v0(PhysReg(256), v1); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 |
| bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); |
| |
| //>> p_unit_test 1 |
| //~gfx(10_3|11)! s_waitcnt vmcnt(0) |
| //~gfx12! s_wait_loadcnt imm:0 |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| program->blocks[1].linear_preds.push_back(0); |
| program->blocks[1].logical_preds.push_back(0); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| //>> p_unit_test 2 |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| program->blocks[2].linear_preds.push_back(1); |
| program->blocks[2].logical_preds.push_back(1); |
| program->blocks[2].logical_preds.push_back(0); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(PhysReg(260), v1)); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.flat.wait_zero) |
| for (amd_gfx_level gfx : {GFX9, GFX10}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition dest0(PhysReg(260), v1); |
| Definition dest1(PhysReg(261), v1); |
| Operand offset(PhysReg(256), v1); |
| Operand addr(PhysReg(256), v2); |
| |
| //>> p_unit_test 0 |
| //! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //~gfx9! s_waitcnt vmcnt(0) |
| //~gfx10! s_waitcnt vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); |
| bld.global(aco_opcode::global_load_dword, dest0, addr, Operand(s1)); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 1 |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //~gfx9! s_waitcnt lgkmcnt(0) |
| //~gfx10! s_waitcnt lgkmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.ds(aco_opcode::ds_read_b32, dest0, offset); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 2 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef |
| //~gfx9! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //~gfx10! s_waitcnt lgkmcnt(0) vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.flat(aco_opcode::flat_load_dword, dest0, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.global(aco_opcode::global_load_dword, dest1, addr, Operand(s1)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 3 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! v1: %0:v[5] = ds_read_b32 %0:v[0] |
| //~gfx9! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //~gfx10! s_waitcnt lgkmcnt(1) vmcnt(0) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.flat(aco_opcode::flat_load_dword, dest0, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.ds(aco_opcode::ds_read_b32, dest1, offset); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 4 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //~gfx9! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //~gfx10! s_waitcnt lgkmcnt(1) vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| bld.flat(aco_opcode::flat_load_dword, dest0, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 5 |
| //! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef |
| //! s_waitcnt vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.global(aco_opcode::global_load_dword, dest0, addr, Operand(s1)); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 6 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef |
| //! v1: %0:v[5] = global_load_dword %0:v[0-1], s1: undef |
| //! s_waitcnt vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); |
| bld.flat(aco_opcode::flat_load_dword, dest0, addr, Operand(s1)); |
| bld.global(aco_opcode::global_load_dword, dest1, addr, Operand(s1)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 7 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef |
| //! s_waitcnt vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7)); |
| bld.flat(aco_opcode::flat_load_dword, dest0, addr, Operand(s1)); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)); |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| //>> p_unit_test 8 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //~gfx9! s_waitcnt vmcnt(0) |
| //~gfx10! s_waitcnt vmcnt(1) |
| //! p_unit_test %0:v[4] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8)); |
| bld.flat(aco_opcode::flat_load_dword, dest0, addr, Operand(s1)); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.pseudo(aco_opcode::p_unit_test, Operand(dest0.physReg(), v1)); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.flat.waw) |
| for (amd_gfx_level gfx : {GFX9, GFX10}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| /* Flat might use either LDS or VMEM, so WaW always needs a wait. */ |
| Definition dest(PhysReg(260), v1); |
| Operand offset(PhysReg(256), v1); |
| Operand addr(PhysReg(256), v2); |
| Operand desc_s4(PhysReg(0), s4); |
| Operand desc_s8(PhysReg(8), s8); |
| |
| //>> p_unit_test 0 |
| //! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); |
| bld.global(aco_opcode::global_load_dword, dest, addr, Operand(s1)); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| |
| //>> p_unit_test 1 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.global(aco_opcode::global_load_dword, dest, addr, Operand(s1)); |
| |
| //>> p_unit_test 2 |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| //! s_waitcnt lgkmcnt(0) |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.ds(aco_opcode::ds_read_b32, dest, offset); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| |
| //>> p_unit_test 3 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = ds_read_b32 %0:v[0] |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.ds(aco_opcode::ds_read_b32, dest, offset); |
| |
| /* In theory, we don't need a wait here, but we don't optimize this. */ |
| //>> p_unit_test 4 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| |
| //>> p_unit_test 5 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.mimg(aco_opcode::image_sample, dest, desc_s8, desc_s4, Operand(v1), offset); |
| |
| //>> p_unit_test 6 |
| //! v1: %0:v[4] = flat_load_dword %0:v[0-1], s1: undef |
| //! s_waitcnt vmcnt(0) |
| //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); |
| bld.flat(aco_opcode::flat_load_dword, dest, addr, Operand(s1)); |
| bld.mimg(aco_opcode::image_sample, dest, desc_s8, desc_s4, Operand(v1), offset); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |
| |
| BEGIN_TEST(insert_waitcnt.flat.barrier) |
| for (amd_gfx_level gfx : {GFX9, GFX10}) { |
| if (!setup_cs(NULL, gfx)) |
| continue; |
| |
| Definition dest0(PhysReg(260), v1); |
| Definition dest1(PhysReg(261), v1); |
| Operand addr(PhysReg(256), v2); |
| Operand data(PhysReg(256), v1); |
| |
| //>> p_unit_test 0 |
| //! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef storage:buffer |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef may_use_lds |
| //~gfx9! s_waitcnt vmcnt(0) |
| //~gfx10! s_waitcnt vmcnt(1) |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); |
| bld.global(aco_opcode::global_load_dword, dest0, addr, Operand(s1), 0, |
| memory_sync_info(storage_buffer)); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1)).instr->flat().may_use_lds = true; |
| bld.barrier(aco_opcode::p_barrier, |
| memory_sync_info(storage_buffer, semantic_acqrel, scope_device)); |
| |
| //>> p_unit_test 1 |
| //! v1: %0:v[5] = flat_load_dword %0:v[0-1], s1: undef may_use_lds storage:buffer |
| //! v1: %0:v[4] = global_load_dword %0:v[0-1], s1: undef |
| //~gfx9! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //~gfx10! s_waitcnt lgkmcnt(0) vmcnt(1) |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); |
| bld.flat(aco_opcode::flat_load_dword, dest1, addr, Operand(s1), 0, |
| memory_sync_info(storage_buffer)).instr->flat().may_use_lds = true; |
| bld.global(aco_opcode::global_load_dword, dest0, addr, Operand(s1), 0); |
| bld.barrier(aco_opcode::p_barrier, |
| memory_sync_info(storage_buffer, semantic_acqrel, scope_device)); |
| |
| //>> p_unit_test 2 |
| //! flat_store_dword %0:v[0-1], s1: undef, %0:v[0] may_use_lds storage:buffer |
| //~gfx9! s_waitcnt lgkmcnt(0) vmcnt(0) |
| //~gfx10! s_waitcnt_vscnt %0:null imm:0 |
| //~gfx10! s_waitcnt lgkmcnt(0) |
| bld.reset(program->create_and_insert_block()); |
| bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); |
| bld.flat(aco_opcode::flat_store_dword, addr, Operand(s1), data, 0, |
| memory_sync_info(storage_buffer)).instr->flat().may_use_lds = true; |
| bld.barrier(aco_opcode::p_barrier, |
| memory_sync_info(storage_buffer, semantic_acqrel, scope_device)); |
| |
| finish_waitcnt_test(); |
| } |
| END_TEST |