/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>
#include <sys/mman.h>
#include <gtest/gtest.h>
#include "c99_compat.h"
#include "common/xe/intel_engine.h"
#include "common/intel_gem.h"
#include "dev/intel_debug.h"
#include "dev/intel_device_info.h"
#include "dev/intel_kmd.h"
#include "intel_gem.h"
#include "isl/isl.h"
#include "drm-uapi/i915_drm.h"
#include "drm-uapi/xe_drm.h"
#include "genxml/gen_macros.h"
#include "util/macros.h"
class mi_builder_test;
struct address {
uint32_t gem_handle;
uint32_t offset;
};
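/* genxml-packed commands funnel every address field through the hooks
 * declared below; __gen_combine_address() resolves a (BO, offset) pair into
 * whatever the platform needs (an absolute softpin address on gfx8+, a
 * relocation entry on older kernels).
 */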
#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
void *location);
bool *__gen_get_write_fencing_status(mi_builder_test *test);
struct address
__gen_address_offset(address addr, uint64_t offset)
{
addr.offset += offset;
return addr;
}
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
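/* Scratch register the tests are free to clobber.  MI_ALU_REG15 doubles as
 * the scratch on HSW+, which is presumably why the builder only gets 15
 * allocatable GPRs below; on gfx7 we borrow 3DPRIM_START_VERTEX, which (we
 * assume) is harmless since these tests never emit 3D primitives.
 */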
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048
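/* The data BO (DATA_BO_SIZE below) is split in half: test inputs live in
 * the first 2 KiB, results in the second.
 */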
#define MI_BUILDER_CAN_WRITE_BATCH GFX_VER >= 8
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
#include "genxml/genX_pack.h"
#include "mi_builder.h"
#define emit_cmd(cmd, name) \
for (struct cmd name = { __genxml_cmd_header(cmd) }, \
*_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
__builtin_expect(_dst != NULL, 1); \
__genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
#include <vector>
class mi_builder_test : public ::testing::Test {
public:
void SetUp() override;
void TearDown() override;
void *emit_dwords(int num_dwords);
void submit_batch();
inline address in_addr(uint32_t offset)
{
address addr;
addr.gem_handle = data_bo_handle;
addr.offset = INPUT_DATA_OFFSET + offset;
return addr;
}
inline address out_addr(uint32_t offset)
{
address addr;
addr.gem_handle = data_bo_handle;
addr.offset = OUTPUT_DATA_OFFSET + offset;
return addr;
}
inline mi_value in_mem64(uint32_t offset)
{
return mi_mem64(in_addr(offset));
}
inline mi_value in_mem32(uint32_t offset)
{
return mi_mem32(in_addr(offset));
}
inline mi_value out_mem64(uint32_t offset)
{
return mi_mem64(out_addr(offset));
}
inline mi_value out_mem32(uint32_t offset)
{
return mi_mem32(out_addr(offset));
}
int fd = -1;
intel_device_info devinfo;
uint32_t batch_bo_handle = 0;
uint64_t batch_bo_addr;
uint32_t batch_offset;
void *batch_map = NULL;
struct {
uint32_t vm_id = 0;
uint32_t queue_id = 0;
} xe;
struct {
uint32_t ctx_id = 0;
#if GFX_VER < 8
std::vector<drm_i915_gem_relocation_entry> relocs;
#endif
} i915;
uint32_t data_bo_handle = 0;
uint64_t data_bo_addr;
void *data_map = NULL;
char *input;
char *output;
uint64_t canary;
bool write_fence_status;
mi_builder b;
};
// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
#define DATA_BO_SIZE 4096
void
mi_builder_test::SetUp()
{
drmDevicePtr devices[8];
int max_devices = drmGetDevices2(0, devices, 8);
ASSERT_GT(max_devices, 0);
int i;
for (i = 0; i < max_devices; i++) {
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
devices[i]->bustype == DRM_BUS_PCI &&
devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
if (fd < 0)
continue;
if (intel_get_kmd_type(fd) == INTEL_KMD_TYPE_I915) {
/* We don't really need to do this when running on hardware because
* we can just pull it from the drmDevice. However, without doing
* this, intel_dump_gpu gets a bit of heartburn and we can't use the
* --device option with it.
*/
int device_id;
ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
<< strerror(errno);
}
ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
if (devinfo.ver != GFX_VER ||
(devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
close(fd);
fd = -1;
continue;
}
/* Found a device! */
break;
}
}
drmFreeDevices(devices, max_devices);
ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
ASSERT_TRUE(intel_gem_create_context(fd, &i915.ctx_id)) << strerror(errno);
if (GFX_VER >= 8) {
/* On gfx8+, we require softpin */
int has_softpin;
ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
<< strerror(errno);
ASSERT_TRUE(has_softpin);
}
// Create the batch buffer
drm_i915_gem_create gem_create = drm_i915_gem_create();
gem_create.size = BATCH_BO_SIZE;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
(void *)&gem_create), 0) << strerror(errno);
batch_bo_handle = gem_create.handle;
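/* On gfx8+ we pick an arbitrary address near the top of the canonical
 * 48-bit space; EXEC_OBJECT_PINNED in submit_batch() makes the kernel map
 * the BO exactly there, so __gen_combine_address() can bake absolute
 * addresses into the batch.
 */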
#if GFX_VER >= 8
batch_bo_addr = 0xffffffffdff70000ULL;
#endif
if (devinfo.has_caching_uapi) {
drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
gem_caching.handle = batch_bo_handle;
gem_caching.caching = I915_CACHING_CACHED;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
(void *)&gem_caching), 0) << strerror(errno);
}
if (devinfo.has_mmap_offset) {
drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
gem_mmap_offset.handle = batch_bo_handle;
gem_mmap_offset.flags = devinfo.has_local_mem ?
I915_MMAP_OFFSET_FIXED :
I915_MMAP_OFFSET_WC;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
&gem_mmap_offset), 0) << strerror(errno);
batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, gem_mmap_offset.offset);
ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
} else {
drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
gem_mmap.handle = batch_bo_handle;
gem_mmap.offset = 0;
gem_mmap.size = BATCH_BO_SIZE;
gem_mmap.flags = 0;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
(void *)&gem_mmap), 0) << strerror(errno);
batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
}
// Create the data buffer
gem_create = drm_i915_gem_create();
gem_create.size = DATA_BO_SIZE;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
(void *)&gem_create), 0) << strerror(errno);
data_bo_handle = gem_create.handle;
#if GFX_VER >= 8
data_bo_addr = 0xffffffffefff0000ULL;
#endif
if (devinfo.has_caching_uapi) {
drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
gem_caching.handle = data_bo_handle;
gem_caching.caching = I915_CACHING_CACHED;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
(void *)&gem_caching), 0) << strerror(errno);
}
if (devinfo.has_mmap_offset) {
drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
gem_mmap_offset.handle = data_bo_handle;
gem_mmap_offset.flags = devinfo.has_local_mem ?
I915_MMAP_OFFSET_FIXED :
I915_MMAP_OFFSET_WC;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
&gem_mmap_offset), 0) << strerror(errno);
data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
fd, gem_mmap_offset.offset);
ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
} else {
drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
gem_mmap.handle = data_bo_handle;
gem_mmap.offset = 0;
gem_mmap.size = DATA_BO_SIZE;
gem_mmap.flags = 0;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
(void *)&gem_mmap), 0) << strerror(errno);
data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
}
} else {
assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);
int err;
struct drm_xe_vm_create create = {
.flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
ASSERT_EQ(err, 0) << strerror(errno);
xe.vm_id = create.vm_id;
struct drm_xe_engine_class_instance instance = {};
struct intel_query_engine_info *engines_info = xe_engine_get_info(fd);
assert(engines_info);
bool found_engine = false;
for (uint32_t i = 0; i < engines_info->num_engines; i++) {
struct intel_engine_class_instance *e = &engines_info->engines[i];
if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
instance.engine_instance = e->engine_instance;
instance.gt_id = e->gt_id;
found_engine = true;
break;
}
}
free(engines_info);
ASSERT_TRUE(found_engine);
struct drm_xe_exec_queue_create queue_create = {
.width = 1,
.num_placements = 1,
.vm_id = xe.vm_id,
.instances = (uintptr_t)&instance,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
ASSERT_EQ(err, 0) << strerror(errno);
xe.queue_id = queue_create.exec_queue_id;
// Create the batch buffer.
{
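/* Place the BO in system memory and make it CPU-cacheable (write-back) so
 * results can be read back directly.
 */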
struct drm_xe_gem_create gem_create = {
.size = BATCH_BO_SIZE,
.placement = 1u << devinfo.mem.sram.mem.instance,
.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
ASSERT_EQ(err, 0) << strerror(errno);
batch_bo_handle = gem_create.handle;
batch_bo_addr = 0x10000000;
struct drm_xe_gem_mmap_offset mm = {
.handle = batch_bo_handle,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
ASSERT_EQ(err, 0) << strerror(errno);
batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, mm.offset);
ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
}
// Create the data buffer.
{
struct drm_xe_gem_create gem_create = {
.size = DATA_BO_SIZE,
.placement = 1u << devinfo.mem.sram.mem.instance,
.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
ASSERT_EQ(err, 0) << strerror(errno);
data_bo_handle = gem_create.handle;
data_bo_addr = 0x20000000;
struct drm_xe_gem_mmap_offset mm = {
.handle = data_bo_handle,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
ASSERT_EQ(err, 0) << strerror(errno);
data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, mm.offset);
ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
}
}
// Start the batch at zero
batch_offset = 0;
input = (char *)data_map + INPUT_DATA_OFFSET;
output = (char *)data_map + OUTPUT_DATA_OFFSET;
// Fill the test data with garbage
memset(data_map, 139, DATA_BO_SIZE);
memset(&canary, 139, sizeof(canary));
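// canary holds the same garbage pattern so tests can check that bytes
// adjacent to a store were left untouched.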
write_fence_status = false;
struct isl_device isl_dev;
isl_device_init(&isl_dev, &devinfo);
mi_builder_init(&b, &devinfo, this);
const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
mi_builder_set_mocs(&b, mocs);
}
void
mi_builder_test::TearDown()
{
int err;
if (data_map) {
err = munmap(data_map, DATA_BO_SIZE);
EXPECT_EQ(err, 0) << "unmap data bo failed";
}
if (data_bo_handle) {
struct drm_gem_close gem_close = { .handle = data_bo_handle };
err = intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
EXPECT_EQ(err, 0) << "close data bo failed";
}
if (batch_map) {
err = munmap(batch_map, BATCH_BO_SIZE);
EXPECT_EQ(err, 0) << "unmap batch bo failed";
}
if (batch_bo_handle) {
struct drm_gem_close gem_close = { .handle = batch_bo_handle };
err = intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
EXPECT_EQ(err, 0) << "close batch bo failed";
}
if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
if (i915.ctx_id) {
struct drm_i915_gem_context_destroy destroy = {
.ctx_id = i915.ctx_id,
};
err = intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
EXPECT_EQ(err, 0) << "context destroy failed";
}
} else {
assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);
if (xe.queue_id) {
struct drm_xe_exec_queue_destroy queue_destroy = {
.exec_queue_id = xe.queue_id,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
EXPECT_EQ(err, 0) << "queue_destroy failure";
}
if (xe.vm_id) {
struct drm_xe_vm_destroy destroy = {
.vm_id = xe.vm_id,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
EXPECT_EQ(err, 0) << "vm_destroy failure";
}
}
if (fd != -1)
close(fd);
}
void *
mi_builder_test::emit_dwords(int num_dwords)
{
void *ptr = (void *)((char *)batch_map + batch_offset);
batch_offset += num_dwords * 4;
assert(batch_offset < BATCH_BO_SIZE);
return ptr;
}
void
mi_builder_test::submit_batch()
{
mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
// Round batch up to an even number of dwords.
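// batch_offset counts bytes, so bit 2 set means an odd number of dwords.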
if (batch_offset & 4)
mi_builder_emit(&b, GENX(MI_NOOP), noop);
if (devinfo.kmd_type == INTEL_KMD_TYPE_I915) {
drm_i915_gem_exec_object2 objects[2];
memset(objects, 0, sizeof(objects));
objects[0].handle = data_bo_handle;
objects[0].relocation_count = 0;
objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
EXEC_OBJECT_PINNED |
EXEC_OBJECT_WRITE;
objects[0].offset = data_bo_addr;
#else
objects[0].flags = EXEC_OBJECT_WRITE;
objects[0].offset = -1;
#endif
objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
objects[1].relocation_count = 0;
objects[1].relocs_ptr = 0;
objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
EXEC_OBJECT_PINNED;
objects[1].offset = batch_bo_addr;
#else
objects[1].relocation_count = i915.relocs.size();
objects[1].relocs_ptr = (uintptr_t)i915.relocs.data();
objects[1].flags = 0;
objects[1].offset = -1;
#endif
drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
execbuf.buffers_ptr = (uintptr_t)(void *)objects;
execbuf.buffer_count = 2;
execbuf.batch_start_offset = 0;
execbuf.batch_len = batch_offset;
execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
execbuf.rsvd1 = i915.ctx_id;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
(void *)&execbuf), 0) << strerror(errno);
drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
gem_wait.bo_handle = batch_bo_handle;
gem_wait.timeout_ns = INT64_MAX;
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
(void *)&gem_wait), 0) << strerror(errno);
} else {
assert(devinfo.kmd_type == INTEL_KMD_TYPE_XE);
int err;
uint32_t sync_handles[2] = {};
for (int i = 0; i < 2; i++) {
struct drm_syncobj_create sync_create = {};
err = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
ASSERT_EQ(err, 0) << strerror(errno);
sync_handles[i] = sync_create.handle;
}
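/* sync_handles[0] chains the VM bind to the exec below (the exec waits on
 * it); sync_handles[1] is signaled by the exec and waited on from the CPU
 * once the batch is submitted.
 */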
struct drm_xe_vm_bind_op bind_ops[] = {
{
.obj = batch_bo_handle,
.pat_index = devinfo.pat.cached_coherent.index,
.range = BATCH_BO_SIZE,
.addr = batch_bo_addr,
.op = DRM_XE_VM_BIND_OP_MAP,
.flags = DRM_XE_VM_BIND_FLAG_READONLY,
},
{
.obj = data_bo_handle,
.pat_index = devinfo.pat.cached_coherent.index,
.range = DATA_BO_SIZE,
.addr = data_bo_addr,
.op = DRM_XE_VM_BIND_OP_MAP,
},
};
struct drm_xe_sync bind_syncs[] = {
{
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
.addr = 0,
},
};
bind_syncs[0].handle = sync_handles[0];
struct drm_xe_vm_bind bind = {
.vm_id = xe.vm_id,
.num_binds = ARRAY_SIZE(bind_ops),
.vector_of_binds = (uintptr_t)bind_ops,
.num_syncs = 1,
.syncs = (uintptr_t)bind_syncs,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
ASSERT_EQ(err, 0) << strerror(errno);
struct drm_xe_sync exec_syncs[] = {
{
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
.addr = 0,
},
{
.type = DRM_XE_SYNC_TYPE_SYNCOBJ,
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
.addr = 0,
}
};
exec_syncs[0].handle = sync_handles[0];
exec_syncs[1].handle = sync_handles[1];
struct drm_xe_exec exec = {
.exec_queue_id = xe.queue_id,
.num_syncs = 2,
.syncs = (uintptr_t)exec_syncs,
.address = batch_bo_addr,
.num_batch_buffer = 1,
};
err = intel_ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
ASSERT_EQ(err, 0) << strerror(errno);
struct drm_syncobj_wait wait = {
.handles = (uintptr_t)&sync_handles[1],
.timeout_nsec = INT64_MAX,
.count_handles = 1,
};
err = intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
ASSERT_EQ(err, 0) << strerror(errno);
}
}
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
address addr, uint32_t delta)
{
#if GFX_VER >= 8
uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
test->data_bo_addr : test->batch_bo_addr;
return addr_u64 + addr.offset + delta;
#else
assert(test->devinfo.kmd_type == INTEL_KMD_TYPE_I915);
drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
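/* Because submit_batch() uses I915_EXEC_HANDLE_LUT, target_handle is an
 * index into the execbuf object array: 0 = data BO, 1 = batch BO.
 */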
reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
reloc.delta = addr.offset + delta;
reloc.offset = (char *)location - (char *)test->batch_map;
reloc.presumed_offset = -1;
test->i915.relocs.push_back(reloc);
return reloc.delta;
#endif
}
bool *
__gen_get_write_fencing_status(mi_builder_test *test)
{
return &test->write_fence_status;
}
void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
return test->emit_dwords(num_dwords);
}
struct address
__gen_get_batch_address(mi_builder_test *test, void *location)
{
assert(location >= test->batch_map);
size_t offset = (char *)location - (char *)test->batch_map;
assert(offset < BATCH_BO_SIZE);
assert(offset <= UINT32_MAX);
return (struct address) {
.gem_handle = test->batch_bo_handle,
.offset = (uint32_t)offset,
};
}
#include "genxml/genX_pack.h"
#include "mi_builder.h"
TEST_F(mi_builder_test, imm_mem)
{
const uint64_t value = 0x0123456789abcdef;
mi_store(&b, out_mem64(0), mi_imm(value));
mi_store(&b, out_mem32(8), mi_imm(value));
submit_batch();
// 64 -> 64
EXPECT_EQ(*(uint64_t *)(output + 0), value);
// 64 -> 32
EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
/* mem -> mem copies are only supported on HSW+ */
#if GFX_VERx10 >= 75
TEST_F(mi_builder_test, mem_mem)
{
const uint64_t value = 0x0123456789abcdef;
*(uint64_t *)input = value;
mi_store(&b, out_mem64(0), in_mem64(0));
mi_store(&b, out_mem32(8), in_mem64(0));
mi_store(&b, out_mem32(16), in_mem32(0));
mi_store(&b, out_mem64(24), in_mem32(0));
submit_batch();
// 64 -> 64
EXPECT_EQ(*(uint64_t *)(output + 0), value);
// 64 -> 32
EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
// 32 -> 32
EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);
// 32 -> 64
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
#endif
TEST_F(mi_builder_test, imm_reg)
{
const uint64_t value = 0x0123456789abcdef;
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
submit_batch();
// 64 -> 64
EXPECT_EQ(*(uint64_t *)(output + 0), value);
// 64 -> 32
EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
TEST_F(mi_builder_test, mem_reg)
{
const uint64_t value = 0x0123456789abcdef;
*(uint64_t *)input = value;
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));
submit_batch();
// 64 -> 64
EXPECT_EQ(*(uint64_t *)(output + 0), value);
// 64 -> 32
EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
// 32 -> 32
EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);
// 32 -> 64
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
TEST_F(mi_builder_test, memset)
{
const unsigned memset_size = 256;
mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
submit_batch();
uint32_t *out_u32 = (uint32_t *)output;
for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)
EXPECT_EQ(out_u32[i], 0xdeadbeef);
}
TEST_F(mi_builder_test, memcpy)
{
const unsigned memcpy_size = 256;
uint8_t *in_u8 = (uint8_t *)input;
for (unsigned i = 0; i < memcpy_size; i++)
in_u8[i] = i;
mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);
submit_batch();
uint8_t *out_u8 = (uint8_t *)output;
for (unsigned i = 0; i < memcpy_size; i++)
EXPECT_EQ(out_u8[i], i);
}
/* Start of MI_MATH section */
#if GFX_VERx10 >= 75
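/* Compare a GPU-computed result against the same mi_builder expression
 * evaluated on the CPU: with all-immediate operands the builder folds the
 * operation at build time and mi_value_to_u64() extracts the constant.
 */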
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
TEST_F(mi_builder_test, inot)
{
const uint64_t value = 0x0123456789abcdef;
const uint32_t value_lo = (uint32_t)value;
const uint32_t value_hi = (uint32_t)(value >> 32);
memcpy(input, &value, sizeof(value));
mi_store(&b, out_mem64(0), mi_inot(&b, in_mem64(0)));
mi_store(&b, out_mem64(8), mi_inot(&b, mi_inot(&b, in_mem64(0))));
mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), ~value);
EXPECT_EQ(*(uint64_t *)(output + 8), value);
EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}
/* Test adding of immediates of all kinds including
*
* - All zeroes
* - All ones
* - inverted constants
*/
TEST_F(mi_builder_test, add_imm)
{
const uint64_t value = 0x0123456789abcdef;
const uint64_t add = 0xdeadbeefac0ffee2;
memcpy(input, &value, sizeof(value));
mi_store(&b, out_mem64(0),
mi_iadd(&b, in_mem64(0), mi_imm(0)));
mi_store(&b, out_mem64(8),
mi_iadd(&b, in_mem64(0), mi_imm(-1)));
mi_store(&b, out_mem64(16),
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
mi_store(&b, out_mem64(24),
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
mi_store(&b, out_mem64(32),
mi_iadd(&b, in_mem64(0), mi_imm(add)));
mi_store(&b, out_mem64(40),
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
mi_store(&b, out_mem64(48),
mi_iadd(&b, mi_imm(0), in_mem64(0)));
mi_store(&b, out_mem64(56),
mi_iadd(&b, mi_imm(-1), in_mem64(0)));
mi_store(&b, out_mem64(64),
mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
mi_store(&b, out_mem64(72),
mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
mi_store(&b, out_mem64(80),
mi_iadd(&b, mi_imm(add), in_mem64(0)));
mi_store(&b, out_mem64(88),
mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));
// And some add_imm just for good measure
mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));
submit_batch();
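// mi_imm(-1) is all ones, so adding it is a two's-complement subtract by
// one; likewise value + ~add below.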
EXPECT_EQ(*(uint64_t *)(output + 0), value);
EXPECT_EQ(*(uint64_t *)(output + 8), value - 1);
EXPECT_EQ(*(uint64_t *)(output + 16), value - 1);
EXPECT_EQ(*(uint64_t *)(output + 24), value);
EXPECT_EQ(*(uint64_t *)(output + 32), value + add);
EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add);
EXPECT_EQ(*(uint64_t *)(output + 48), value);
EXPECT_EQ(*(uint64_t *)(output + 56), value - 1);
EXPECT_EQ(*(uint64_t *)(output + 64), value - 1);
EXPECT_EQ(*(uint64_t *)(output + 72), value);
EXPECT_EQ(*(uint64_t *)(output + 80), value + add);
EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add);
EXPECT_EQ(*(uint64_t *)(output + 96), value);
EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
uint64_t values[8] = {
0x0123456789abcdef,
0xdeadbeefac0ffee2,
(uint64_t)-1,
1,
0,
1049571,
(uint64_t)-240058,
20204184,
};
memcpy(input, values, sizeof(values));
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
}
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
mi_imm(values[j])));
EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
mi_imm(values[j])));
EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
mi_imm(values[j])));
EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
mi_imm(values[j])));
}
}
}
TEST_F(mi_builder_test, z_nz)
{
uint64_t values[8] = {
0,
1,
UINT32_MAX,
(uint64_t)UINT32_MAX + 1,
UINT64_MAX,
};
memcpy(input, values, sizeof(values));
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
uint64_t *out_u64 = (uint64_t *)(output + i * 16);
EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
}
}
TEST_F(mi_builder_test, iand)
{
const uint64_t values[2] = {
0x0123456789abcdef,
0xdeadbeefac0ffee2,
};
memcpy(input, values, sizeof(values));
mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));
submit_batch();
EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
mi_imm(values[1])));
}
#if GFX_VER >= 8
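/* mi_store_relocated_imm() emits the store with a placeholder immediate and
 * returns a token; mi_relocate_store_imm() later patches the real value into
 * the already-emitted batch through the CPU mapping.  That requires a
 * writable batch, hence the GFX_VER >= 8 guard (MI_BUILDER_CAN_WRITE_BATCH).
 */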
TEST_F(mi_builder_test, imm_mem_relocated)
{
const uint64_t value = 0x0123456789abcdef;
struct mi_reloc_imm_token r0 = mi_store_relocated_imm(&b, out_mem64(0));
struct mi_reloc_imm_token r1 = mi_store_relocated_imm(&b, out_mem32(8));
mi_relocate_store_imm(r0, value);
mi_relocate_store_imm(r1, value);
submit_batch();
// 64 -> 64
EXPECT_EQ(*(uint64_t *)(output + 0), value);
// 64 -> 32
EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
TEST_F(mi_builder_test, imm_reg_relocated)
{
const uint64_t value = 0x0123456789abcdef;
struct mi_reloc_imm_token r0, r1;
r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
r1 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
mi_relocate_store_imm(r0, canary);
mi_relocate_store_imm(r1, value);
r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
r1 = mi_store_relocated_imm(&b, mi_reg32(RSVD_TEMP_REG));
mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
mi_relocate_store_imm(r0, canary);
mi_relocate_store_imm(r1, value);
submit_batch();
// 64 -> 64
EXPECT_EQ(*(uint64_t *)(output + 0), value);
// 64 -> 32
EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
#endif // GFX_VER >= 8
#if GFX_VERx10 >= 125
TEST_F(mi_builder_test, ishl)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
memcpy(input + 8, shifts, sizeof(shifts));
for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
mi_store(&b, out_mem64(i * 8),
mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
}
}
TEST_F(mi_builder_test, ushr)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
memcpy(input + 8, shifts, sizeof(shifts));
for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
mi_store(&b, out_mem64(i * 8),
mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
}
}
TEST_F(mi_builder_test, ushr_imm)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
const unsigned max_shift = 64;
for (unsigned i = 0; i <= max_shift; i++)
mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));
submit_batch();
for (unsigned i = 0; i <= max_shift; i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
mi_ushr_imm(&b, mi_imm(value), i));
}
}
TEST_F(mi_builder_test, ishr)
{
const uint64_t values[] = {
0x0123456789abcdef,
0xfedcba9876543210,
};
memcpy(input, values, sizeof(values));
uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
memcpy(input + 16, shifts, sizeof(shifts));
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
mi_store(&b, out_mem64(i * 8 + j * 16),
mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
}
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
}
}
}
TEST_F(mi_builder_test, ishr_imm)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
const unsigned max_shift = 64;
for (unsigned i = 0; i <= max_shift; i++)
mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));
submit_batch();
for (unsigned i = 0; i <= max_shift; i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
mi_ishr_imm(&b, mi_imm(value), i));
}
}
#endif /* if GFX_VERx10 >= 125 */
TEST_F(mi_builder_test, imul_imm)
{
uint64_t lhs[2] = {
0x0123456789abcdef,
0xdeadbeefac0ffee2,
};
memcpy(input, lhs, sizeof(lhs));
/* Some random 32-bit unsigned integers. The first four have been
* hand-chosen just to ensure some good low integers; the rest were
* generated with a python script.
*/
uint32_t rhs[20] = {
1, 2, 3, 5,
10800, 193, 64, 40,
3796, 256, 88, 473,
1421, 706, 175, 850,
39, 38985, 1941, 17,
};
for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
mi_store(&b, out_mem64(i * 160 + j * 8),
mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
}
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
}
}
}
TEST_F(mi_builder_test, ishl_imm)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
const unsigned max_shift = 64;
for (unsigned i = 0; i <= max_shift; i++)
mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));
submit_batch();
for (unsigned i = 0; i <= max_shift; i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
mi_ishl_imm(&b, mi_imm(value), i));
}
}
TEST_F(mi_builder_test, ushr32_imm)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
const unsigned max_shift = 64;
for (unsigned i = 0; i <= max_shift; i++)
mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));
submit_batch();
for (unsigned i = 0; i <= max_shift; i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
mi_ushr32_imm(&b, mi_imm(value), i));
}
}
TEST_F(mi_builder_test, udiv32_imm)
{
/* Some random 32-bit unsigned integers. The first four have been
* hand-chosen just to ensure some good low integers; the rest were
* generated with a python script.
*/
uint32_t values[20] = {
1, 2, 3, 5,
10800, 193, 64, 40,
3796, 256, 88, 473,
1421, 706, 175, 850,
39, 38985, 1941, 17,
};
memcpy(input, values, sizeof(values));
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
mi_store(&b, out_mem32(i * 80 + j * 4),
mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
}
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
}
}
}
TEST_F(mi_builder_test, store_if)
{
uint64_t u64 = 0xb453b411deadc0deull;
uint32_t u32 = 0x1337d00d;
/* Write values with the predicate enabled */
emit_cmd(GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_TRUE;
}
mi_store_if(&b, out_mem64(0), mi_imm(u64));
mi_store_if(&b, out_mem32(8), mi_imm(u32));
/* Set predicate to false, write garbage that shouldn't land */
emit_cmd(GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_FALSE;
}
mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), u64);
EXPECT_EQ(*(uint32_t *)(output + 8), u32);
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
#endif /* GFX_VERx10 >= 75 */
#if GFX_VERx10 >= 125
/*
* Indirect load/store tests. Only available on XE_HP+
*/
TEST_F(mi_builder_test, load_mem64_offset)
{
uint64_t values[8] = {
0x0123456789abcdef,
0xdeadbeefac0ffee2,
(uint64_t)-1,
1,
0,
1049571,
(uint64_t)-240058,
20204184,
};
memcpy(input, values, sizeof(values));
uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
memcpy(input + 64, offsets, sizeof(offsets));
for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
mi_store(&b, out_mem64(i * 8),
mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}
TEST_F(mi_builder_test, store_mem64_offset)
{
uint64_t values[8] = {
0x0123456789abcdef,
0xdeadbeefac0ffee2,
(uint64_t)-1,
1,
0,
1049571,
(uint64_t)-240058,
20204184,
};
memcpy(input, values, sizeof(values));
uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
memcpy(input + 64, offsets, sizeof(offsets));
for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
in_mem64(i * 8));
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
}
#endif /* GFX_VERx10 >= 125 */
#if GFX_VER >= 9
/*
* Control-flow tests. Only available on Gfx9+
*/
TEST_F(mi_builder_test, goto)
{
const uint64_t value = 0xb453b411deadc0deull;
mi_store(&b, out_mem64(0), mi_imm(value));
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
mi_goto(&b, &t);
/* This one should be skipped */
mi_store(&b, out_mem64(0), mi_imm(0));
mi_goto_target(&b, &t);
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), value);
}
#define MI_PREDICATE_RESULT 0x2418
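/* MI_PREDICATE_RESULT holds the outcome of the last MI_PREDICATE; the
 * mi_goto_if()/mi_continue_if() calls below branch when the value they are
 * given is non-zero.
 */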
TEST_F(mi_builder_test, goto_if)
{
const uint64_t values[] = {
0xb453b411deadc0deull,
0x0123456789abcdefull,
0,
};
mi_store(&b, out_mem64(0), mi_imm(values[0]));
emit_cmd(GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_FALSE;
}
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);
mi_store(&b, out_mem64(0), mi_imm(values[1]));
emit_cmd(GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_TRUE;
}
mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);
/* This one should be skipped */
mi_store(&b, out_mem64(0), mi_imm(values[2]));
mi_goto_target(&b, &t);
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}
TEST_F(mi_builder_test, loop_simple)
{
const uint64_t loop_count = 8;
mi_store(&b, out_mem64(0), mi_imm(0));
mi_loop(&b) {
mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));
mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
}
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
}
TEST_F(mi_builder_test, loop_break)
{
mi_loop(&b) {
mi_store(&b, out_mem64(0), mi_imm(1));
mi_break_if(&b, mi_imm(0));
mi_store(&b, out_mem64(0), mi_imm(2));
mi_break(&b);
mi_store(&b, out_mem64(0), mi_imm(3));
}
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}
TEST_F(mi_builder_test, loop_continue)
{
const uint64_t loop_count = 8;
mi_store(&b, out_mem64(0), mi_imm(0));
mi_store(&b, out_mem64(8), mi_imm(0));
mi_loop(&b) {
mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));
mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
mi_store(&b, out_mem64(8), mi_imm(5));
mi_continue(&b);
mi_store(&b, out_mem64(8), mi_imm(10));
}
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}
TEST_F(mi_builder_test, loop_continue_if)
{
const uint64_t loop_count = 8;
mi_store(&b, out_mem64(0), mi_imm(0));
mi_store(&b, out_mem64(8), mi_imm(0));
mi_loop(&b) {
mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));
mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
mi_store(&b, out_mem64(8), mi_imm(5));
emit_cmd(GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_FALSE;
}
mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));
mi_store(&b, out_mem64(8), mi_imm(10));
emit_cmd(GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
mip.CompareOperation = COMPARE_TRUE;
}
mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));
mi_store(&b, out_mem64(8), mi_imm(15));
}
submit_batch();
EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
#endif /* GFX_VER >= 9 */