| /* |
| * Copyright (C) 2019 Alyssa Rosenzweig |
| * Copyright (C) 2017-2018 Lyude Paul |
| * Copyright (C) 2019 Collabora, Ltd. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| |
| #include <assert.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/mman.h> |
| |
| #include "util/macros.h" |
| #include "util/u_debug.h" |
| #include "util/u_hexdump.h" |
| #include "lib/pan_props.h" |
| #include "decode.h" |
| |
| #include "compiler/bifrost/disassemble.h" |
| #include "compiler/valhall/disassemble.h" |
| #include "midgard/disassemble.h" |
| |
/* Used to distinguish dumped files; otherwise we would have to print the ctx
 * pointer, which is annoying for the user since it changes with every run */
| static int num_ctxs = 0; |
| |
| #define to_mapped_memory(x) \ |
| rb_node_data(struct pandecode_mapped_memory, x, node) |
| |
| /* |
| * Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it |
| * is contained in the interval the node represents. This lets us store |
| * intervals in our tree. |
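 *
 * For example, a node covering [0x1000, 0x1000 + 0x100) compares equal to
 * any key from 0x1000 to 0x10ff, so rb_tree_search() returns the mapping
 * containing the address.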
| */ |
| static int |
| pandecode_cmp_key(const struct rb_node *lhs, const void *key) |
| { |
| struct pandecode_mapped_memory *mem = to_mapped_memory(lhs); |
| uint64_t *gpu_va = (uint64_t *)key; |
| |
   if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
      return 0;

   /* Compute the sign directly rather than truncating the 64-bit difference
    * to int, which could flip it. */
   return (mem->gpu_va > *gpu_va) - (mem->gpu_va < *gpu_va);
| } |
| |
static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
   uint64_t lhs_va = to_mapped_memory(lhs)->gpu_va;
   uint64_t rhs_va = to_mapped_memory(rhs)->gpu_va;

   /* Avoid truncating the 64-bit difference to int */
   return (lhs_va > rhs_va) - (lhs_va < rhs_va);
}
| |
| static struct pandecode_mapped_memory * |
| pandecode_find_mapped_gpu_mem_containing_rw(struct pandecode_context *ctx, |
| uint64_t addr) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| struct rb_node *node = |
| rb_tree_search(&ctx->mmap_tree, &addr, pandecode_cmp_key); |
| |
| return to_mapped_memory(node); |
| } |
| |
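/*
 * Find the mapping containing addr and, as a side effect, mprotect() it
 * read-only for the duration of the decode, so stray CPU writes from the
 * decoder fault instead of corrupting the buffer. pandecode_map_read_write()
 * undoes this.
 */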
| struct pandecode_mapped_memory * |
| pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx, |
| uint64_t addr) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| struct pandecode_mapped_memory *mem = |
| pandecode_find_mapped_gpu_mem_containing_rw(ctx, addr); |
| |
| if (mem && mem->addr && !mem->ro) { |
| mprotect(mem->addr, mem->length, PROT_READ); |
| mem->ro = true; |
| util_dynarray_append(&ctx->ro_mappings, struct pandecode_mapped_memory *, |
| mem); |
| } |
| |
| return mem; |
| } |
| |
| /* |
| * To check for memory safety issues, validates that the given pointer in GPU |
| * memory is valid, containing at least sz bytes. This function is a tool to |
| * detect GPU-side memory bugs by validating pointers. |
| */ |
| void |
| pandecode_validate_buffer(struct pandecode_context *ctx, uint64_t addr, |
| size_t sz) |
| { |
| if (!addr) { |
| pandecode_log(ctx, "// XXX: null pointer deref\n"); |
| return; |
| } |
| |
| /* Find a BO */ |
| |
| struct pandecode_mapped_memory *bo = |
| pandecode_find_mapped_gpu_mem_containing(ctx, addr); |
| |
| if (!bo) { |
| pandecode_log(ctx, "// XXX: invalid memory dereference\n"); |
| return; |
| } |
| |
| /* Bounds check */ |
| |
   size_t offset = addr - bo->gpu_va;
   size_t total = offset + sz;

   if (total > bo->length) {
      pandecode_log(ctx,
                    "// XXX: buffer overrun. "
                    "Chunk of size %zu at offset %zu in buffer of size %zu. "
                    "Overrun by %zu bytes.\n",
                    sz, offset, bo->length, total - bo->length);
| return; |
| } |
| } |
| |
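/*
 * Restore PROT_READ | PROT_WRITE on every mapping we made read-only in
 * pandecode_find_mapped_gpu_mem_containing(), then forget them.
 */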
| void |
| pandecode_map_read_write(struct pandecode_context *ctx) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| util_dynarray_foreach(&ctx->ro_mappings, struct pandecode_mapped_memory *, |
| mem) { |
| (*mem)->ro = false; |
| mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE); |
| } |
| util_dynarray_clear(&ctx->ro_mappings); |
| } |
| |
| static void |
| pandecode_add_name(struct pandecode_context *ctx, |
| struct pandecode_mapped_memory *mem, uint64_t gpu_va, |
| const char *name) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| if (!name) { |
| /* If we don't have a name, assign one */ |
| |
      snprintf(mem->name, sizeof(mem->name), "memory_%" PRIx64, gpu_va);
| } else { |
| assert((strlen(name) + 1) < sizeof(mem->name)); |
| memcpy(mem->name, name, strlen(name) + 1); |
| } |
| } |
| |
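/*
 * Register (or update) a GPU VA -> CPU mapping with the decoder. A driver
 * would typically call this right after CPU-mapping a BO; a hypothetical
 * caller (fd and mmap_offset are placeholders) might look like:
 *
 *    void *cpu = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                     fd, mmap_offset);
 *    pandecode_inject_mmap(ctx, gpu_va, cpu, size, NULL);
 */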
| void |
| pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, void *cpu, |
| unsigned sz, const char *name) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
   /* First, check whether this is already mapped and we are just updating the
    * address */
| |
| struct pandecode_mapped_memory *existing = |
| pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va); |
| |
| if (existing && existing->gpu_va == gpu_va) { |
| existing->length = sz; |
| existing->addr = cpu; |
| pandecode_add_name(ctx, existing, gpu_va, name); |
| } else { |
| /* Otherwise, add a fresh mapping */ |
      struct pandecode_mapped_memory *mapped_mem =
         calloc(1, sizeof(*mapped_mem));
| mapped_mem->gpu_va = gpu_va; |
| mapped_mem->length = sz; |
| mapped_mem->addr = cpu; |
| pandecode_add_name(ctx, mapped_mem, gpu_va, name); |
| |
| /* Add it to the tree */ |
| rb_tree_insert(&ctx->mmap_tree, &mapped_mem->node, pandecode_cmp); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
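/*
 * Unregister a mapping previously injected with pandecode_inject_mmap(),
 * e.g. when the driver unmaps or frees the BO.
 */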
| void |
| pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va, |
| unsigned sz) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| struct pandecode_mapped_memory *mem = |
| pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va); |
| |
| if (mem) { |
| assert(mem->gpu_va == gpu_va); |
| assert(mem->length == sz); |
| |
| rb_tree_remove(&ctx->mmap_tree, &mem->node); |
| free(mem); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
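/*
 * Build a human-readable reference for a GPU pointer: "name + offset" if the
 * pointer falls inside a known mapping, the raw address otherwise. Returns a
 * malloc()ed string that the caller must free().
 */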
| char * |
| pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| struct pandecode_mapped_memory *mapped; |
| char *out = malloc(128); |
| |
| /* Try to find the corresponding mapped zone */ |
| |
| mapped = pandecode_find_mapped_gpu_mem_containing_rw(ctx, ptr); |
| |
| if (mapped) { |
| snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va)); |
| return out; |
| } |
| |
| /* Just use the raw address if other options are exhausted */ |
| |
| snprintf(out, 128, "0x%" PRIx64, ptr); |
| return out; |
| } |
| |
| void |
| pandecode_dump_file_open(struct pandecode_context *ctx) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| /* This does a getenv every frame, so it is possible to use |
| * setenv to change the base at runtime. |
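    * For example, PANDECODE_DUMP_FILE=stderr sends the dump to stderr, while
    * any other value is used as the base name for per-frame dump files.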
| */ |
| const char *dump_file_base = |
| debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump"); |
| if (!strcmp(dump_file_base, "stderr")) |
| ctx->dump_stream = stderr; |
| else if (!ctx->dump_stream) { |
| char buffer[1024]; |
| snprintf(buffer, sizeof(buffer), "%s.ctx-%d.%04d", dump_file_base, |
| ctx->id, ctx->dump_frame_count); |
| printf("pandecode: dump command stream to file %s\n", buffer); |
| ctx->dump_stream = fopen(buffer, "w"); |
| if (!ctx->dump_stream) |
| fprintf(stderr, |
| "pandecode: failed to open command stream log file %s\n", |
| buffer); |
| } |
| } |
| |
| static void |
| pandecode_dump_file_close(struct pandecode_context *ctx) |
| { |
| simple_mtx_assert_locked(&ctx->lock); |
| |
| if (ctx->dump_stream && ctx->dump_stream != stderr) { |
| if (fclose(ctx->dump_stream)) |
| perror("pandecode: dump file"); |
| |
| ctx->dump_stream = NULL; |
| } |
| } |
| |
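/*
 * Typical decoder lifecycle, sketched for a hypothetical driver hook-up
 * (gpu_va, cpu, size, jc_gpu_va and gpu_id are placeholders):
 *
 *    struct pandecode_context *ctx = pandecode_create_context(false);
 *    pandecode_inject_mmap(ctx, gpu_va, cpu, size, "my_bo");
 *    pandecode_jc(ctx, jc_gpu_va, gpu_id);
 *    pandecode_next_frame(ctx);
 *    pandecode_destroy_context(ctx);
 */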
| struct pandecode_context * |
| pandecode_create_context(bool to_stderr) |
| { |
| struct pandecode_context *ctx = calloc(1, sizeof(*ctx)); |
| |
| /* Not thread safe, but we shouldn't ever hit this, and even if we do, the |
| * worst that could happen is having the files dumped with their filenames |
| * in a different order. */ |
| ctx->id = num_ctxs++; |
| |
   /* Unless dumping to stderr, the stream is opened lazily by
    * pandecode_dump_file_open() and the file name can be changed at run time
    * through the PANDECODE_DUMP_FILE environment variable.
    */
| ctx->dump_stream = to_stderr ? stderr : NULL; |
| |
| rb_tree_init(&ctx->mmap_tree); |
| util_dynarray_init(&ctx->ro_mappings, NULL); |
| |
| simple_mtx_t mtx_init = SIMPLE_MTX_INITIALIZER; |
| memcpy(&ctx->lock, &mtx_init, sizeof(simple_mtx_t)); |
| |
| return ctx; |
| } |
| |
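/*
 * Close the current dump file; the next dump lazily opens a fresh file with
 * an incremented frame count in its name.
 */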
| void |
| pandecode_next_frame(struct pandecode_context *ctx) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| pandecode_dump_file_close(ctx); |
| ctx->dump_frame_count++; |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
| void |
| pandecode_destroy_context(struct pandecode_context *ctx) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &ctx->mmap_tree, |
| node) { |
| rb_tree_remove(&ctx->mmap_tree, &it->node); |
| free(it); |
| } |
| |
| util_dynarray_fini(&ctx->ro_mappings); |
| pandecode_dump_file_close(ctx); |
| |
| simple_mtx_unlock(&ctx->lock); |
| |
| free(ctx); |
| } |
| |
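/* Hexdump every live mapping, with its name and GPU VA, to the dump file. */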
| void |
| pandecode_dump_mappings(struct pandecode_context *ctx) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| pandecode_dump_file_open(ctx); |
| |
| rb_tree_foreach(struct pandecode_mapped_memory, it, &ctx->mmap_tree, node) { |
| if (!it->addr || !it->length) |
| continue; |
| |
| fprintf(ctx->dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name, |
| it->gpu_va); |
| |
| u_hexdump(ctx->dump_stream, it->addr, it->length, false); |
| fprintf(ctx->dump_stream, "\n"); |
| } |
| |
| fflush(ctx->dump_stream); |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
| void |
| pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va, |
| unsigned gpu_id) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| switch (pan_arch(gpu_id)) { |
| case 4: |
| pandecode_abort_on_fault_v4(ctx, jc_gpu_va); |
| break; |
| case 5: |
| pandecode_abort_on_fault_v5(ctx, jc_gpu_va); |
| break; |
| case 6: |
| pandecode_abort_on_fault_v6(ctx, jc_gpu_va); |
| break; |
| case 7: |
| pandecode_abort_on_fault_v7(ctx, jc_gpu_va); |
| break; |
| case 9: |
| pandecode_abort_on_fault_v9(ctx, jc_gpu_va); |
| break; |
| default: |
| unreachable("Unsupported architecture"); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
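/*
 * Decode a job chain. This is the main entry point for Job Manager GPUs
 * (v4 through v9); command-stream (CSF) GPUs (v10+) use the
 * pandecode_interpret_cs()/pandecode_cs_binary() entry points below instead.
 */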
| void |
| pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, unsigned gpu_id) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| switch (pan_arch(gpu_id)) { |
| case 4: |
| pandecode_jc_v4(ctx, jc_gpu_va, gpu_id); |
| break; |
| case 5: |
| pandecode_jc_v5(ctx, jc_gpu_va, gpu_id); |
| break; |
| case 6: |
| pandecode_jc_v6(ctx, jc_gpu_va, gpu_id); |
| break; |
| case 7: |
| pandecode_jc_v7(ctx, jc_gpu_va, gpu_id); |
| break; |
| case 9: |
| pandecode_jc_v9(ctx, jc_gpu_va, gpu_id); |
| break; |
| default: |
| unreachable("Unsupported architecture"); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
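/*
 * Interpret a CSF command queue of size bytes at queue_gpu_va, with regs
 * giving the initial contents of the CS register file.
 */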
| |
| void |
| pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va, |
| uint32_t size, unsigned gpu_id, uint32_t *regs) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| switch (pan_arch(gpu_id)) { |
| case 10: |
| pandecode_interpret_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs); |
| break; |
| case 12: |
| pandecode_interpret_cs_v12(ctx, queue_gpu_va, size, gpu_id, regs); |
| break; |
| case 13: |
| pandecode_interpret_cs_v13(ctx, queue_gpu_va, size, gpu_id, regs); |
| break; |
| default: |
| unreachable("Unsupported architecture"); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
| void |
| pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va, |
| uint32_t size, unsigned gpu_id) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| switch (pan_arch(gpu_id)) { |
| case 10: |
| pandecode_cs_binary_v10(ctx, bin_gpu_va, size, gpu_id); |
| break; |
| case 12: |
| pandecode_cs_binary_v12(ctx, bin_gpu_va, size, gpu_id); |
| break; |
| case 13: |
| pandecode_cs_binary_v13(ctx, bin_gpu_va, size, gpu_id); |
| break; |
| default: |
| unreachable("Unsupported architecture"); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
| void |
| pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va, |
| uint32_t size, unsigned gpu_id) |
| { |
| simple_mtx_lock(&ctx->lock); |
| |
| switch (pan_arch(gpu_id)) { |
| case 10: |
| pandecode_cs_trace_v10(ctx, trace_gpu_va, size, gpu_id); |
| break; |
| case 12: |
| pandecode_cs_trace_v12(ctx, trace_gpu_va, size, gpu_id); |
| break; |
| case 13: |
| pandecode_cs_trace_v13(ctx, trace_gpu_va, size, gpu_id); |
| break; |
| default: |
| unreachable("Unsupported architecture"); |
| } |
| |
| simple_mtx_unlock(&ctx->lock); |
| } |
| |
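/*
 * Disassemble the shader at shader_ptr, selecting the Midgard (v4-v5),
 * Bifrost (v6-v7), or Valhall (v9+) disassembler from the GPU id and using
 * the remainder of the mapping as an upper bound on the code size.
 */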
| void |
| pandecode_shader_disassemble(struct pandecode_context *ctx, uint64_t shader_ptr, |
| unsigned gpu_id) |
| { |
| uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr); |
| |
| /* Compute maximum possible size */ |
| struct pandecode_mapped_memory *mem = |
| pandecode_find_mapped_gpu_mem_containing(ctx, shader_ptr); |
| size_t sz = mem->length - (shader_ptr - mem->gpu_va); |
| |
| /* Print some boilerplate to clearly denote the assembly (which doesn't |
| * obey indentation rules), and actually do the disassembly! */ |
| |
   pandecode_log_cont(ctx, "\nShader %p (GPU VA %" PRIx64 ") sz %zu\n", code,
                      shader_ptr, sz);
| |
   if (pan_arch(gpu_id) >= 9) {
      disassemble_valhall(ctx->dump_stream, (const uint64_t *)code, sz, true);
   } else if (pan_arch(gpu_id) >= 6) {
      disassemble_bifrost(ctx->dump_stream, code, sz, false);
   } else {
      disassemble_midgard(ctx->dump_stream, code, sz, gpu_id, true);
   }
| |
| pandecode_log_cont(ctx, "\n\n"); |
| } |