| /* |
| * Copyright © 2017 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #ifndef IRIS_BATCH_DOT_H |
| #define IRIS_BATCH_DOT_H |
| |
| #include <stdint.h> |
| #include <stdbool.h> |
| #include <string.h> |
| |
| #include "util/u_dynarray.h" |
| #include "util/perf/u_trace.h" |
| |
| #include "decoder/intel_decoder.h" |
| #include "ds/intel_driver_ds.h" |
| #include "ds/intel_tracepoints.h" |
| |
| #include "iris_fence.h" |
| #include "iris_fine_fence.h" |
| |
| struct iris_context; |
| |
| /* The kernel assumes batchbuffers are smaller than 256kB. */ |
| #define MAX_BATCH_SIZE (256 * 1024) |
| |
/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining). Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control, for a worst case of 12 + 24 + 24 = 60 bytes.
 */
| #define BATCH_RESERVED 60 |
| |
| /* Our target batch size - flush approximately at this point. */ |
| #define BATCH_SZ (128 * 1024 - BATCH_RESERVED) |
| |
enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

/* The number of batches per context, used to size arrays like other_batches[]. */
#define IRIS_BATCH_COUNT 3
| |
/* Same definition as drm_i915_gem_exec_fence, so drm_i915_gem_execbuffer2
 * can directly use exec_fences without extra memory allocation.
| */ |
| struct iris_batch_fence { |
| uint32_t handle; |
| |
| #define IRIS_BATCH_FENCE_WAIT (1 << 0) |
| #define IRIS_BATCH_FENCE_SIGNAL (1 << 1) |
| uint32_t flags; |
| }; |
| |
| struct iris_batch { |
| struct iris_context *ice; |
| struct iris_screen *screen; |
| struct util_debug_callback *dbg; |
| struct pipe_device_reset_callback *reset; |
| |
| /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */ |
| enum iris_batch_name name; |
| |
| /** Current batchbuffer being queued up. */ |
| struct iris_bo *bo; |
| void *map; |
| void *map_next; |
| |
| /** Size of the primary batch being submitted to execbuf (in bytes). */ |
| unsigned primary_batch_size; |
| |
| /** Total size of all chained batches (in bytes). */ |
| unsigned total_chained_batch_size; |
| |
| /** Last binder address set in this hardware context. */ |
| uint64_t last_binder_address; |
| |
| /** Write fencing status for mi_builder. */ |
| bool write_fence_status; |
| |
| union { |
| struct { |
| uint32_t ctx_id; |
| uint32_t exec_flags; |
| } i915; |
| struct { |
| uint32_t exec_queue_id; |
| } xe; |
| }; |
| |
| /** A list of all BOs referenced by this batch */ |
| struct iris_bo **exec_bos; |
| int exec_count; |
| int exec_array_size; |
| /** Bitset of whether this batch writes to BO `i'. */ |
| BITSET_WORD *bos_written; |
| uint32_t max_gem_handle; |
| |
   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the
    * first instruction is an MI_BATCH_BUFFER_END).
| */ |
| bool noop_enabled; |
| |
   /** Whether the first utrace point has been recorded. */
| bool begin_trace_recorded; |
| |
| /** |
| * A list of iris_syncobjs associated with this batch. |
| * |
| * The first list entry will always be a signalling sync-point, indicating |
| * that this batch has completed. The others are likely to be sync-points |
| * to wait on before executing the batch. |
| */ |
| struct util_dynarray syncobjs; |
| |
| /** A list of iris_batch_fences to have execbuf signal or wait on */ |
| struct util_dynarray exec_fences; |
| |
| /** The amount of aperture space (in bytes) used by all exec_bos */ |
| int aperture_space; |
| |
| struct { |
| /** Uploader to use for sequence numbers */ |
| struct u_upload_mgr *uploader; |
| |
      /** GPU buffer and CPU map where our seqnos will be written. */
| struct iris_state_ref ref; |
| uint32_t *map; |
| |
| /** The sequence number to write the next time we add a fence. */ |
| uint32_t next; |
| } fine_fences; |
| |
| /** A seqno (and syncobj) for the last batch that was submitted. */ |
| struct iris_fine_fence *last_fence; |
| |
| /** List of other batches which we might need to flush to use a BO */ |
| struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1]; |
| unsigned num_other_batches; |
| |
| /** |
| * Table containing struct iris_bo * that have been accessed within this |
| * batchbuffer and would need flushing before being used with a different |
| * aux mode. |
| */ |
| struct hash_table *bo_aux_modes; |
| |
| struct intel_batch_decode_ctx decoder; |
| struct hash_table_u64 *state_sizes; |
| |
| /** |
| * Matrix representation of the cache coherency status of the GPU at the |
| * current end point of the batch. For every i and j, |
| * coherent_seqnos[i][j] denotes the seqno of the most recent flush of |
| * cache domain j visible to cache domain i (which obviously implies that |
| * coherent_seqnos[i][i] is the most recent flush of cache domain i). This |
| * can be used to efficiently determine whether synchronization is |
| * necessary before accessing data from cache domain i if it was previously |
| * accessed from another cache domain j. |
| */ |
| uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS]; |
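
   /* As an illustrative sketch (not the driver's exact code): if a BO tracks
    * the seqno of its most recent access from each domain in an array such
    * as bo->last_seqnos[] (a hypothetical name here), then accessing it from
    * domain i requires extra synchronization iff, for some domain j,
    *
    *    bo->last_seqnos[j] > coherent_seqnos[i][j]
    *
    * i.e. some prior access from domain j is not yet visible to domain i.
    */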
| |
| /** |
| * A vector representing the cache coherency status of the L3. For each |
| * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most |
| * recent flush of that domain which is visible to L3 clients. |
| */ |
| uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS]; |
| |
| /** |
| * Sequence number used to track the completion of any subsequent memory |
| * operations in the batch until the next sync boundary. |
| */ |
| uint64_t next_seqno; |
| |
| /** Have we emitted any draw calls to this batch? */ |
| bool contains_draw; |
| |
| /** Have we emitted any draw calls with next_seqno? */ |
| bool contains_draw_with_next_seqno; |
| |
| /** Batch contains fence signal operation. */ |
| bool contains_fence_signal; |
| |
| /** |
| * Number of times iris_batch_sync_region_start() has been called without a |
| * matching iris_batch_sync_region_end() on this batch. |
| */ |
| uint32_t sync_region_depth; |
| |
| uint32_t last_aux_map_state; |
| struct iris_measure_batch *measure; |
| |
| /** Where tracepoints are recorded */ |
| struct u_trace trace; |
| |
| /** Batch wrapper structure for perfetto */ |
| struct intel_ds_queue ds; |
| |
| uint8_t num_3d_primitives_emitted; |
| }; |
| |
| void iris_init_batches(struct iris_context *ice); |
| void iris_chain_to_new_batch(struct iris_batch *batch); |
| void iris_destroy_batches(struct iris_context *ice); |
| void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate); |
| |
| void iris_batch_maybe_begin_frame(struct iris_batch *batch); |
| |
| void _iris_batch_flush(struct iris_batch *batch, const char *file, int line); |
| #define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__) |
| |
| bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo); |
| |
| bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable); |
| |
| void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo, |
| bool writable, enum iris_domain access); |
| |
| enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch); |
| |
| bool iris_batch_syncobj_to_sync_file_fd(struct iris_batch *batch, int *out_fd); |
| |
| static inline unsigned |
| iris_batch_bytes_used(struct iris_batch *batch) |
| { |
| return batch->map_next - batch->map; |
| } |
| |
| static inline uint64_t |
| iris_batch_current_address_u64(struct iris_batch *batch) |
| { |
| return batch->bo->address + (batch->map_next - batch->map); |
| } |
| |
| /** |
 * Ensure the current command buffer has \p size bytes of space
| * remaining. If not, this creates a secondary batch buffer and emits |
| * a jump from the primary batch to the start of the secondary. |
| * |
| * Most callers want iris_get_command_space() instead. |
| */ |
| static inline void |
| iris_require_command_space(struct iris_batch *batch, unsigned size) |
| { |
| const unsigned required_bytes = iris_batch_bytes_used(batch) + size; |
| |
| if (required_bytes >= BATCH_SZ) { |
| iris_chain_to_new_batch(batch); |
| } |
| } |
| |
| /** |
| * Allocate space in the current command buffer, and return a pointer |
| * to the mapped area so the caller can write commands there. |
| * |
| * This should be called whenever emitting commands. |
| */ |
| static inline void * |
| iris_get_command_space(struct iris_batch *batch, unsigned bytes) |
| { |
| if (!batch->begin_trace_recorded) { |
| batch->begin_trace_recorded = true; |
| iris_batch_maybe_begin_frame(batch); |
| trace_intel_begin_batch(&batch->trace); |
| } |
| iris_require_command_space(batch, bytes); |
| void *map = batch->map_next; |
| batch->map_next += bytes; |
| return map; |
| } |
| |
| /** |
| * Helper to emit GPU commands - allocates space, copies them there. |
| */ |
| static inline void |
| iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size) |
| { |
| void *map = iris_get_command_space(batch, size); |
| memcpy(map, data, size); |
| } |
| |
| /** |
| * Get a pointer to the batch's signalling syncobj. Does not refcount. |
| */ |
| static inline struct iris_syncobj * |
| iris_batch_get_signal_syncobj(struct iris_batch *batch) |
| { |
| /* The signalling syncobj is the first one in the list. */ |
| struct iris_syncobj *syncobj = |
| ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0]; |
| return syncobj; |
| } |
| |
| |
| /** |
| * Take a reference to the batch's signalling syncobj. |
| * |
 * Callers can use this to wait for the current batch under construction
| * to complete (after flushing it). |
| */ |
| static inline void |
| iris_batch_reference_signal_syncobj(struct iris_batch *batch, |
| struct iris_syncobj **out_syncobj) |
| { |
| struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch); |
| iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj); |
| } |
| |
| /** |
| * Record the size of a piece of state for use in INTEL_DEBUG=bat printing. |
| */ |
| static inline void |
| iris_record_state_size(struct hash_table_u64 *ht, |
| uint32_t offset_from_base, |
| uint32_t size) |
| { |
| if (ht) { |
| _mesa_hash_table_u64_insert(ht, offset_from_base, |
| (void *)(uintptr_t) size); |
| } |
| } |
| |
| /** |
 * Mark the start of a region in the batch with a stable synchronization
| * sequence number. Any buffer object accessed by the batch buffer only needs |
| * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited |
| * by iris_batch_sync_region_start() and iris_batch_sync_region_end(). |
| */ |
| static inline void |
| iris_batch_sync_region_start(struct iris_batch *batch) |
| { |
| batch->sync_region_depth++; |
| } |
| |
| /** |
 * Mark the end of a region in the batch with a stable synchronization
 * sequence number. Should be called once after each call to
| * iris_batch_sync_region_start(). |
| */ |
| static inline void |
| iris_batch_sync_region_end(struct iris_batch *batch) |
| { |
| assert(batch->sync_region_depth); |
| batch->sync_region_depth--; |
| } |
| |
| /** |
| * Start a new synchronization section at the current point of the batch, |
| * unless disallowed by a previous iris_batch_sync_region_start(). |
| */ |
| static inline void |
| iris_batch_sync_boundary(struct iris_batch *batch) |
| { |
| if (!batch->sync_region_depth) { |
| batch->contains_draw_with_next_seqno = false; |
| batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno); |
| assert(batch->next_seqno > 0); |
| } |
| } |
| |
| /** |
| * Update the cache coherency status of the batch to reflect a flush of the |
| * specified caching domain. |
| */ |
| static inline void |
| iris_batch_mark_flush_sync(struct iris_batch *batch, |
| enum iris_domain access) |
| { |
| const struct intel_device_info *devinfo = batch->screen->devinfo; |
| |
| if (iris_domain_is_l3_coherent(devinfo, access)) |
| batch->l3_coherent_seqnos[access] = batch->next_seqno - 1; |
| else |
| batch->coherent_seqnos[access][access] = batch->next_seqno - 1; |
| } |
| |
| /** |
| * Update the cache coherency status of the batch to reflect an invalidation |
| * of the specified caching domain. All prior flushes of other caches will be |
| * considered visible to the specified caching domain. |
| */ |
| static inline void |
| iris_batch_mark_invalidate_sync(struct iris_batch *batch, |
| enum iris_domain access) |
| { |
| const struct intel_device_info *devinfo = batch->screen->devinfo; |
| |
| for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) { |
| if (i == access) |
| continue; |
| |
| if (iris_domain_is_l3_coherent(devinfo, access)) { |
| if (iris_domain_is_read_only(access)) { |
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines.
| * |
| * If domain 'i' is L3-coherent, it sees the latest data in L3, |
| * otherwise it sees the latest globally-observable data. |
| */ |
| batch->coherent_seqnos[access][i] = |
| iris_domain_is_l3_coherent(devinfo, i) ? |
| batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i]; |
| } else { |
            /* Invalidating an L3-coherent write domain, however, does not
             * trigger an invalidation of any matching L3 cachelines.
             *
             * The invalidated domain sees the latest data from domain i
             * visible to L3 clients.
             */
| batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i]; |
| } |
| } else { |
| /* "access" isn't L3-coherent, so invalidating it means it sees the |
| * most recent globally-observable data from domain i. |
| */ |
| batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i]; |
| } |
| } |
| } |
| |
| /** |
| * Update the cache coherency status of the batch to reflect a reset. All |
| * previously accessed data can be considered visible to every caching domain |
| * thanks to the kernel's heavyweight flushing at batch buffer boundaries. |
| */ |
| static inline void |
| iris_batch_mark_reset_sync(struct iris_batch *batch) |
| { |
| for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) { |
| batch->l3_coherent_seqnos[i] = batch->next_seqno - 1; |
| for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++) |
| batch->coherent_seqnos[i][j] = batch->next_seqno - 1; |
| } |
| } |
| |
| const char * |
| iris_batch_name_to_string(enum iris_batch_name name); |
| |
| bool |
| iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret); |
| |
| #define iris_foreach_batch(ice, batch) \ |
| for (struct iris_batch *batch = &ice->batches[0]; \ |
| batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \ |
| ++batch) |
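
/* Example (note that the macro declares `batch' itself):
 *
 *    iris_foreach_batch(ice, batch) {
 *       if (iris_batch_references(batch, bo))
 *          iris_batch_flush(batch);
 *    }
 */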
| |
| void iris_batch_update_syncobjs(struct iris_batch *batch); |
| unsigned iris_batch_num_fences(struct iris_batch *batch); |
| |
| void iris_dump_fence_list(struct iris_batch *batch); |
| void iris_dump_bo_list(struct iris_batch *batch); |
| void iris_batch_decode_batch(struct iris_batch *batch); |
| |
| #endif |