/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef VK_DEVICE_H
#define VK_DEVICE_H
#include "rmv/vk_rmv_common.h"
#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"
#include "util/u_sync_provider.h"
#if defined(USE_MAGMA)
#include <lib/magma/magma.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
struct vk_acceleration_structure_build_ops;
struct vk_command_buffer_ops;
struct vk_device_shader_ops;
struct vk_sync;
enum vk_queue_submit_mode {
/** Submits happen immediately
*
* `vkQueueSubmit()` and `vkQueueBindSparse()` call
* ``vk_queue::driver_submit`` directly for all submits and the last call to
* ``vk_queue::driver_submit`` will have completed by the time
* `vkQueueSubmit()` or `vkQueueBindSparse()` return.
*/
VK_QUEUE_SUBMIT_MODE_IMMEDIATE,
/** Submits may be deferred until a future `vk_queue_flush()`
*
* Submits are added to the queue and `vk_queue_flush()` is called.
* However, any submits with unsatisfied dependencies will be left on the
* queue until a future `vk_queue_flush()` call. This is used for
* implementing emulated timeline semaphores without threading.
*/
VK_QUEUE_SUBMIT_MODE_DEFERRED,
/** Submits will be added to the queue and handled later by a thread
*
* This places additional requirements on the vk_sync types used by the
* driver:
*
* 1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` also
* support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads can
* sort out when a given submit has all its dependencies resolved.
*
* 2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
* also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset
* semaphores after waiting on them.
*
* 3. All vk_sync types used as permanent payloads of semaphores support
* ``vk_sync_type::move`` so that it can move the pending signal into a
* temporary vk_sync and reset the semaphore.
*
* This is required for shared timeline semaphores where we need to handle
* wait-before-signal by threading in the driver if we ever see an
* unresolved dependency. An illustrative vk_sync_type satisfying these
* requirements is sketched after this enum.
*/
VK_QUEUE_SUBMIT_MODE_THREADED,
/** Threaded but only if we need it to resolve dependencies
*
* This imposes all the same requirements on `vk_sync` types as
* `VK_QUEUE_SUBMIT_MODE_THREADED`.
*/
VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};
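/* Illustrative sketch (not part of this header): a binary vk_sync_type that
* satisfies the VK_QUEUE_SUBMIT_MODE_THREADED requirements above. The
* VK_SYNC_FEATURE_* flags are real vk_sync feature bits; the drv_* type and
* function names are hypothetical.
*
*    const struct vk_sync_type drv_bo_sync_type = {
*       .size = sizeof(struct drv_bo_sync),
*       .features = VK_SYNC_FEATURE_BINARY |
*                   VK_SYNC_FEATURE_GPU_WAIT |
*                   VK_SYNC_FEATURE_CPU_WAIT |
*                   VK_SYNC_FEATURE_CPU_RESET |
*                   VK_SYNC_FEATURE_WAIT_PENDING,
*       .init = drv_bo_sync_init,
*       .finish = drv_bo_sync_finish,
*       .reset = drv_bo_sync_reset,
*       .move = drv_bo_sync_move,
*       .wait = drv_bo_sync_wait,
*    };
*/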
struct vk_device_memory_report {
PFN_vkDeviceMemoryReportCallbackEXT callback;
void *data;
};
/** Base struct for VkDevice */
struct vk_device {
struct vk_object_base base;
/** Allocator used to create this device
*
* This is used as a fall-back for when a NULL pAllocator is passed into a
* device-level create function such as vkCreateImage().
*/
VkAllocationCallbacks alloc;
/** Pointer to the physical device */
struct vk_physical_device *physical;
/** Table of enabled extensions */
struct vk_device_extension_table enabled_extensions;
/** Table of enabled features */
struct vk_features enabled_features;
/** Device-level dispatch table */
struct vk_device_dispatch_table dispatch_table;
/** Command dispatch table
*
* This is used for emulated secondary command buffer support. To use
* emulated (trace/replay) secondary command buffers:
*
* 1. Provide your "real" command buffer dispatch table here. Because
* this doesn't get populated by vk_device_init(), the driver will have
* to add the vk_common entrypoints to this table itself.
*
* 2. Add vk_enqueue_unless_primary_device_entrypoint_table to your device
* level dispatch table.
*/
const struct vk_device_dispatch_table *command_dispatch_table;
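/* A sketch of the setup described above, assuming the existing
* vk_device_dispatch_table_from_entrypoints() helper and the generated
* vk_common_device_entrypoints table; the drv_* names are hypothetical and
* the exact ordering is illustrative only.
*
*    static struct vk_device_dispatch_table drv_cmd_dispatch;
*    vk_device_dispatch_table_from_entrypoints(
*       &drv_cmd_dispatch, &drv_device_entrypoints, true);
*    vk_device_dispatch_table_from_entrypoints(
*       &drv_cmd_dispatch, &vk_common_device_entrypoints, false);
*    device->vk.command_dispatch_table = &drv_cmd_dispatch;
*
*    // In the device-level dispatch table passed to vk_device_init(),
*    // give the enqueue entrypoints priority:
*    vk_device_dispatch_table_from_entrypoints(
*       &dispatch_table, &vk_enqueue_unless_primary_device_entrypoint_table, true);
*    vk_device_dispatch_table_from_entrypoints(
*       &dispatch_table, &drv_device_entrypoints, false);
*/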
/** Command buffer vtable when using the common command pool */
const struct vk_command_buffer_ops *command_buffer_ops;
/** Shader vtable for VK_EXT_shader_object and common pipelines */
const struct vk_device_shader_ops *shader_ops;
/** Acceleration structure build vtable for common BVH building. */
const struct vk_acceleration_structure_build_ops *as_build_ops;
/**
* Write data to a buffer from the command processor. This is simpler than
* setting up a staging buffer and faster for small writes, but is not
* meant for larger amounts of data. \p data is owned by the caller and the
* driver is expected to write it out directly to the command stream as
* part of an immediate write packet.
*/
void (*write_buffer_cp)(VkCommandBuffer cmdbuf, VkDeviceAddress addr,
void *data, uint32_t size);
/* Flush data written via write_buffer_cp. Users must use a normal pipeline
* barrier in order to read this data, with the appropriate destination
* access, but this replaces the source access mask.
*/
void (*flush_buffer_write_cp)(VkCommandBuffer cmdbuf);
/* An unaligned dispatch function. This launches a number of threads that
* may not be a multiple of the workgroup size, which may result in partial
* workgroups.
*/
void (*cmd_dispatch_unaligned)(VkCommandBuffer cmdbuf,
uint32_t invocations_x,
uint32_t invocations_y,
uint32_t invocations_z);
/* vkCmdFillBuffer but with a device address. */
void (*cmd_fill_buffer_addr)(VkCommandBuffer cmdbuf,
VkDeviceAddress devAddr,
VkDeviceSize size,
uint32_t data);
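/* Illustrative caller-side sequence for the CP helpers above (a sketch; the
* addresses, sizes, and counts are hypothetical):
*
*    uint32_t zero = 0;
*    device->write_buffer_cp(cmdbuf, header_addr, &zero, sizeof(zero));
*    device->flush_buffer_write_cp(cmdbuf);
*    // ...then a normal pipeline barrier with the appropriate dstAccessMask
*    // before anything reads header_addr...
*    device->cmd_fill_buffer_addr(cmdbuf, scratch_addr, scratch_size, 0);
*    device->cmd_dispatch_unaligned(cmdbuf, primitive_count, 1, 1);
*/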
/** Driver provided callback for capturing traces
*
* Triggers for this callback are:
* - Keyboard input (F12)
* - Creation of a trigger file
* - Reaching the trace frame
*/
VkResult (*capture_trace)(VkQueue queue);
uint32_t current_frame;
bool trace_hotkey_trigger;
simple_mtx_t trace_mtx;
/* For VK_EXT_private_data */
uint32_t private_data_next_index;
struct list_head queues;
struct {
int lost;
bool reported;
} _lost;
/** Checks the status of this device
*
* This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
* It is called before ``vk_queue::driver_submit`` and after every non-trivial
* wait operation to ensure the device is still around. This gives the
* driver a hook to ask the kernel if its device is still valid. If the
* kernel says the device has been lost, it MUST call vk_device_set_lost().
*
* This function may be called from any thread at any time.
*/
VkResult (*check_status)(struct vk_device *device);
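/* A minimal sketch of a driver's check_status hook; the drv_* names and the
* kernel query are hypothetical:
*
*    static VkResult
*    drv_check_status(struct vk_device *vk_device)
*    {
*       struct drv_device *device =
*          container_of(vk_device, struct drv_device, vk);
*
*       if (drv_kernel_context_is_lost(device))
*          return vk_device_set_lost(vk_device, "GPU context lost");
*
*       return VK_SUCCESS;
*    }
*/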
/* Get the device timestamp in the VK_TIME_DOMAIN_DEVICE_KHR domain */
VkResult (*get_timestamp)(struct vk_device *device, uint64_t *timestamp);
/** Host time domain used for timestamp calibration */
VkTimeDomainKHR calibrate_time_domain;
/** Period of VK_TIME_DOMAIN_DEVICE_KHR */
uint64_t device_time_domain_period;
/** Creates a vk_sync that wraps a memory object
*
* This is always a one-shot object so it need not track any additional
* state. Since it's intended for synchronizing between processes using
* implicit synchronization mechanisms, no such tracking would be valid
* anyway.
*
* If `signal_memory` is set, the resulting vk_sync will be used to signal
* the memory object from a queue via ``vk_queue_submit::signals``. The common
* code guarantees that, by the time vkQueueSubmit() returns, the signal
* operation has been submitted to the kernel via the driver's
* ``vk_queue::driver_submit`` hook. This means that any vkQueueSubmit() call
* which needs implicit synchronization may block.
*
* If `signal_memory` is not set, it can be assumed that memory object
* already has a signal operation pending from some other process and we
* need only wait on it.
*/
VkResult (*create_sync_for_memory)(struct vk_device *device,
VkDeviceMemory memory,
bool signal_memory,
struct vk_sync **sync_out);
#if defined(USE_MAGMA)
magma_connection_t magma_connection;
#endif
/* Set by vk_device_set_drm_fd() */
struct util_sync_provider *sync;
/** Implicit pipeline cache, or NULL */
struct vk_pipeline_cache *mem_cache;
/** An enum describing how timeline semaphores work */
enum vk_device_timeline_mode {
/** Timeline semaphores are not supported */
VK_DEVICE_TIMELINE_MODE_NONE,
/** Timeline semaphores are emulated with vk_timeline
*
* In this mode, timeline semaphores are emulated using vk_timeline
* which is a collection of binary semaphores, one per time point.
* These timeline semaphores cannot be shared because the data structure
* exists entirely in userspace. These timelines are virtually
* invisible to the driver; all it sees are the binary vk_syncs, one per
* time point.
*
* To handle wait-before-signal, we place all vk_queue_submits in the
* queue's submit list in vkQueueSubmit() and call vk_device_flush() at
* key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
* This ensures that, as soon as a given submit's dependencies are fully
* resolvable, it gets submitted to the driver.
*/
VK_DEVICE_TIMELINE_MODE_EMULATED,
/** Timeline semaphores are a kernel-assisted emulation
*
* In this mode, timeline semaphores are still technically an emulation
* in the sense that they don't support wait-before-signal natively.
* Instead, all GPU-waitable objects support a CPU wait-for-pending
* operation which lets the userspace driver wait until a given event
* on the (possibly shared) vk_sync is pending. The event is "pending"
* if a job has been submitted to the kernel (possibly from a different
* process) which will signal it. In vkQueueSubit, we use this wait
* mode to detect waits which are not yet pending and, the first time we
* do, spawn a thread to manage the queue. That thread waits for each
* submit's waits to all be pending before submitting to the driver
* queue.
*
* We have to be a bit more careful about a few things in this mode.
* In particular, we can never assume that any given wait operation is
* pending. For instance, when we go to export a sync file from a
* binary semaphore, we need to first wait for it to be pending. The
* spec guarantees that the vast majority of these waits return almost
* immediately, but we do need to insert them for correctness.
*/
VK_DEVICE_TIMELINE_MODE_ASSISTED,
/** Timeline semaphores are 100% native
*
* In this mode, wait-before-signal is natively supported by the
* underlying timeline implementation. We can submit-and-forget and
* assume that dependencies will get resolved for us by the kernel.
* Currently, this isn't supported by any Linux primitives.
*/
VK_DEVICE_TIMELINE_MODE_NATIVE,
} timeline_mode;
/** Per-device submit mode
*
* This represents the device-wide submit strategy which may be different
* from the per-queue submit mode. See vk_queue.submit.mode for more
* details.
*/
enum vk_queue_submit_mode submit_mode;
struct vk_memory_trace_data memory_trace_data;
mtx_t swapchain_private_mtx;
struct hash_table *swapchain_private;
mtx_t swapchain_name_mtx;
struct hash_table *swapchain_name;
/* For VK_KHR_pipeline_binary */
bool disable_internal_cache;
struct vk_device_memory_report *memory_reports;
uint32_t memory_report_count;
};
VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
VK_OBJECT_TYPE_DEVICE);
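/* Drivers are not expected to use vk_device on its own; they embed it in
* their own device structure and cast between the two with container_of().
* A minimal sketch (the drv_* name is hypothetical):
*
*    struct drv_device {
*       struct vk_device vk;
*       // ...driver-private state...
*    };
*/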
/** Initialize a vk_device
*
* Along with initializing the data structures in `vk_device`, this function
* checks that every extension specified by
* ``VkDeviceCreateInfo::ppEnabledExtensionNames`` is actually supported by
* the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
* unsupported extension is requested. It also checks every feature struct
* chained into the `pCreateInfo->pNext` chain against the features returned
* by `vkGetPhysicalDeviceFeatures2` and returns
* `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
*
* :param device: |out| The device to initialize
* :param physical_device: |in| The physical device
* :param dispatch_table: |in| Device-level dispatch table
* :param pCreateInfo: |in| VkDeviceCreateInfo pointer passed to
* `vkCreateDevice()`
* :param alloc: |in| Allocation callbacks passed to
* `vkCreateDevice()`
*/
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
struct vk_physical_device *physical_device,
const struct vk_device_dispatch_table *dispatch_table,
const VkDeviceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *alloc);
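/* Typical use from a driver's vkCreateDevice() implementation (a sketch; the
* drv_* names are hypothetical, the entrypoint-table helpers are the existing
* ones from vk_dispatch_table.h):
*
*    struct vk_device_dispatch_table dispatch_table;
*    vk_device_dispatch_table_from_entrypoints(
*       &dispatch_table, &drv_device_entrypoints, true);
*    vk_device_dispatch_table_from_entrypoints(
*       &dispatch_table, &vk_common_device_entrypoints, false);
*
*    VkResult result = vk_device_init(&device->vk, &physical_device->vk,
*                                     &dispatch_table, pCreateInfo, pAllocator);
*    if (result != VK_SUCCESS) {
*       // free the driver's device allocation and bail
*       return result;
*    }
*
*    vk_device_set_drm_fd(&device->vk, fd);
*/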
static inline void
vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
{
device->sync = util_sync_provider_drm(drm_fd);
}
/** Tears down a vk_device
*
* :param device: |out| The device to tear down
*/
void
vk_device_finish(struct vk_device *device);
/** Enables threaded submit on this device
*
* This doesn't ensure that threaded submit will be used. It just disables
* the deferred submit option for emulated timeline semaphores and forces them
* to always use the threaded path. It also checks that the vk_sync types
* used by the driver meet the requirements for threaded submit.
*
* This must be called before any queues are created.
*/
void vk_device_enable_threaded_submit(struct vk_device *device);
static inline bool
vk_device_supports_threaded_submit(const struct vk_device *device)
{
return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
}
VkResult vk_device_flush(struct vk_device *device);
VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
const char *file, int line,
const char *msg, ...);
#define vk_device_set_lost(device, ...) \
_vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)
void _vk_device_report_lost(struct vk_device *device);
static inline bool
vk_device_is_lost_no_report(struct vk_device *device)
{
return p_atomic_read(&device->_lost.lost) > 0;
}
static inline bool
vk_device_is_lost(struct vk_device *device)
{
int lost = vk_device_is_lost_no_report(device);
if (unlikely(lost && !device->_lost.reported))
_vk_device_report_lost(device);
return lost;
}
static inline VkResult
vk_device_check_status(struct vk_device *device)
{
if (vk_device_is_lost(device))
return VK_ERROR_DEVICE_LOST;
if (!device->check_status)
return VK_SUCCESS;
VkResult result = device->check_status(device);
assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
if (result == VK_ERROR_DEVICE_LOST)
assert(vk_device_is_lost_no_report(device));
return result;
}
VkResult
vk_device_get_timestamp(struct vk_device *device, VkTimeDomainKHR domain,
uint64_t *timestamp);
#ifndef _WIN32
uint64_t
vk_clock_gettime(clockid_t clock_id);
#endif //!_WIN32
static inline uint64_t
vk_time_max_deviation(uint64_t begin, uint64_t end, uint64_t max_clock_period)
{
/*
* The maximum deviation is the sum of the interval over which we
* perform the sampling and the maximum period of any sampled
* clock. That's because the maximum skew between any two sampled
* clock edges is when the sampled clock with the largest period is
* sampled at the end of that period but right at the beginning of the
* sampling interval and some other clock is sampled right at the
* beginning of its sampling period and right at the end of the
* sampling interval. Let's assume the GPU has the longest clock
* period and that the application is sampling GPU and monotonic:
*
* s e
* w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
* Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
*
* g
* 0 1 2 3
* GPU -----_____-----_____-----_____-----_____
*
* m
* x y z 0 1 2 3 4 5 6 7 8 9 a b c
* Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
*
* Interval <----------------->
* Deviation <-------------------------->
*
* s = read(raw) 2
* g = read(GPU) 1
* m = read(monotonic) 2
* e = read(raw) b
*
* We round the sample interval up by one tick to cover sampling error
* in the interval clock
*/
uint64_t sample_interval = end - begin + 1;
return sample_interval + max_clock_period;
}
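/* Illustrative use in a vkGetCalibratedTimestampsKHR() implementation (a
* sketch; device_tick_ns stands in for the device clock period in
* nanoseconds):
*
*    uint64_t begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
*    // ...sample each requested time domain into pTimestamps...
*    uint64_t end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
*    *pMaxDeviation = vk_time_max_deviation(begin, end, device_tick_ns);
*/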
PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
const char *name);
#ifdef __cplusplus
}
#endif
#endif /* VK_DEVICE_H */