/*
* Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include <assert.h>
#include <inttypes.h>
#include "util/hash_table.h"
#include "util/slab.h"
#include "drm/freedreno_ringbuffer.h"
#include "msm_priv.h"
/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
* by avoiding the additional tracking necessary to build cmds/relocs tables
* (but still builds a bos table)
*/
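/* Rough usage sketch (illustrative only, not taken from this file; the real
* callers live in the freedreno gallium driver and may differ in detail):
*
*   uint32_t fence;
*   struct fd_submit *submit = fd_submit_new(pipe);
*   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000,
*         FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
*   ... emit commands and relocs into 'ring' ...
*   fd_submit_flush(submit, -1, NULL, &fence);
*
* With softpin, flush only has to hand the kernel a table of bos and cmd
* buffers; relocs are resolved on the CPU at emit time.
*/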
#define INIT_SIZE 0x1000
struct msm_submit_sp {
struct fd_submit base;
DECLARE_ARRAY(struct drm_msm_gem_submit_bo, submit_bos);
DECLARE_ARRAY(struct fd_bo *, bos);
/* maps fd_bo to idx in bos table: */
struct hash_table *bo_table;
struct slab_child_pool ring_pool;
struct fd_ringbuffer *primary;
/* Allow for sub-allocation of stateobj ring buffers (i.e. sharing
* the same underlying bo).
*
* We also rely on the previous stateobj having been fully constructed
* so we can reclaim extra space at its end.
*/
struct fd_ringbuffer *suballoc_ring;
};
FD_DEFINE_CAST(fd_submit, msm_submit_sp);
/* for FD_RINGBUFFER_GROWABLE rb's, tracks the 'finalized' cmdstream buffers
* and sizes. I.e. a finalized buffer can have no more commands appended to
* it.
*/
struct msm_cmd_sp {
struct fd_bo *ring_bo;
unsigned size;
};
/* for _FD_RINGBUFFER_OBJECT rb's we need to track the bo's and flags to
* copy into the submit when the stateobj rb is later referenced by a
* regular rb:
*/
struct msm_reloc_bo_sp {
struct fd_bo *bo;
unsigned flags;
};
struct msm_ringbuffer_sp {
struct fd_ringbuffer base;
/* for FD_RINGBUFFER_STREAMING rb's which are sub-allocated */
unsigned offset;
// TODO check disasm.. hopefully the compiler's CSE can realize that
// reloc_bos and cmds are at the same offsets and optimize some
// divergent cases into a single case
union {
/* for _FD_RINGBUFFER_OBJECT case: */
struct {
struct fd_pipe *pipe;
DECLARE_ARRAY(struct msm_reloc_bo_sp, reloc_bos);
};
/* for other cases: */
struct {
struct fd_submit *submit;
DECLARE_ARRAY(struct msm_cmd_sp, cmds);
};
} u;
struct fd_bo *ring_bo;
};
FD_DEFINE_CAST(fd_ringbuffer, msm_ringbuffer_sp);
static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer * msm_ringbuffer_sp_init(
struct msm_ringbuffer_sp *msm_ring,
uint32_t size, enum fd_ringbuffer_flags flags);
/* add (if needed) bo to submit and return index: */
static uint32_t
append_bo(struct msm_submit_sp *submit, struct fd_bo *bo, uint32_t flags)
{
struct msm_bo *msm_bo = to_msm_bo(bo);
uint32_t idx;
/* NOTE: it is legal to use the same bo on different threads for
* different submits. But it is not legal to use the same submit
* from different threads.
*/
idx = READ_ONCE(msm_bo->idx);
if (unlikely((idx >= submit->nr_submit_bos) ||
(submit->submit_bos[idx].handle != bo->handle))) {
uint32_t hash = _mesa_hash_pointer(bo);
struct hash_entry *entry;
entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
if (entry) {
/* found */
idx = (uint32_t)(uintptr_t)entry->data;
} else {
idx = APPEND(submit, submit_bos);
idx = APPEND(submit, bos);
submit->submit_bos[idx].flags = 0;
submit->submit_bos[idx].handle = bo->handle;
submit->submit_bos[idx].presumed = 0;
submit->bos[idx] = fd_bo_ref(bo);
_mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
(void *)(uintptr_t)idx);
}
msm_bo->idx = idx;
}
if (flags & FD_RELOC_READ)
submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_READ;
if (flags & FD_RELOC_WRITE)
submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_WRITE;
if (flags & FD_RELOC_DUMP)
submit->submit_bos[idx].flags |= MSM_SUBMIT_BO_DUMP;
return idx;
}
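/* Pick (or create) the bo that a FD_RINGBUFFER_STREAMING ring is
* sub-allocated from.  Streaming stateobjs share a single larger bo; each
* new ring starts at the (aligned) end of the previous one.  If the
* requested size no longer fits, a fresh bo is allocated and becomes the
* new suballoc target.
*/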
static void
msm_submit_suballoc_ring_bo(struct fd_submit *submit,
struct msm_ringbuffer_sp *msm_ring, uint32_t size)
{
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
unsigned suballoc_offset = 0;
struct fd_bo *suballoc_bo = NULL;
if (msm_submit->suballoc_ring) {
struct msm_ringbuffer_sp *suballoc_ring =
to_msm_ringbuffer_sp(msm_submit->suballoc_ring);
suballoc_bo = suballoc_ring->ring_bo;
suballoc_offset = fd_ringbuffer_size(msm_submit->suballoc_ring) +
suballoc_ring->offset;
suballoc_offset = align(suballoc_offset, 0x10);
if ((size + suballoc_offset) > suballoc_bo->size) {
suballoc_bo = NULL;
}
}
if (!suballoc_bo) {
// TODO possibly larger size for streaming bo?
msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev,
0x8000, DRM_FREEDRENO_GEM_GPUREADONLY);
msm_ring->offset = 0;
} else {
msm_ring->ring_bo = fd_bo_ref(suballoc_bo);
msm_ring->offset = suballoc_offset;
}
struct fd_ringbuffer *old_suballoc_ring = msm_submit->suballoc_ring;
msm_submit->suballoc_ring = fd_ringbuffer_ref(&msm_ring->base);
if (old_suballoc_ring)
fd_ringbuffer_del(old_suballoc_ring);
}
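/* Allocate a new ringbuffer for this submit.  The ringbuffer struct itself
* comes from the per-submit slab pool; the backing bo is either
* sub-allocated (FD_RINGBUFFER_STREAMING), started small and grown on
* demand (FD_RINGBUFFER_GROWABLE), or allocated at the requested size.
*/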
static struct fd_ringbuffer *
msm_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
enum fd_ringbuffer_flags flags)
{
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
struct msm_ringbuffer_sp *msm_ring;
msm_ring = slab_alloc(&msm_submit->ring_pool);
msm_ring->u.submit = submit;
/* NOTE: needs to be before _suballoc_ring_bo() since it could
* increment the refcnt of the current ring
*/
msm_ring->base.refcnt = 1;
if (flags & FD_RINGBUFFER_STREAMING) {
msm_submit_suballoc_ring_bo(submit, msm_ring, size);
} else {
if (flags & FD_RINGBUFFER_GROWABLE)
size = INIT_SIZE;
msm_ring->offset = 0;
msm_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size,
DRM_FREEDRENO_GEM_GPUREADONLY);
}
if (!msm_ringbuffer_sp_init(msm_ring, size, flags))
return NULL;
if (flags & FD_RINGBUFFER_PRIMARY) {
debug_assert(!msm_submit->primary);
msm_submit->primary = fd_ringbuffer_ref(&msm_ring->base);
}
return &msm_ring->base;
}
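/* Flush the submit to the kernel: build the cmds table from the primary
* ring's finalized cmd buffers, add each cmd bo to the bos table, wire up
* the optional in/out fence fds, and issue the DRM_MSM_GEM_SUBMIT ioctl.
*/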
static int
msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
int *out_fence_fd, uint32_t *out_fence)
{
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
struct drm_msm_gem_submit req = {
.flags = msm_pipe->pipe,
.queueid = msm_pipe->queue_id,
};
int ret;
debug_assert(msm_submit->primary);
finalize_current_cmd(msm_submit->primary);
struct msm_ringbuffer_sp *primary = to_msm_ringbuffer_sp(msm_submit->primary);
struct drm_msm_gem_submit_cmd cmds[primary->u.nr_cmds];
for (unsigned i = 0; i < primary->u.nr_cmds; i++) {
cmds[i].type = MSM_SUBMIT_CMD_BUF;
cmds[i].submit_idx = append_bo(msm_submit,
primary->u.cmds[i].ring_bo, FD_RELOC_READ | FD_RELOC_DUMP);
cmds[i].submit_offset = primary->offset;
cmds[i].size = primary->u.cmds[i].size;
cmds[i].pad = 0;
cmds[i].nr_relocs = 0;
}
if (in_fence_fd != -1) {
req.flags |= MSM_SUBMIT_FENCE_FD_IN | MSM_SUBMIT_NO_IMPLICIT;
req.fence_fd = in_fence_fd;
}
if (out_fence_fd) {
req.flags |= MSM_SUBMIT_FENCE_FD_OUT;
}
/* needs to be after get_cmd() as that could create bos/cmds table: */
req.bos = VOID2U64(msm_submit->submit_bos);
req.nr_bos = msm_submit->nr_submit_bos;
req.cmds = VOID2U64(cmds);
req.nr_cmds = primary->u.nr_cmds;
DEBUG_MSG("nr_cmds=%u, nr_bos=%u", req.nr_cmds, req.nr_bos);
ret = drmCommandWriteRead(submit->pipe->dev->fd, DRM_MSM_GEM_SUBMIT,
&req, sizeof(req));
if (ret) {
ERROR_MSG("submit failed: %d (%s)", ret, strerror(errno));
msm_dump_submit(&req);
} else {
if (out_fence)
*out_fence = req.fence;
if (out_fence_fd)
*out_fence_fd = req.fence_fd;
}
return ret;
}
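/* Tear down the submit: drop the primary and suballoc ring references,
* free the bo hash table and ring slab pool, and unref every bo that was
* added to the bos table.
*/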
static void
msm_submit_sp_destroy(struct fd_submit *submit)
{
struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
if (msm_submit->primary)
fd_ringbuffer_del(msm_submit->primary);
if (msm_submit->suballoc_ring)
fd_ringbuffer_del(msm_submit->suballoc_ring);
_mesa_hash_table_destroy(msm_submit->bo_table, NULL);
// TODO it would be nice to have a way to debug_assert() if all
// rb's haven't been freed back to the slab, because that is
// an indication that we are leaking bo's
slab_destroy_child(&msm_submit->ring_pool);
for (unsigned i = 0; i < msm_submit->nr_bos; i++)
fd_bo_del(msm_submit->bos[i]);
free(msm_submit->submit_bos);
free(msm_submit->bos);
free(msm_submit);
}
static const struct fd_submit_funcs submit_funcs = {
.new_ringbuffer = msm_submit_sp_new_ringbuffer,
.flush = msm_submit_sp_flush,
.destroy = msm_submit_sp_destroy,
};
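/* Create a new "softpin" submit for the given pipe, with its bo lookup
* hash table and a child slab pool (backed by the pipe's parent pool) for
* ringbuffer allocations.
*/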
struct fd_submit *
msm_submit_sp_new(struct fd_pipe *pipe)
{
struct msm_submit_sp *msm_submit = calloc(1, sizeof(*msm_submit));
struct fd_submit *submit;
msm_submit->bo_table = _mesa_hash_table_create(NULL,
_mesa_hash_pointer, _mesa_key_pointer_equal);
slab_create_child(&msm_submit->ring_pool, &to_msm_pipe(pipe)->ring_pool);
submit = &msm_submit->base;
submit->pipe = pipe;
submit->funcs = &submit_funcs;
return submit;
}
void
msm_pipe_sp_ringpool_init(struct msm_pipe *msm_pipe)
{
// TODO tune size:
slab_create_parent(&msm_pipe->ring_pool, sizeof(struct msm_ringbuffer_sp), 16);
}
void
msm_pipe_sp_ringpool_fini(struct msm_pipe *msm_pipe)
{
if (msm_pipe->ring_pool.num_elements)
slab_destroy_parent(&msm_pipe->ring_pool);
}
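/* Record the current write position of the ring as a finished cmd buffer
* (bo + size in bytes), so it can be referenced from the cmds table at
* flush time.  Only used for rings owned by a submit, not stateobjs.
*/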
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
debug_assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
unsigned idx = APPEND(&msm_ring->u, cmds);
msm_ring->u.cmds[idx].ring_bo = fd_bo_ref(msm_ring->ring_bo);
msm_ring->u.cmds[idx].size = offset_bytes(ring->cur, ring->start);
}
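/* For FD_RINGBUFFER_GROWABLE rings: finalize the current cmd buffer and
* switch the ring over to a newly allocated bo, resetting the
* start/cur/end pointers.  The finalized bo keeps a reference in the cmds
* table until the ring is destroyed.
*/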
static void
msm_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
struct fd_pipe *pipe = msm_ring->u.submit->pipe;
debug_assert(ring->flags & FD_RINGBUFFER_GROWABLE);
finalize_current_cmd(ring);
fd_bo_del(msm_ring->ring_bo);
msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
DRM_FREEDRENO_GEM_GPUREADONLY);
ring->start = fd_bo_map(msm_ring->ring_bo);
ring->end = &(ring->start[size/4]);
ring->cur = ring->start;
ring->size = size;
}
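/* Emit a reloc.  With softpin the iova of the bo is already known, so the
* (optionally shifted) address is written directly into the cmdstream; no
* kernel-side reloc entry is needed.  The bo still has to be tracked: for
* stateobjs it goes into the reloc_bos table, otherwise straight into the
* submit's bos table.  On a5xx+ (gpu_id >= 500) addresses are 64 bits, so
* a second dword with the high bits is emitted as well.
*/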
static void
msm_ringbuffer_sp_emit_reloc(struct fd_ringbuffer *ring,
const struct fd_reloc *reloc)
{
struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
struct fd_pipe *pipe;
if (ring->flags & _FD_RINGBUFFER_OBJECT) {
unsigned idx = APPEND(&msm_ring->u, reloc_bos);
msm_ring->u.reloc_bos[idx].bo = fd_bo_ref(reloc->bo);
msm_ring->u.reloc_bos[idx].flags = reloc->flags;
pipe = msm_ring->u.pipe;
} else {
struct msm_submit_sp *msm_submit =
to_msm_submit_sp(msm_ring->u.submit);
append_bo(msm_submit, reloc->bo, reloc->flags);
pipe = msm_ring->u.submit->pipe;
}
uint64_t iova = fd_bo_get_iova(reloc->bo) + reloc->offset;
int shift = reloc->shift;
if (shift < 0)
iova >>= -shift;
else
iova <<= shift;
uint32_t dword = iova;
(*ring->cur++) = dword | reloc->or;
if (pipe->gpu_id >= 500) {
dword = iova >> 32;
(*ring->cur++) = dword | reloc->orhi;
}
}
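/* Emit a reference to another ringbuffer (e.g. an IB to a stateobj or to
* one of a growable ring's cmd buffers) and return the size of the
* referenced cmdstream.  If the target is a stateobj, its accumulated
* reloc_bos also have to be propagated: either copied into the calling
* stateobj, or appended to the submit's bos table.
*/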
static uint32_t
msm_ringbuffer_sp_emit_reloc_ring(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target, uint32_t cmd_idx)
{
struct msm_ringbuffer_sp *msm_target = to_msm_ringbuffer_sp(target);
struct fd_bo *bo;
uint32_t size;
if ((target->flags & FD_RINGBUFFER_GROWABLE) &&
(cmd_idx < msm_target->u.nr_cmds)) {
bo = msm_target->u.cmds[cmd_idx].ring_bo;
size = msm_target->u.cmds[cmd_idx].size;
} else {
bo = msm_target->ring_bo;
size = offset_bytes(target->cur, target->start);
}
msm_ringbuffer_sp_emit_reloc(ring, &(struct fd_reloc){
.bo = bo,
.flags = FD_RELOC_READ | FD_RELOC_DUMP,
.offset = msm_target->offset,
});
if (!(target->flags & _FD_RINGBUFFER_OBJECT))
return size;
struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
if (ring->flags & _FD_RINGBUFFER_OBJECT) {
for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
unsigned idx = APPEND(&msm_ring->u, reloc_bos);
msm_ring->u.reloc_bos[idx].bo =
fd_bo_ref(msm_target->u.reloc_bos[i].bo);
msm_ring->u.reloc_bos[idx].flags =
msm_target->u.reloc_bos[i].flags;
}
} else {
// TODO it would be nice to know whether we have already
// seen this target before. But hopefully we hit the
// append_bo() fast path enough for this to not matter:
struct msm_submit_sp *msm_submit = to_msm_submit_sp(msm_ring->u.submit);
for (unsigned i = 0; i < msm_target->u.nr_reloc_bos; i++) {
append_bo(msm_submit, msm_target->u.reloc_bos[i].bo,
msm_target->u.reloc_bos[i].flags);
}
}
return size;
}
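/* Number of cmd buffers in the ring: growable rings have their finalized
* cmd buffers plus the one currently being built, everything else is a
* single buffer.
*/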
static uint32_t
msm_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
if (ring->flags & FD_RINGBUFFER_GROWABLE)
return to_msm_ringbuffer_sp(ring)->u.nr_cmds + 1;
return 1;
}
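/* Destroy a ringbuffer.  Stateobjs were malloc'd and own references to
* their reloc_bos; submit-owned rings come from (and return to) the
* submit's slab pool and own references to their finalized cmd bos.
*/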
static void
msm_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
struct msm_ringbuffer_sp *msm_ring = to_msm_ringbuffer_sp(ring);
fd_bo_del(msm_ring->ring_bo);
if (ring->flags & _FD_RINGBUFFER_OBJECT) {
for (unsigned i = 0; i < msm_ring->u.nr_reloc_bos; i++) {
fd_bo_del(msm_ring->u.reloc_bos[i].bo);
}
free(msm_ring->u.reloc_bos);
free(msm_ring);
} else {
struct fd_submit *submit = msm_ring->u.submit;
for (unsigned i = 0; i < msm_ring->u.nr_cmds; i++) {
fd_bo_del(msm_ring->u.cmds[i].ring_bo);
}
free(msm_ring->u.cmds);
slab_free(&to_msm_submit_sp(submit)->ring_pool, msm_ring);
}
}
static const struct fd_ringbuffer_funcs ring_funcs = {
.grow = msm_ringbuffer_sp_grow,
.emit_reloc = msm_ringbuffer_sp_emit_reloc,
.emit_reloc_ring = msm_ringbuffer_sp_emit_reloc_ring,
.cmd_count = msm_ringbuffer_sp_cmd_count,
.destroy = msm_ringbuffer_sp_destroy,
};
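/* Common ringbuffer setup: map the backing bo and point start/cur/end at
* the ring's sub-allocation window, then reset the cmd/reloc tracking
* arrays.
*/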
static inline struct fd_ringbuffer *
msm_ringbuffer_sp_init(struct msm_ringbuffer_sp *msm_ring, uint32_t size,
enum fd_ringbuffer_flags flags)
{
struct fd_ringbuffer *ring = &msm_ring->base;
debug_assert(msm_ring->ring_bo);
uint8_t *base = fd_bo_map(msm_ring->ring_bo);
ring->start = (void *)(base + msm_ring->offset);
ring->end = &(ring->start[size/4]);
ring->cur = ring->start;
ring->size = size;
ring->flags = flags;
ring->funcs = &ring_funcs;
// TODO initializing these could probably be conditional on flags
// since unneeded for the FD_RINGBUFFER_STAGING case..
msm_ring->u.cmds = NULL;
msm_ring->u.nr_cmds = msm_ring->u.max_cmds = 0;
msm_ring->u.reloc_bos = NULL;
msm_ring->u.nr_reloc_bos = msm_ring->u.max_reloc_bos = 0;
return ring;
}
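/* Create a stateobj (_FD_RINGBUFFER_OBJECT) ringbuffer.  It is not tied to
* any submit; bos referenced from it are tracked in reloc_bos and copied
* into a submit later, when the stateobj is referenced from a regular ring.
*/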
struct fd_ringbuffer *
msm_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
struct msm_ringbuffer_sp *msm_ring = malloc(sizeof(*msm_ring));
msm_ring->u.pipe = pipe;
msm_ring->offset = 0;
msm_ring->ring_bo = fd_bo_new_ring(pipe->dev, size,
DRM_FREEDRENO_GEM_GPUREADONLY);
msm_ring->base.refcnt = 1;
return msm_ringbuffer_sp_init(msm_ring, size, _FD_RINGBUFFER_OBJECT);
}