// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//
//
//

#include "path_builder_impl.h"

#include <float.h>
#include <memory.h>
#include <stdlib.h>

#include "block_pool.h"
#include "common/macros.h"
#include "common/vk/assert.h"
#include "common/vk/barrier.h"
#include "core_c.h"
#include "device.h"
#include "flush.h"
#include "handle_pool.h"
#include "queue_pool.h"
#include "ring.h"
#include "shaders/push.h"
#include "spinel/spinel_assert.h"

//
// Verify the path header size matches core.h
//
STATIC_ASSERT_MACRO_1(sizeof(union spinel_path_header) == SPN_PATH_HEAD_DWORDS * sizeof(uint32_t));

//
// The path builder moves bulk path data, nodes and a single header from the
// host into the device-managed "block" memory pool. The data is arranged into
// a SIMT/SIMD-friendly data structure that can be efficiently read by the
// rasterizer.
//
// A simplifying assumption is that a single path can't be longer than what
// fits in the path builder ring.
//
// If a path is too long then the path builder instance is lost.
//
// Note that this restriction can be removed with added complexity to the
// builder and shader.
//
// Also note that for some systems it may be appropriate to never pull path
// data into the device-managed block pool and to instead present the path
// data to the device in a transient allocation -- a "zone" of paths that can
// be discarded all at once.
//
// For other systems, it may be appropriate to simply copy the path data from
// host to device.
//
// The general strategy that this particular Vulkan implementation uses is to
// allocate a large "HOST_COHERENT" bulk-data path buffer and an auxiliary
// mappable command buffer.
//
// The work-in-progress path's header and latest node are updated locally
// until full and only then stored, because the mapped HOST_COHERENT memory is
// likely uncached and read-modify-write operations would be expensive.
//
// A line/quad/cubic/rat_quad/rat_cubic acquires 4/6/8/7/10 segments which may
// be spread across one or more contiguous blocks.
//
// If a flush() occurs, then the remaining columns of multi-segment paths are
// initialized with zero-length path primitives.
//
// Every block's command word has a type and a count acquired from a rolling
// counter.
//
// Note that the maximum number of "in-flight" path copy grids is conveniently
// determined by the size of the fence pool.
//
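// As a sketch of the command word encoding (see spinel_pbi_cmd_append()
// below), each command ORs the rolling counter with a type in its low bits:
//
//   cmd = impl->mapped.rolling | SPN_PATHS_COPY_CMD_TYPE_*;
//
// after which the counter advances by one block's worth of subblocks:
//
//   impl->mapped.rolling += impl->config.rolling_one;
//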

//
// A dispatch record represents a contiguous region of the ring that can be
// copied to or read from the device.
//
// There should be enough dispatch records available so that if they're all in
// flight then either a PCIe or memory bandwidth "roofline" limit is reached.
//
// The expectation is that the path builder will *not* be CPU bound.
//
// The number of dispatch records is defined in the target's config data
// structure.
//
struct spinel_pbi_head_span
{
  uint32_t head;
  uint32_t span;
};

struct spinel_pbi_dispatch
{
  struct spinel_pbi_head_span blocks;
  struct spinel_pbi_head_span paths;

  uint32_t rolling;  // FIXME(allanmac): move to wip

  struct
  {
    spinel_deps_delayed_semaphore_t delayed;
  } signal;
};
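
//
// For example, a dispatch whose .blocks is { .head = 16, .span = 8 } covers
// mapped ring entries [16, 24): those blocks (and their command words) are
// what a single paths_copy grid will read, and they are released back to
// mapped.ring once the grid completes.
//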

//
//
//

struct spinel_pbi_vk
{
  struct spinel_dbi_dm_devaddr alloc;
  struct spinel_dbi_dm_devaddr ring;
};

//
//
//

struct spinel_path_builder_impl
{
  struct spinel_path_builder * path_builder;
  struct spinel_device *       device;
  struct spinel_pbi_vk         vk;

  struct
  {
    uint32_t block_dwords;
    uint32_t block_subgroups;
    uint32_t subgroup_dwords;
    uint32_t subgroup_subblocks;
    uint32_t rolling_one;
    uint32_t eager_size;
  } config;

  //
  // block and cmd rings share a buffer
  //
  // [<--- blocks --->|<--- cmds --->]
  //
  struct
  {
    struct spinel_ring ring;

    uint32_t rolling;

    struct
    {
      uint32_t rem;
      float *  f32;
    } subgroups;

    union
    {
      uint32_t * u32;
      float *    f32;
      // add head and node structures
    } blocks;

    uint32_t * cmds;
  } mapped;

  //
  // work in progress header
  //
  struct
  {
    union spinel_path_header header;

    uint32_t * node;

    struct
    {
      uint32_t idx;
      uint32_t rolling;
    } head;

    struct
    {
      uint32_t rolling;
    } segs;

    uint32_t rem;
  } wip;

  //
  // Resources released upon grid completion:
  //
  //   - Path handles are released immediately.
  //
  //   - Dispatch records and associated mapped spans are released in
  //     ring order.
  //
  // Note that there can only be as many paths as there are blocks
  // (an empty path still has a header block) so this resource is
  // implicitly managed by the mapped.ring and dispatches.ring.
  //
  struct
  {
    spinel_handle_t *  extent;
    struct spinel_next next;
  } paths;

  struct
  {
    struct spinel_pbi_dispatch * extent;
    struct spinel_ring           ring;
  } dispatches;
};

//
//
//
static spinel_result_t
spinel_pbi_lost_begin(struct spinel_path_builder_impl * impl)
{
  return SPN_ERROR_PATH_BUILDER_LOST;
}

static spinel_result_t
spinel_pbi_lost_end(struct spinel_path_builder_impl * impl, spinel_path_t * path)
{
  *path = SPN_PATH_INVALID;

  return SPN_ERROR_PATH_BUILDER_LOST;
}

static spinel_result_t
spinel_pbi_release(struct spinel_path_builder_impl * impl);

static spinel_result_t
spinel_pbi_lost_release(struct spinel_path_builder_impl * impl)
{
  //
  // FIXME -- releasing a lost path builder might eventually require a
  // specialized function. For now, just call the default release.
  //
  return spinel_pbi_release(impl);
}

static spinel_result_t
spinel_pbi_lost_flush(struct spinel_path_builder_impl * impl)
{
  return SPN_ERROR_PATH_BUILDER_LOST;
}

//
// Define primitive geometry "lost" pfns
//
#define SPN_PBI_PFN_LOST_NAME(_p) spinel_pbi_lost_##_p

#undef SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X
#define SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X(_p, _i, _n)                                    \
  static spinel_result_t SPN_PBI_PFN_LOST_NAME(_p)(struct spinel_path_builder_impl * impl) \
  {                                                                                        \
    return SPN_ERROR_PATH_BUILDER_LOST;                                                    \
  }

SPN_PATH_BUILDER_PRIM_TYPE_EXPAND()

//
// If the work-in-progress path's block count reaches mapped.ring.size then
// the path is too long and the path builder is terminally "lost". The path
// builder should be released and a new one created.
//
static void
spinel_pbi_lost(struct spinel_path_builder_impl * impl)
{
  struct spinel_path_builder * pb = impl->path_builder;

  pb->begin   = spinel_pbi_lost_begin;
  pb->end     = spinel_pbi_lost_end;
  pb->release = spinel_pbi_lost_release;
  pb->flush   = spinel_pbi_lost_flush;

#undef SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X
#define SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X(_p, _i, _n) pb->_p = SPN_PBI_PFN_LOST_NAME(_p);

  SPN_PATH_BUILDER_PRIM_TYPE_EXPAND()
}

//
// Append path to path release extent -- note that this resource is
// implicitly "clocked" by the mapped.ring.
//
static void
spinel_pbi_path_append(struct spinel_path_builder_impl * impl, spinel_path_t const * path)
{
  uint32_t const idx = spinel_next_acquire_1(&impl->paths.next);

  impl->paths.extent[idx] = path->handle;
}

//
// A dispatch record captures how many paths and blocks belong to either a
// dispatched compute grid or the work-in-progress dispatch.
//
static struct spinel_pbi_dispatch *
spinel_pbi_dispatch_head(struct spinel_path_builder_impl * impl)
{
  assert(!spinel_ring_is_empty(&impl->dispatches.ring));

  return impl->dispatches.extent + impl->dispatches.ring.head;
}

static struct spinel_pbi_dispatch *
spinel_pbi_dispatch_tail(struct spinel_path_builder_impl * impl)
{
  assert(!spinel_ring_is_full(&impl->dispatches.ring));

  return impl->dispatches.extent + impl->dispatches.ring.tail;
}

static void
spinel_pbi_dispatch_head_init(struct spinel_path_builder_impl * impl)
{
  *spinel_pbi_dispatch_head(impl) = (struct spinel_pbi_dispatch){
    .blocks  = { .head = impl->wip.head.idx,     //
                 .span = 0 },                    //
    .paths   = { .head = impl->paths.next.head,  //
                 .span = 0 },                    //
    .rolling = impl->wip.head.rolling,           //
    .signal  = { .delayed = SPN_DEPS_DELAYED_SEMAPHORE_INVALID },
  };
}

static void
spinel_pbi_dispatch_drop(struct spinel_path_builder_impl * impl)
{
  struct spinel_ring * const ring = &impl->dispatches.ring;

  spinel_ring_drop_1(ring);
}

static void
spinel_pbi_dispatch_acquire(struct spinel_path_builder_impl * impl)
{
  struct spinel_ring * const   ring   = &impl->dispatches.ring;
  struct spinel_device * const device = impl->device;

  while (spinel_ring_is_empty(ring))
  {
    spinel_deps_drain_1(device->deps, &device->vk);
  }

  spinel_pbi_dispatch_head_init(impl);
}

static void
spinel_pbi_dispatch_append(struct spinel_path_builder_impl * impl,
                           struct spinel_pbi_dispatch *      dispatch,
                           spinel_path_t const *             path)
{
  spinel_pbi_path_append(impl, path);

  // clang-format off
  dispatch->blocks.span += impl->wip.header.named.blocks;
  dispatch->paths.span  += 1;
  // clang-format on
}

//
//
//
static void
spinel_pbi_flush_complete(void * data0, void * data1)
{
  struct spinel_path_builder_impl * const impl     = data0;
  struct spinel_pbi_dispatch * const      dispatch = data1;
  struct spinel_device * const            device   = impl->device;

  //
  // These path handles are now materialized
  //
  spinel_deps_delayed_detach_ring(device->deps,
                                  impl->paths.extent,
                                  impl->paths.next.size,
                                  dispatch->paths.head,
                                  dispatch->paths.span);

  //
  // Release the paths -- may invoke wait()
  //
  spinel_device_release_d_paths_ring(device,
                                     impl->paths.extent,
                                     impl->paths.next.size,
                                     dispatch->paths.head,
                                     dispatch->paths.span);
  //
  // If the dispatch is the tail of the ring then try to release as
  // many dispatch records as possible...
  //
  // Note that grids can complete in any order, so releases must advance the
  // mapped.ring tail in ring order.
  //
  dispatch->signal.delayed = SPN_DEPS_DELAYED_SEMAPHORE_INVALID;

  struct spinel_pbi_dispatch * tail = spinel_pbi_dispatch_tail(impl);

  while (tail->signal.delayed == SPN_DEPS_DELAYED_SEMAPHORE_INVALID)
  {
    // release the blocks and cmds
    spinel_ring_release_n(&impl->mapped.ring, tail->blocks.span);

    // release the dispatch
    spinel_ring_release_n(&impl->dispatches.ring, 1);

    // any dispatches in flight?
    if (spinel_ring_is_full(&impl->dispatches.ring))
    {
      break;
    }

    // get new tail
    tail = spinel_pbi_dispatch_tail(impl);
  }
}

//
//
//
static VkPipelineStageFlags
spinel_pbi_flush_record(VkCommandBuffer cb, void * data0, void * data1)
{
  struct spinel_path_builder_impl * const impl     = data0;
  struct spinel_pbi_dispatch * const      dispatch = data1;
  struct spinel_device * const            device   = impl->device;
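
  //
  // The recorded sequence is: a single-workgroup paths_alloc dispatch, a
  // compute-to-compute barrier, then a paths_copy dispatch with one subgroup
  // per ring block.
  //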

  ////////////////////////////////////////////////////////////////
  //
  // PATHS ALLOC
  //
  ////////////////////////////////////////////////////////////////

  struct spinel_push_paths_alloc const push_paths_alloc = {

    .devaddr_block_pool_ids   = device->block_pool.vk.dbi_devaddr.ids.devaddr,
    .devaddr_paths_copy_alloc = impl->vk.alloc.devaddr,
    .pc_alloc_idx             = impl->dispatches.ring.head,
    .pc_span                  = dispatch->blocks.span
  };

  vkCmdPushConstants(cb,
                     device->ti.pipeline_layouts.named.paths_alloc,
                     VK_SHADER_STAGE_COMPUTE_BIT,
                     0,
                     sizeof(push_paths_alloc),
                     &push_paths_alloc);

  vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, device->ti.pipelines.named.paths_alloc);

  vkCmdDispatch(cb, 1, 1, 1);

  ////////////////////////////////////////////////////////////////
  //
  // BARRIER: COMPUTE>COMPUTE
  //
  ////////////////////////////////////////////////////////////////

  vk_barrier_compute_w_to_compute_r(cb);

  ////////////////////////////////////////////////////////////////
  //
  // PATHS COPY
  //
  ////////////////////////////////////////////////////////////////

  struct spinel_push_paths_copy const push_paths_copy = {

    .devaddr_block_pool_ids      = device->block_pool.vk.dbi_devaddr.ids.devaddr,
    .devaddr_block_pool_blocks   = device->block_pool.vk.dbi_devaddr.blocks.devaddr,
    .devaddr_block_pool_host_map = device->block_pool.vk.dbi_devaddr.host_map.devaddr,
    .devaddr_paths_copy_alloc    = impl->vk.alloc.devaddr,
    .devaddr_paths_copy_ring     = impl->vk.ring.devaddr,
    .bp_mask                     = device->block_pool.bp_mask,
    .pc_alloc_idx                = impl->dispatches.ring.head,
    .pc_span                     = dispatch->blocks.span,
    .pc_head                     = dispatch->blocks.head,
    .pc_rolling                  = dispatch->rolling,
    .pc_size                     = impl->mapped.ring.size,
  };

  vkCmdPushConstants(cb,
                     device->ti.pipeline_layouts.named.paths_copy,
                     VK_SHADER_STAGE_COMPUTE_BIT,
                     0,
                     sizeof(push_paths_copy),
                     &push_paths_copy);

  vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, device->ti.pipelines.named.paths_copy);

  //
  // Dispatch one subgroup per block
  //
  struct spinel_target_config const * const config = &device->ti.config;

  uint32_t const sgs_per_wg = config->group_sizes.named.paths_copy.workgroup >>
                              config->group_sizes.named.paths_copy.subgroup_log2;

  uint32_t const wg_count = (dispatch->blocks.span + sgs_per_wg - 1) / sgs_per_wg;
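
  // e.g., assuming a target with workgroup = 256 and subgroup_log2 = 5 (both
  // are target-specific), sgs_per_wg is 8 and a 100-block dispatch rounds up
  // to (100 + 7) / 8 = 13 workgroups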

  vkCmdDispatch(cb, wg_count, 1, 1);

  //
  // NOTE(allanmac):
  //
  // The `deps` scheduler assumes that the command buffers associated with
  // delayed semaphores always end with a compute shader
  // (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT).
  //
  // Only the path builder and raster builder acquire delayed semaphores.
  //
  return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
}

//
//
//
static void
spinel_pbi_flush_submit(void * data0, void * data1)
{
  struct spinel_path_builder_impl * const impl     = data0;
  struct spinel_pbi_dispatch * const      dispatch = data1;
  struct spinel_device * const            device   = impl->device;

  assert(dispatch->paths.span > 0);

  //
  // Flush if the ring is non-coherent
  //
  if (!spinel_allocator_is_coherent(&device->allocator.device.perm.hw_dr))
  {
    VkDeviceSize const block_size       = sizeof(uint32_t) * impl->config.block_dwords;
    VkDeviceSize const blocks_ring_size = block_size * impl->mapped.ring.size;

    // Flush blocks
    spinel_ring_flush(&device->vk,
                      impl->vk.ring.dbi_dm.dm,
                      0,
                      impl->mapped.ring.size,
                      dispatch->blocks.head,
                      dispatch->blocks.span,
                      block_size);

    // Flush commands
    spinel_ring_flush(&device->vk,
                      impl->vk.ring.dbi_dm.dm,
                      blocks_ring_size,
                      impl->mapped.ring.size,
                      dispatch->blocks.head,
                      dispatch->blocks.span,
                      sizeof(uint32_t));
  }

  //
  // Acquire an immediate semaphore
  //
  // Doesn't wait on any handles.
  //
  struct spinel_deps_immediate_submit_info const disi = {
    .record = {
      .pfn   = spinel_pbi_flush_record,
      .data0 = impl,
      .data1 = dispatch,
    },
    //
    // Path builder has no delayed handle dependency
    //
    .completion = {
      .pfn   = spinel_pbi_flush_complete,
      .data0 = impl,
      .data1 = dispatch,
    },
    .signal = {
      .delayed = {
        .count      = 1,
        .semaphores = {
          dispatch->signal.delayed,
        },
      },
    },
  };

  //
  // The current dispatch is now sealed so drop it
  //
  spinel_pbi_dispatch_drop(impl);

  //
  // We don't need to save the returned immediate semaphore.
  //
  spinel_deps_immediate_submit(device->deps, &device->vk, &disi, NULL);

  //
  // Acquire and initialize the next dispatch
  //
  spinel_pbi_dispatch_acquire(impl);
}

//
//
//
static spinel_result_t
spinel_pbi_flush(struct spinel_path_builder_impl * impl)
{
  //
  // Anything to launch?
  //
  struct spinel_pbi_dispatch * const dispatch = spinel_pbi_dispatch_head(impl);

  if (dispatch->paths.span == 0)
  {
    return SPN_SUCCESS;
  }

  //
  // Invoke the delayed submission action
  //
  spinel_deps_delayed_flush(impl->device->deps, dispatch->signal.delayed);

  return SPN_SUCCESS;
}

//
// Before returning a path handle, any remaining coordinates in the
// subgroup(s) are finalized with zero-length primitives.
//
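// For example, if a cubic's subgroup has rem = 3 unused slots, each of the
// cubic's 8 coordinate arrays receives 3 zeros, encoding 3 zero-length
// cubics.
//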
static void
spinel_pb_cn_coords_zero(float * coords, uint32_t rem)
{
  do
  {
    *coords++ = 0.0f;
  } while (--rem > 0);
}

static void
spinel_pb_cn_coords_finalize(float * coords[], uint32_t coords_len, uint32_t rem)
{
  do
  {
    spinel_pb_cn_coords_zero(*coords++, rem);
  } while (--coords_len > 0);
}

static void
spinel_pb_finalize_subgroups(struct spinel_path_builder_impl * impl)
{
  struct spinel_path_builder * const pb = impl->path_builder;

  //
  // Note that this zeroes a cacheline / subblock at a time
  //
#undef SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X
#define SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X(_p, _i, _n)        \
  {                                                            \
    uint32_t rem = pb->cn.rem._p;                              \
                                                               \
    if (rem > 0)                                               \
    {                                                          \
      pb->cn.rem._p = 0;                                       \
                                                               \
      spinel_pb_cn_coords_finalize(pb->cn.coords._p, _n, rem); \
    }                                                          \
  }

  SPN_PATH_BUILDER_PRIM_TYPE_EXPAND()
}

//
//
//
static void
spinel_pbi_cmd_append(struct spinel_path_builder_impl * impl,
                      uint32_t const                    idx,
                      uint32_t const                    type)
{
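  // note: mapped.rolling advances in multiples of rolling_one, so its low
  // tag bits stay clear and the cmd type can simply be OR'd in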
  uint32_t const rolling = impl->mapped.rolling;
  uint32_t const cmd     = rolling | type;

  impl->mapped.cmds[idx] = cmd;
  impl->mapped.rolling   = rolling + impl->config.rolling_one;

  impl->wip.header.named.blocks += 1;
}

//
//
//
static void
spinel_pbi_node_append_next(struct spinel_path_builder_impl * impl)
{
  // no need to increment the node pointer
  *impl->wip.node = impl->mapped.rolling | SPN_BLOCK_ID_TAG_PATH_NEXT;
}

//
//
//
static uint32_t
spinel_pbi_acquire_head_block(struct spinel_path_builder_impl * impl)
{
  struct spinel_ring * const ring = &impl->mapped.ring;

  // is ring full?
  if (spinel_ring_is_empty(ring))
  {
    // launch any unlaunched dispatch
    spinel_pbi_flush(impl);

    struct spinel_device * const device = impl->device;

    do
    {
      // wait for at least one dispatch to complete
      spinel_deps_drain_1(device->deps, &device->vk);

    } while (spinel_ring_is_empty(ring));
  }

  return spinel_ring_acquire_1(&impl->mapped.ring);
}

static spinel_result_t
spinel_pbi_acquire_node_segs_block(struct spinel_path_builder_impl * impl, uint32_t * idx)
{
  struct spinel_ring * const ring = &impl->mapped.ring;

  if (spinel_ring_is_empty(ring))
  {
    //
    // If the work in progress is going to exceed the size of the ring
    // then this is a fatal error. At this point, we can kill the path
    // builder instead of the device.
    //
    if (impl->wip.header.named.blocks >= impl->mapped.ring.size)
    {
      spinel_pbi_lost(impl);

      return SPN_ERROR_PATH_BUILDER_LOST;  // FIXME(allanmac): return a "TOO_LONG" error?
    }

    //
    // Otherwise, launch whatever is in the ring...
    //
    spinel_pbi_flush(impl);

    //
    // ... and wait for blocks to appear in the ring!
    //
    struct spinel_device * const device = impl->device;

    do
    {
      // wait for at least one dispatch to complete
      spinel_deps_drain_1(device->deps, &device->vk);

    } while (spinel_ring_is_empty(ring));
  }

  *idx = spinel_ring_acquire_1(&impl->mapped.ring);

  return SPN_SUCCESS;
}

//
//
//
static void
spinel_pbi_acquire_head(struct spinel_path_builder_impl * impl)
{
  uint32_t const idx = spinel_pbi_acquire_head_block(impl);

  spinel_pbi_cmd_append(impl, idx, SPN_PATHS_COPY_CMD_TYPE_HEAD);

  uint32_t const   offset = idx * impl->config.block_dwords;
  uint32_t * const head   = impl->mapped.blocks.u32 + offset;

  impl->wip.node = head + SPN_PATH_HEAD_DWORDS;
  impl->wip.rem  = impl->config.block_dwords - SPN_PATH_HEAD_DWORDS;
}

static spinel_result_t
spinel_pbi_acquire_node(struct spinel_path_builder_impl * impl)
{
  spinel_pbi_node_append_next(impl);

  uint32_t              idx;
  spinel_result_t const err = spinel_pbi_acquire_node_segs_block(impl, &idx);

  if (err != SPN_SUCCESS)
  {
    return err;
  }

  spinel_pbi_cmd_append(impl, idx, SPN_PATHS_COPY_CMD_TYPE_NODE);

  impl->wip.header.named.nodes += 1;

  uint32_t const offset = idx * impl->config.block_dwords;

  impl->wip.node = impl->mapped.blocks.u32 + offset;
  impl->wip.rem  = impl->config.block_dwords;

  return SPN_SUCCESS;
}

static spinel_result_t
spinel_pbi_acquire_segs(struct spinel_path_builder_impl * impl)
{
  uint32_t idx;

  spinel_result_t const err = spinel_pbi_acquire_node_segs_block(impl, &idx);

  if (err != SPN_SUCCESS)
  {
    return err;
  }

  impl->wip.segs.rolling = impl->mapped.rolling;

  spinel_pbi_cmd_append(impl, idx, SPN_PATHS_COPY_CMD_TYPE_SEGS);

  uint32_t const offset = idx * impl->config.block_dwords;

  impl->mapped.subgroups.f32 = impl->mapped.blocks.f32 + offset;
  impl->mapped.subgroups.rem = impl->config.block_subgroups;

  return SPN_SUCCESS;
}

//
//
//
static void
spinel_pbi_node_append_segs(struct spinel_path_builder_impl * impl, uint32_t const tag)
{
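  // pack { block id, subblock idx, prim tag } into a tagged block id:
  // wip.segs.rolling supplies the block id in the high bits, the subblock
  // index is shifted above the tag bits, and the tag fills the low bits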
  uint32_t const subgroup_idx = impl->config.block_subgroups - impl->mapped.subgroups.rem;
  uint32_t const subblock_idx = subgroup_idx * impl->config.subgroup_subblocks;
  uint32_t const subblock_shl = subblock_idx << SPN_TAGGED_BLOCK_ID_BITS_TAG;
  uint32_t const tbid         = (impl->wip.segs.rolling | subblock_shl | tag);

  *impl->wip.node++ = tbid;

  impl->wip.rem -= 1;
}

//
//
//
static spinel_result_t
spinel_pbi_prim_acquire_subgroups(struct spinel_path_builder_impl * impl,
                                  uint32_t const                    tag,
                                  float **                          coords,
                                  uint32_t                          coords_len)
{
  //
  // Write a tagged block id to the node that records:
  //
  //   { block id, subblock idx, prim tag }
  //
  // If the path primitive spans more than one block then there will
  // be a TAG_PATH_NEXT pointing to the next block.
  //
  // Note that a subgroup may be 1, 2 or a higher power of two
  // subblocks.
  //
  uint32_t curr_tag = tag;

  do
  {
    // is there only one tagged block id left in the node?
    if (impl->wip.rem == 1)
    {
      spinel_result_t const err = spinel_pbi_acquire_node(impl);

      if (err != SPN_SUCCESS)
        return err;
    }

    // are there no subgroups left?
    if (impl->mapped.subgroups.rem == 0)
    {
      spinel_result_t const err = spinel_pbi_acquire_segs(impl);

      if (err != SPN_SUCCESS)
        return err;
    }

    // record the tagged block id
    spinel_pbi_node_append_segs(impl, curr_tag);

    // any tag after this is a caboose
    curr_tag = SPN_BLOCK_ID_TAG_PATH_NEXT;

    // initialize path builder's pointers
    uint32_t count = MIN_MACRO(uint32_t, coords_len, impl->mapped.subgroups.rem);

    impl->mapped.subgroups.rem -= count;

    coords_len -= count;

    do
    {
      *coords++ = impl->mapped.subgroups.f32;

      impl->mapped.subgroups.f32 += impl->config.subgroup_dwords;
    } while (--count > 0);
  } while (coords_len > 0);

  // update path builder rem count
  impl->path_builder->cn.rem.aN[tag] = impl->config.subgroup_dwords;

  // the prims count tracks the number of tagged block ids
  impl->wip.header.named.prims.array[tag] += 1;

  return SPN_SUCCESS;
}

//
// Define primitive geometry pfns
//

#define SPN_PBI_PFN_NAME(_p) spinel_pbi_##_p

#undef SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X
#define SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X(_p, _i, _n)                                       \
  static spinel_result_t SPN_PBI_PFN_NAME(_p)(struct spinel_path_builder_impl * impl)         \
  {                                                                                           \
    return spinel_pbi_prim_acquire_subgroups(impl, _i, impl->path_builder->cn.coords._p, _n); \
  }

SPN_PATH_BUILDER_PRIM_TYPE_EXPAND()

//
//
//
STATIC_ASSERT_MACRO_1(sizeof(union spinel_path_header) ==
                      MEMBER_SIZE_MACRO(union spinel_path_header, array));

static void
spinel_pbi_wip_reset(struct spinel_path_builder_impl * impl)
{
  struct spinel_path_builder * const pb = impl->path_builder;

  // init path builder counters
#undef SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X
#define SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X(_p, _i, _n) pb->cn.rem._p = 0;

  SPN_PATH_BUILDER_PRIM_TYPE_EXPAND();

  // save mapped head to wip
  impl->wip.head.idx     = impl->mapped.ring.head;
  impl->wip.head.rolling = impl->mapped.rolling;

  // there are no subgroups available
  impl->mapped.subgroups.rem = 0;

  // update header -- don't bother initializing .handle and .na
  impl->wip.header.named.blocks = 0;
  impl->wip.header.named.nodes  = 0;

  // reset prim counters
  memset(impl->wip.header.named.prims.array, 0, sizeof(impl->wip.header.named.prims.array));

  // reset bounds
  impl->wip.header.named.bounds[0] = FLT_MAX;
  impl->wip.header.named.bounds[1] = FLT_MAX;
  impl->wip.header.named.bounds[2] = FLT_MIN;
  impl->wip.header.named.bounds[3] = FLT_MIN;
}

//
//
//
static spinel_result_t
spinel_pbi_begin(struct spinel_path_builder_impl * impl)
{
  // acquire head block
  spinel_pbi_acquire_head(impl);

  return SPN_SUCCESS;
}

//
//
//
STATIC_ASSERT_MACRO_1(SPN_TAGGED_BLOCK_ID_INVALID == UINT32_MAX);

static spinel_result_t
spinel_pbi_end(struct spinel_path_builder_impl * impl, spinel_path_t * path)
{
  // finalize all incomplete active subgroups -- note that we don't
  // care about unused remaining subblocks in a block
  spinel_pb_finalize_subgroups(impl);

  // mark remaining ids in the head or node as invalid
  memset(impl->wip.node, 0xFF, sizeof(*impl->wip.node) * impl->wip.rem);

  // device
  struct spinel_device * const device = impl->device;

  // get the head dispatch
  struct spinel_pbi_dispatch * const dispatch = spinel_pbi_dispatch_head(impl);

  // do we need to acquire a delayed semaphore?
  if (dispatch->signal.delayed == SPN_DEPS_DELAYED_SEMAPHORE_INVALID)
  {
    struct spinel_deps_acquire_delayed_info const dadi = {

      .submission = { .pfn   = spinel_pbi_flush_submit,  //
                      .data0 = impl,
                      .data1 = dispatch }
    };

    dispatch->signal.delayed = spinel_deps_delayed_acquire(device->deps, &device->vk, &dadi);
  }

  // acquire path host id
  path->handle = spinel_device_handle_acquire(device);

  // update device-side path header with host-side path handle
  impl->wip.header.named.handle = path->handle;

  // associate delayed semaphore with handle
  spinel_deps_delayed_attach(device->deps, path->handle, dispatch->signal.delayed);

  // append path to dispatch
  spinel_pbi_dispatch_append(impl, dispatch, path);

  uint32_t const   offset = impl->wip.head.idx * impl->config.block_dwords;
  uint32_t * const head   = impl->mapped.blocks.u32 + offset;

  // copy wip header to mapped coherent head block
  memcpy(head, impl->wip.header.array, sizeof(impl->wip.header));

  // reset wip header
  spinel_pbi_wip_reset(impl);

  // eagerly flush?
  if (dispatch->blocks.span >= impl->config.eager_size)
  {
    spinel_deps_delayed_flush(device->deps, dispatch->signal.delayed);
  }

  return SPN_SUCCESS;
}

//
//
//
static spinel_result_t
spinel_pbi_release(struct spinel_path_builder_impl * impl)
{
  //
  // Launch any wip dispatch
  //
  spinel_pbi_flush(impl);

  //
  // Wait for all in-flight dispatches to complete
  //
  struct spinel_ring * const   ring   = &impl->dispatches.ring;
  struct spinel_device * const device = impl->device;

  while (!spinel_ring_is_full(ring))
  {
    spinel_deps_drain_1(device->deps, &device->vk);
  }

  //
  // Free device allocations.
  //
  vkUnmapMemory(device->vk.d, impl->vk.ring.dbi_dm.dm);  // not necessary

  spinel_allocator_free_dbi_dm(&device->allocator.device.perm.hw_dr,
                               device->vk.d,
                               device->vk.ac,
                               &impl->vk.ring.dbi_dm);

  spinel_allocator_free_dbi_dm(&device->allocator.device.perm.drw,
                               device->vk.d,
                               device->vk.ac,
                               &impl->vk.alloc.dbi_dm);

  //
  // Free host allocations
  //
  free(impl->dispatches.extent);
  free(impl->paths.extent);
  free(impl->path_builder);
  free(impl);

  spinel_context_release(device->context);

  return SPN_SUCCESS;
}

//
//
//
spinel_result_t
spinel_path_builder_impl_create(struct spinel_device *        device,
                                struct spinel_path_builder ** path_builder)
{
  spinel_context_retain(device->context);

  //
  // allocate impl
  //
  struct spinel_path_builder_impl * const impl = MALLOC_MACRO(sizeof(*impl));

  //
  // allocate path builder
  //
  struct spinel_path_builder * const pb = MALLOC_MACRO(sizeof(*pb));

  // init impl and pb back-pointers
  *path_builder      = pb;
  impl->path_builder = pb;
  pb->impl           = impl;

  // save device
  impl->device = device;

  //
  // init path builder pfns and rem count
  //
  pb->begin   = spinel_pbi_begin;
  pb->end     = spinel_pbi_end;
  pb->release = spinel_pbi_release;
  pb->flush   = spinel_pbi_flush;

#undef SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X
#define SPN_PATH_BUILDER_PRIM_TYPE_EXPAND_X(_p, _i, _n) pb->_p = SPN_PBI_PFN_NAME(_p);

  SPN_PATH_BUILDER_PRIM_TYPE_EXPAND()

  //
  // init refcount & state
  //
  pb->ref_count = 1;

  SPN_ASSERT_STATE_INIT(SPN_PATH_BUILDER_STATE_READY, pb);

  //
  // get target config
  //
  struct spinel_target_config const * const config = &device->ti.config;

  //
  // FIXME(allanmac): Stop replicating these constants in the impl?
  //
  // stash device-specific params
  uint32_t const block_dwords    = 1u << config->block_pool.block_dwords_log2;
  uint32_t const subblock_dwords = 1u << config->block_pool.subblock_dwords_log2;
  uint32_t const subgroup_dwords = 1u << config->group_sizes.named.paths_copy.subgroup_log2;

  impl->config.block_dwords       = block_dwords;
  impl->config.block_subgroups    = block_dwords / subgroup_dwords;
  impl->config.subgroup_dwords    = subgroup_dwords;
  impl->config.subgroup_subblocks = subgroup_dwords / subblock_dwords;

  impl->config.rolling_one = (block_dwords / subblock_dwords) << SPN_TAGGED_BLOCK_ID_BITS_TAG;
  impl->config.eager_size  = config->path_builder.size.eager;
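
  //
  // For example, assuming a target with block_dwords_log2 = 7,
  // subblock_dwords_log2 = 4 and paths_copy subgroup_log2 = 4 (all of these
  // are target-specific), the derived values are:
  //
  //   block_dwords       = 128
  //   block_subgroups    = 128 / 16 = 8
  //   subgroup_dwords    = 16
  //   subgroup_subblocks = 16 / 16  = 1
  //   rolling_one        = 8 << SPN_TAGGED_BLOCK_ID_BITS_TAG
  //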

  uint32_t const max_in_flight = config->path_builder.size.dispatches;

  spinel_allocator_alloc_dbi_dm_devaddr(&device->allocator.device.perm.drw,
                                        device->vk.pd,
                                        device->vk.d,
                                        device->vk.ac,
                                        sizeof(uint32_t) * max_in_flight,
                                        NULL,
                                        &impl->vk.alloc);

  uint32_t const ring_size = config->path_builder.size.ring;

  //
  // initialize mapped counters
  //
  spinel_ring_init(&impl->mapped.ring, ring_size);

  impl->mapped.rolling = 0;

  //
  // each ring entry is a block of dwords and a one-dword cmd
  //
  // round up to the noncoherent atom size even if the allocator is coherent
  //
  uint32_t const     extent_dwords  = ring_size * (block_dwords + 1);
  size_t const       extent_size    = extent_dwords * sizeof(uint32_t);
  VkDeviceSize const extent_size_ru = ROUND_UP_POW2_MACRO(extent_size,  //
                                                          device->vk.limits.noncoherent_atom_size);
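
  // e.g., assuming ring_size = 8192 and block_dwords = 128 (both
  // target-specific), the extent is 8192 * 129 dwords, or ~4.2 MB, before
  // rounding up to the noncoherent atom size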

  spinel_allocator_alloc_dbi_dm_devaddr(&device->allocator.device.perm.hw_dr,
                                        device->vk.pd,
                                        device->vk.d,
                                        device->vk.ac,
                                        extent_size_ru,
                                        NULL,
                                        &impl->vk.ring);

  //
  // map and initialize blocks and cmds
  //
  vk(MapMemory(device->vk.d,
               impl->vk.ring.dbi_dm.dm,
               0,
               VK_WHOLE_SIZE,
               0,
               (void **)&impl->mapped.blocks.u32));

  //
  // cmds are offset from blocks
  //
  uint32_t const cmds_offset = ring_size * block_dwords;

  impl->mapped.cmds = impl->mapped.blocks.u32 + cmds_offset;
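
  //
  // The resulting shared layout (as sketched earlier) is:
  //
  //   [ blocks: ring_size * block_dwords dwords | cmds: ring_size dwords ]
  //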

  //
  // allocate path release extent
  //
  size_t const paths_size = sizeof(*impl->paths.extent) * ring_size;

  impl->paths.extent = MALLOC_MACRO(paths_size);

  spinel_next_init(&impl->paths.next, ring_size);

  //
  // reset the wip path -- the mapped counters and path release extent must
  // already be initialized
  //
  spinel_pbi_wip_reset(impl);

  //
  // allocate dispatches ring
  //
  size_t const dispatches_size = sizeof(*impl->dispatches.extent) * max_in_flight;

  impl->dispatches.extent = MALLOC_MACRO(dispatches_size);

  spinel_ring_init(&impl->dispatches.ring, max_in_flight);

  spinel_pbi_dispatch_head_init(impl);

  return SPN_SUCCESS;
}

//
//
//