Wire up semaphores on command buffer execution.

Change-Id: I90fe0eb53cba85d75efbcee9ef3fc5ebd6d8e173
diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
index 8ebe964..9279917 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1148,9 +1148,9 @@
    return true;
 }
 
-VkResult
-anv_cmd_buffer_execbuf(struct anv_device *device,
-                       struct anv_cmd_buffer *cmd_buffer)
+VkResult anv_cmd_buffer_execbuf(struct anv_device* device, struct anv_cmd_buffer* cmd_buffer,
+                                uint32_t wait_semaphore_count, anv_semaphore_t* wait_semaphores,
+                                uint32_t signal_semaphore_count, anv_semaphore_t* signal_semaphores)
 {
    struct anv_batch *batch = &cmd_buffer->batch;
    struct anv_block_pool *ss_pool =
@@ -1278,8 +1278,8 @@
          cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
    }
 
-   VkResult result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
-
+   VkResult result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos, wait_semaphore_count,
+                                        wait_semaphores, signal_semaphore_count, signal_semaphores);
    anv_execbuf_finish(&execbuf, &cmd_buffer->pool->alloc);
 
    return result;
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 2d9d1b5..af78d65 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -830,7 +830,7 @@
    execbuf.rsvd1 = device->context_id;
    execbuf.rsvd2 = 0;
 
-   result = anv_device_execbuf(device, &execbuf, exec_bos);
+   result = anv_device_execbuf(device, &execbuf, exec_bos, 0, NULL, 0, NULL);
    if (result != VK_SUCCESS)
       goto fail;
 
@@ -1107,12 +1107,13 @@
    *pQueue = anv_queue_to_handle(&device->queue);
 }
 
-VkResult
-anv_device_execbuf(struct anv_device *device,
-                   struct drm_i915_gem_execbuffer2 *execbuf,
-                   struct anv_bo **execbuf_bos)
+VkResult anv_device_execbuf(struct anv_device* device, struct drm_i915_gem_execbuffer2* execbuf,
+                            struct anv_bo** execbuf_bos, uint32_t wait_semaphore_count,
+                            anv_semaphore_t* wait_semaphores, uint32_t signal_semaphore_count,
+                            anv_semaphore_t* signal_semaphores)
 {
-   int ret = anv_gem_execbuffer(device, execbuf);
+   int ret = anv_gem_execbuffer(device, execbuf, wait_semaphore_count, wait_semaphores,
+                                signal_semaphore_count, signal_semaphores);
    if (ret != 0) {
       /* We don't know the real error. */
       return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
@@ -1169,7 +1170,10 @@
                          pSubmits[i].pCommandBuffers[j]);
          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
 
-         result = anv_cmd_buffer_execbuf(device, cmd_buffer);
+         result = anv_cmd_buffer_execbuf(device, cmd_buffer, pSubmits[i].waitSemaphoreCount,
+                                         (anv_semaphore_t*)pSubmits[i].pWaitSemaphores,
+                                         pSubmits[i].signalSemaphoreCount,
+                                         (anv_semaphore_t*)pSubmits[i].pSignalSemaphores);
          if (result != VK_SUCCESS)
             goto out;
       }
@@ -1177,7 +1181,7 @@
 
    if (fence) {
       struct anv_bo *fence_bo = &fence->bo;
-      result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
+      result = anv_device_execbuf(device, &fence->execbuf, &fence_bo, 0, NULL, 0, NULL);
       if (result != VK_SUCCESS)
          goto out;
 
diff --git a/src/intel/vulkan/anv_magma.cc b/src/intel/vulkan/anv_magma.cc
index d3819cb..e69a90b 100644
--- a/src/intel/vulkan/anv_magma.cc
+++ b/src/intel/vulkan/anv_magma.cc
@@ -109,14 +109,17 @@
    return 0;
 }
 
-int anv_gem_execbuffer(anv_device* device, drm_i915_gem_execbuffer2* execbuf)
+int anv_gem_execbuffer(anv_device* device, drm_i915_gem_execbuffer2* execbuf,
+                       uint32_t wait_semaphore_count, anv_semaphore_t* wait_semaphores,
+                       uint32_t signal_semaphore_count, anv_semaphore_t* signal_semaphores)
 {
    DLOG("anv_gem_execbuffer");
 
    if (execbuf->buffer_count == 0)
       return 0;
 
-   uint64_t required_size = DrmCommandBuffer::RequiredSize(execbuf);
+   uint64_t required_size =
+       DrmCommandBuffer::RequiredSize(execbuf, wait_semaphore_count, signal_semaphore_count);
 
    uint64_t allocated_size;
    uint64_t cmd_buf_id;
@@ -135,7 +138,18 @@
       return DRET_MSG(error, "magma_system_map failed");
    }
 
-   if (!DrmCommandBuffer::Translate(execbuf, cmd_buf_data)) {
+   std::vector<uint64_t> wait_semaphore_ids(wait_semaphore_count);
+   for (uint32_t i = 0; i < wait_semaphore_count; i++) {
+      wait_semaphore_ids[i] = magma_system_get_semaphore_id(wait_semaphores[i]);
+   }
+
+   std::vector<uint64_t> signal_semaphore_ids(signal_semaphore_count);
+   for (uint32_t i = 0; i < signal_semaphore_count; i++) {
+      signal_semaphore_ids[i] = magma_system_get_semaphore_id(signal_semaphores[i]);
+   }
+
+   if (!DrmCommandBuffer::Translate(execbuf, std::move(wait_semaphore_ids),
+                                    std::move(signal_semaphore_ids), cmd_buf_data)) {
       error = magma_system_unmap(magma_connection(device), cmd_buf_id);
       DASSERT(!error);
       magma_system_free(magma_connection(device), cmd_buf_id);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 89f4fa9..9d2fdf5 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -603,9 +603,10 @@
 void anv_device_init_blorp(struct anv_device *device);
 void anv_device_finish_blorp(struct anv_device *device);
 
-VkResult anv_device_execbuf(struct anv_device *device,
-                            struct drm_i915_gem_execbuffer2 *execbuf,
-                            struct anv_bo **execbuf_bos);
+VkResult anv_device_execbuf(struct anv_device* device, struct drm_i915_gem_execbuffer2* execbuf,
+                            struct anv_bo** execbuf_bos, uint32_t wait_semaphore_count,
+                            anv_semaphore_t* wait_semaphores, uint32_t signal_semaphore_count,
+                            anv_semaphore_t* signal_semaphores);
 
 int anv_gem_connect(struct anv_device* device);
 void anv_gem_disconnect(struct anv_device* device);
@@ -617,8 +618,9 @@
 void anv_gem_close(struct anv_device* device, anv_buffer_handle_t gem_handle);
 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
 int anv_gem_wait(struct anv_device* device, anv_buffer_handle_t gem_handle, int64_t* timeout_ns);
-int anv_gem_execbuffer(struct anv_device *device,
-                       struct drm_i915_gem_execbuffer2 *execbuf);
+int anv_gem_execbuffer(struct anv_device* device, struct drm_i915_gem_execbuffer2* execbuf,
+                       uint32_t wait_semaphore_count, anv_semaphore_t* wait_semaphores,
+                       uint32_t signal_semaphore_count, anv_semaphore_t* signal_semaphores);
 int anv_gem_set_tiling(struct anv_device* device, anv_buffer_handle_t gem_handle, uint32_t stride,
                        uint32_t tiling);
 int anv_gem_create_context(struct anv_device *device);
@@ -1225,8 +1227,10 @@
 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                                   struct anv_cmd_buffer *secondary);
 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
-VkResult anv_cmd_buffer_execbuf(struct anv_device *device,
-                                struct anv_cmd_buffer *cmd_buffer);
+VkResult anv_cmd_buffer_execbuf(struct anv_device* device, struct anv_cmd_buffer* cmd_buffer,
+                                uint32_t wait_semaphore_count, anv_semaphore_t* wait_semaphores,
+                                uint32_t signal_semaphore_count,
+                                anv_semaphore_t* signal_semaphores);
 
 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
 
diff --git a/src/magma/drm_command_buffer.cc b/src/magma/drm_command_buffer.cc
index d264412..97da6e3 100644
--- a/src/magma/drm_command_buffer.cc
+++ b/src/magma/drm_command_buffer.cc
@@ -6,9 +6,10 @@
 #include "magma_system.h"
 #include "magma_util/dlog.h"
 #include "magma_util/macros.h"
-#include <inttypes.h>
 
-uint64_t DrmCommandBuffer::RequiredSize(drm_i915_gem_execbuffer2* execbuf)
+uint64_t DrmCommandBuffer::RequiredSize(drm_i915_gem_execbuffer2* execbuf,
+                                        uint32_t wait_semaphore_count,
+                                        uint32_t signal_semaphore_count)
 {
    auto execobjects = reinterpret_cast<drm_i915_gem_exec_object2*>(execbuf->buffers_ptr);
 
@@ -19,11 +20,16 @@
       num_relocations += execobjects[res_index].relocation_count;
    }
 
-   return sizeof(magma_system_command_buffer) + sizeof(magma_system_exec_resource) * num_resources +
+   return sizeof(magma_system_command_buffer) +
+          (wait_semaphore_count + signal_semaphore_count) * sizeof(uint64_t) +
+          sizeof(magma_system_exec_resource) * num_resources +
           sizeof(magma_system_relocation_entry) * num_relocations;
 }
 
-bool DrmCommandBuffer::Translate(drm_i915_gem_execbuffer2* execbuf, void* command_buffer_out)
+bool DrmCommandBuffer::Translate(drm_i915_gem_execbuffer2* execbuf,
+                                 std::vector<uint64_t> wait_semaphore_ids,
+                                 std::vector<uint64_t> signal_semaphore_ids,
+                                 void* command_buffer_out)
 {
    DASSERT((execbuf->flags & I915_EXEC_HANDLE_LUT) != 0);
 
@@ -32,11 +38,21 @@
 
    magma_system_command_buffer* command_buffer =
        reinterpret_cast<magma_system_command_buffer*>(command_buffer_out);
-   magma_system_exec_resource* exec_resources =
-       reinterpret_cast<magma_system_exec_resource*>(command_buffer + 1);
+   uint64_t* dst_wait_semaphore_ids = reinterpret_cast<uint64_t*>(command_buffer + 1);
+   uint64_t* dst_signal_semaphore_ids =
+       reinterpret_cast<uint64_t*>(dst_wait_semaphore_ids + wait_semaphore_ids.size());
+   magma_system_exec_resource* exec_resources = reinterpret_cast<magma_system_exec_resource*>(
+       dst_signal_semaphore_ids + signal_semaphore_ids.size());
    magma_system_relocation_entry* relocation_entries =
        reinterpret_cast<magma_system_relocation_entry*>(exec_resources + num_resources);
 
+   for (uint32_t i = 0; i < wait_semaphore_ids.size(); i++) {
+      dst_wait_semaphore_ids[i] = wait_semaphore_ids[i];
+   }
+   for (uint32_t i = 0; i < signal_semaphore_ids.size(); i++) {
+      dst_signal_semaphore_ids[i] = signal_semaphore_ids[i];
+   }
+
    uint32_t res_reloc_base = 0;
 
    for (uint32_t res_index = 0; res_index < num_resources; res_index++) {
@@ -44,7 +60,8 @@
       auto src_res = &execobjects[res_index];
       auto src_res_relocs = reinterpret_cast<drm_i915_gem_relocation_entry*>(src_res->relocs_ptr);
 
-      DLOG("translating res_index %u handle 0x%" PRIx64 " start_offset 0x%lx length 0x%lx",
+      DLOG("translating res_index %u handle 0x%" PRIx64 " start_offset 0x%" PRIx64
+           " length 0x%" PRIx64,
            res_index, src_res->handle, src_res->rsvd1, src_res->rsvd2);
 
       uint32_t num_relocations = dst_res->num_relocations = src_res->relocation_count;
@@ -80,6 +97,8 @@
    command_buffer->num_resources = num_resources;
    command_buffer->batch_buffer_resource_index = num_resources - 1; // by drm convention
    command_buffer->batch_start_offset = execbuf->batch_start_offset;
+   command_buffer->wait_semaphore_count = wait_semaphore_ids.size();
+   command_buffer->signal_semaphore_count = signal_semaphore_ids.size();
 
    return true;
 }
diff --git a/src/magma/drm_command_buffer.h b/src/magma/drm_command_buffer.h
index dd2198b..e0e1823 100644
--- a/src/magma/drm_command_buffer.h
+++ b/src/magma/drm_command_buffer.h
@@ -6,17 +6,22 @@
 #define DRM_COMMAND_BUFFER_H
 
 #include "i915_drm.h"
+#include <vector>
 
 class DrmCommandBuffer {
 public:
    // Returns the number of bytes needed for the magma_system_command_buffer
-   // and the associated data structures for |execbuf|
-   static uint64_t RequiredSize(drm_i915_gem_execbuffer2* execbuf);
+   // and the associated data structures for |execbuf|, plus one uint64_t id for each of
+   // the |wait_semaphore_count| wait and |signal_semaphore_count| signal semaphores.
+   static uint64_t RequiredSize(drm_i915_gem_execbuffer2* execbuf, uint32_t wait_semaphore_count,
+                                uint32_t signal_semaphore_count);
 
    // Writes the magma_system_command_buffer and associated data structures
    // into |command_buffer_out|. |command_buffer_out| must point to a buffer
-   // that is at least RequiredSize(|exec_buf|) bytes
-   static bool Translate(drm_i915_gem_execbuffer2* execbuf, void* command_buffer_out);
+   // that is at least RequiredSize() bytes for the same |execbuf| and semaphore counts.
+   static bool Translate(drm_i915_gem_execbuffer2* execbuf,
+                         std::vector<uint64_t> wait_semaphore_ids,
+                         std::vector<uint64_t> signal_semaphore_ids, void* command_buffer_out);
 
 private:
    DrmCommandBuffer() {}
diff --git a/tests/unit_tests/test_drm_command_buffer.cc b/tests/unit_tests/test_drm_command_buffer.cc
index 3b22712..4207351 100644
--- a/tests/unit_tests/test_drm_command_buffer.cc
+++ b/tests/unit_tests/test_drm_command_buffer.cc
@@ -38,16 +38,23 @@
           .flags = I915_EXEC_HANDLE_LUT,
       };
 
-      uint64_t size = DrmCommandBuffer::RequiredSize(&execbuffer2);
-      ASSERT_EQ(sizeof(magma_system_command_buffer), size);
+      std::vector<uint64_t> wait_semaphores;
+      std::vector<uint64_t> signal_semaphores;
+
+      uint64_t size = DrmCommandBuffer::RequiredSize(&execbuffer2, 0, 0);
+      EXPECT_EQ(sizeof(magma_system_command_buffer), size);
 
       std::vector<uint8_t> buffer(size);
-      EXPECT_TRUE(DrmCommandBuffer::Translate(&execbuffer2, buffer.data()));
+
+      EXPECT_TRUE(DrmCommandBuffer::Translate(&execbuffer2, wait_semaphores, signal_semaphores,
+                                              buffer.data()));
 
       auto command_buffer = reinterpret_cast<magma_system_command_buffer*>(buffer.data());
       EXPECT_EQ(-1, (int)command_buffer->batch_buffer_resource_index);
       EXPECT_EQ(0u, command_buffer->batch_start_offset);
       EXPECT_EQ(0u, command_buffer->num_resources);
+      EXPECT_EQ(0u, command_buffer->wait_semaphore_count);
+      EXPECT_EQ(0u, command_buffer->signal_semaphore_count);
    }
 
    std::unique_ptr<Buffer> CreateBuffer(uint64_t size)
@@ -58,13 +65,22 @@
       return std::make_unique<Buffer>(connection_, handle, size);
    }
 
-   void WithBuffers(bool add_relocs)
+   void WithBuffers(bool add_relocs, uint32_t wait_semaphore_count, uint32_t signal_semaphore_count)
    {
       std::vector<std::unique_ptr<Buffer>> buffers;
 
       buffers.push_back(CreateBuffer(PAGE_SIZE));
       buffers.push_back(CreateBuffer(PAGE_SIZE));
 
+      std::vector<uint64_t> wait_semaphore_ids;
+      for (uint32_t i = 0; i < wait_semaphore_count; i++) {
+         wait_semaphore_ids.push_back(10 + i);
+      }
+      std::vector<uint64_t> signal_semaphore_ids;
+      for (uint32_t i = 0; i < signal_semaphore_count; i++) {
+         signal_semaphore_ids.push_back(100 + i);
+      }
+
       std::vector<drm_i915_gem_relocation_entry> exec_relocs_0;
       std::vector<drm_i915_gem_relocation_entry> exec_relocs_1;
       std::vector<drm_i915_gem_exec_object2> exec_res;
@@ -120,23 +136,37 @@
           .flags = I915_EXEC_HANDLE_LUT,
       };
 
-      uint64_t size = DrmCommandBuffer::RequiredSize(&exec_buffer);
-      uint64_t expected_size = sizeof(magma_system_command_buffer) +
-                               sizeof(magma_system_exec_resource) * exec_res.size();
-      if (add_relocs)
-         expected_size +=
-             sizeof(magma_system_relocation_entry) * (exec_relocs_0.size() + exec_relocs_1.size());
+      uint64_t size = DrmCommandBuffer::RequiredSize(&exec_buffer, wait_semaphore_ids.size(),
+                                                     signal_semaphore_ids.size());
+      uint64_t expected_size =
+          sizeof(magma_system_command_buffer) +
+          (wait_semaphore_ids.size() + signal_semaphore_ids.size()) * sizeof(uint64_t) +
+          sizeof(magma_system_exec_resource) * exec_res.size() +
+          sizeof(magma_system_relocation_entry) * (exec_relocs_0.size() + exec_relocs_1.size());
       EXPECT_EQ(expected_size, size);
 
       std::vector<uint8_t> buffer(size);
-      EXPECT_TRUE(DrmCommandBuffer::Translate(&exec_buffer, buffer.data()));
+      EXPECT_TRUE(DrmCommandBuffer::Translate(&exec_buffer, wait_semaphore_ids,
+                                              signal_semaphore_ids, buffer.data()));
 
       auto command_buffer = reinterpret_cast<magma_system_command_buffer*>(buffer.data());
       EXPECT_EQ(exec_buffer.buffer_count - 1, command_buffer->batch_buffer_resource_index);
       EXPECT_EQ(exec_buffer.batch_start_offset, command_buffer->batch_start_offset);
       EXPECT_EQ(exec_buffer.buffer_count, command_buffer->num_resources);
+      EXPECT_EQ(wait_semaphore_ids.size(), command_buffer->wait_semaphore_count);
+      EXPECT_EQ(signal_semaphore_ids.size(), command_buffer->signal_semaphore_count);
 
-      auto exec_resource = reinterpret_cast<magma_system_exec_resource*>(command_buffer + 1);
+      auto semaphores = reinterpret_cast<uint64_t*>(command_buffer + 1);
+      for (uint32_t i = 0; i < wait_semaphore_count; i++) {
+         EXPECT_EQ(wait_semaphore_ids[i], semaphores[i]);
+      }
+      semaphores += wait_semaphore_count;
+      for (uint32_t i = 0; i < signal_semaphore_count; i++) {
+         EXPECT_EQ(signal_semaphore_ids[i], semaphores[i]);
+      }
+
+      auto exec_resource =
+          reinterpret_cast<magma_system_exec_resource*>(semaphores + signal_semaphore_count);
       for (uint32_t i = 0; i < exec_res.size(); i++) {
          EXPECT_EQ(exec_resource->buffer_id, buffers[i]->id());
          EXPECT_EQ(exec_resource->offset, exec_res[i].rsvd1);
@@ -179,11 +209,11 @@
 TEST(DrmCommandBuffer, SomeBuffers)
 {
    TestDrmCommandBuffer test;
-   test.WithBuffers(false);
+   test.WithBuffers(false, 1, 2);
 }
 
 TEST(DrmCommandBuffer, BuffersWithRelocs)
 {
    TestDrmCommandBuffer test;
-   test.WithBuffers(true);
+   test.WithBuffers(true, 3, 2);
 }