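[escher] Add finer-grained tracing.

Add TRACE_DURATION events in the "gfx" category for
PaperRenderFuncs::MeshData::Bind, for index binding and the Vulkan
drawIndexed call in CommandBuffer, for each dirty-state step flushed
by FlushRenderState (push constants, viewport, scissor, depth bias,
stencil), and for CommandBufferPipelineState::FlushVertexBuffers.

Also fix a comment typo in frame.h ("value" -> "valid"), add a TODO
about whether uniform-block memory is host-coherent, and include
forward_declarations.h from paper_render_funcs.h.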

Change-Id: Ib9948f807e96d8082e505849b3ee5fc1383b8e11
diff --git a/src/ui/lib/escher/paper/paper_render_funcs.cc b/src/ui/lib/escher/paper/paper_render_funcs.cc
index 1bab80b..17751be 100644
--- a/src/ui/lib/escher/paper/paper_render_funcs.cc
+++ b/src/ui/lib/escher/paper/paper_render_funcs.cc
@@ -27,6 +27,7 @@
 namespace escher {
 
 void PaperRenderFuncs::MeshData::Bind(CommandBuffer* cb) const {
+  TRACE_DURATION("gfx", "PaperRenderFuncs::MeshData::Bind");
   index_binding.Bind(cb);
   for (uint32_t i = 0; i < vertex_binding_count; ++i) {
     vertex_bindings[i].Bind(cb);
diff --git a/src/ui/lib/escher/paper/paper_render_funcs.h b/src/ui/lib/escher/paper/paper_render_funcs.h
index 39c0330..2db8b77 100644
--- a/src/ui/lib/escher/paper/paper_render_funcs.h
+++ b/src/ui/lib/escher/paper/paper_render_funcs.h
@@ -5,6 +5,7 @@
 #ifndef SRC_UI_LIB_ESCHER_PAPER_PAPER_RENDER_FUNCS_H_
 #define SRC_UI_LIB_ESCHER_PAPER_PAPER_RENDER_FUNCS_H_
 
+#include "src/ui/lib/escher/forward_declarations.h"
 #include "src/ui/lib/escher/paper/paper_drawable_flags.h"
 #include "src/ui/lib/escher/paper/paper_readme.h"
 #include "src/ui/lib/escher/renderer/render_funcs.h"
diff --git a/src/ui/lib/escher/renderer/frame.h b/src/ui/lib/escher/renderer/frame.h
index 5ffe512..1285dd3 100644
--- a/src/ui/lib/escher/renderer/frame.h
+++ b/src/ui/lib/escher/renderer/frame.h
@@ -74,8 +74,8 @@
     return block_allocator_.AllocateMany<T>(count);
   }
 
-  // Allocate temporary GPU uniform buffer memory that is value until the frame
-  // is finished rendering (after EndFrame() is called).
+  // Allocate temporary GPU uniform buffer memory that is valid until the frame is finished
+  // rendering (after EndFrame() is called).
   UniformAllocation AllocateUniform(size_t size, size_t alignment) {
     return uniform_block_allocator_.Allocate(size, alignment);
   }
@@ -149,6 +149,9 @@
 
   BlockAllocator block_allocator_;
 
+  // TODO(42570): investigate whether this memory is host-coherent, and whether it should be
+  // (it seems like it isn't and should be).  Document the usage guarantees/requirements in
+  // AllocateUniform(), above.
   UniformBlockAllocator uniform_block_allocator_;
 
   TimestampProfilerPtr profiler_;
diff --git a/src/ui/lib/escher/third_party/granite/vk/command_buffer.cc b/src/ui/lib/escher/third_party/granite/vk/command_buffer.cc
index 421fb5a2..fc62b37 100644
--- a/src/ui/lib/escher/third_party/granite/vk/command_buffer.cc
+++ b/src/ui/lib/escher/third_party/granite/vk/command_buffer.cc
@@ -354,6 +354,7 @@
     // Bindings are unchanged.
     return;
   }
+  TRACE_DURATION("gfx", "escher::CommandBuffer::BindIndices");
 
   // Index buffer changes never require a new pipeline to be generated, so it is
   // OK to make this change immediately.
@@ -377,6 +378,8 @@
   FXL_DCHECK(index_binding_.buffer);
 
   FlushRenderState();
+
+  TRACE_DURATION("gfx", "escher::CommandBuffer::DrawIndexed[vulkan]");
   vk().drawIndexed(index_count, instance_count, first_index, vertex_offset, first_instance);
 }
 
@@ -409,6 +412,7 @@
   const PipelineStaticState* static_pipeline_state = pipeline_state_.static_state();
 
   if (GetAndClearDirty(kDirtyPushConstantsBit)) {
+    TRACE_DURATION("gfx", "escher::CommandBuffer::FlushRenderState[push_constants]");
     // The push constants were invalidated (perhaps by being explicitly set, or
     // perhaps by a change in the descriptor set layout; it doesn't matter).
     uint32_t num_ranges = current_pipeline_layout_->spec().num_push_constant_ranges();
@@ -419,15 +423,19 @@
     }
   }
   if (GetAndClearDirty(kDirtyViewportBit)) {
+    TRACE_DURATION("gfx", "escher::CommandBuffer::FlushRenderState[viewport]");
     vk().setViewport(0, 1, &viewport_);
   }
   if (GetAndClearDirty(kDirtyScissorBit)) {
+    TRACE_DURATION("gfx", "escher::CommandBuffer::FlushRenderState[scissor]");
     vk().setScissor(0, 1, &scissor_);
   }
   if (static_pipeline_state->depth_bias_enable && GetAndClearDirty(kDirtyDepthBiasBit)) {
+    TRACE_DURATION("gfx", "escher::CommandBuffer::FlushRenderState[depth_bias]");
     vk().setDepthBias(dynamic_state_.depth_bias_constant, 0.0f, dynamic_state_.depth_bias_slope);
   }
   if (static_pipeline_state->stencil_test && GetAndClearDirty(kDirtyStencilMasksAndReferenceBit)) {
+    TRACE_DURATION("gfx", "escher::CommandBuffer::FlushRenderState[stencil]");
     vk().setStencilCompareMask(vk::StencilFaceFlagBits::eFront, dynamic_state_.front_compare_mask);
     vk().setStencilReference(vk::StencilFaceFlagBits::eFront, dynamic_state_.front_reference);
     vk().setStencilWriteMask(vk::StencilFaceFlagBits::eFront, dynamic_state_.front_write_mask);
diff --git a/src/ui/lib/escher/third_party/granite/vk/command_buffer_pipeline_state.cc b/src/ui/lib/escher/third_party/granite/vk/command_buffer_pipeline_state.cc
index 5c6a1ff..9746687 100644
--- a/src/ui/lib/escher/third_party/granite/vk/command_buffer_pipeline_state.cc
+++ b/src/ui/lib/escher/third_party/granite/vk/command_buffer_pipeline_state.cc
@@ -355,6 +355,8 @@
 }
 
 void CommandBufferPipelineState::FlushVertexBuffers(vk::CommandBuffer cb) {
+  TRACE_DURATION("gfx", "escher::CommandBuffer::FlushVertexBuffers");
+
   uint32_t update_vbo_mask = dirty_vertex_bindings_ & active_vertex_bindings_;
   ForEachBitRange(update_vbo_mask, [&](uint32_t binding, uint32_t binding_count) {
 #ifndef NDEBUG