src/ui/lib/escher/profiling/timestamp_profiler.cc - fuchsia - Git at Google

 // Copyright 2017 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "src/ui/lib/escher/profiling/timestamp_profiler.h"

 #include "src/ui/lib/escher/impl/command_buffer.h"
 #include "src/ui/lib/escher/impl/vulkan_utils.h"
 #include "src/ui/lib/escher/util/trace_macros.h"

 #ifdef OS_FUCHSIA
 #include <lib/zx/time.h>
 #include <zircon/syscalls.h>
 #endif

 namespace escher {

 static constexpr uint32_t kPoolSize = 20;

 TimestampProfiler::TimestampProfiler(vk::Device device, float timestamp_period)
     : device_(device), timestamp_period_(timestamp_period) {}

 TimestampProfiler::~TimestampProfiler() {
   FX_DCHECK(ranges_.empty() && pools_.empty() && query_count_ == 0 && current_pool_index_ == 0);
 }

 void TimestampProfiler::AddTimestamp(CommandBufferPtr cmd_buf, vk::PipelineStageFlagBits flags,
                                      const char* name) {
   QueryRange* range = ObtainRange(cmd_buf);
   cmd_buf->vk().writeTimestamp(flags, range->pool, current_pool_index_);
   results_.push_back(Result{0, 0, 0, name});
   ++range->count;
   ++current_pool_index_;
   ++query_count_;
 }

 std::vector<TimestampProfiler::Result> TimestampProfiler::GetQueryResults() {
   uint32_t result_index = 0;
   for (auto& range : ranges_) {
     // We don't wait for results.  Crash if results aren't immediately
     // available.
     vk::Result status = device_.getQueryPoolResults(
         range.pool, range.start_index, range.count, range.count * sizeof(Result),
         &results_[result_index], sizeof(Result), vk::QueryResultFlagBits::e64);
     FX_DCHECK(status == vk::Result::eSuccess);

     result_index += range.count;
   }
   FX_CHECK(result_index == query_count_);

   for (auto& pool : pools_) {
     device_.destroyQueryPool(pool);
   }
   ranges_.clear();
   pools_.clear();
   query_count_ = 0;
   current_pool_index_ = 0;

   // |timestamp_period_| is the number of nanoseconds per time unit reported by
   // the timer query, so this is the number of microseconds in the same time
   // per the same time unit.  We need to use this below because an IEEE double
   // doesn't have enough precision to hold nanoseconds since the epoch.
   const double microsecond_multiplier = timestamp_period_ * 0.001;
   for (size_t i = 0; i < results_.size(); ++i) {
     // Avoid precision issues that we would have if we simply multiplied by
     // timestamp_period_.
     results_[i].raw_nanoseconds =
         1000 * static_cast<uint64_t>(static_cast<double>(results_[i].raw_nanoseconds) *
                                      microsecond_multiplier);

     // Microseconds since the beginning of this timing query.
     results_[i].time = (results_[i].raw_nanoseconds - results_[0].raw_nanoseconds) / 1000;

     // Microseconds since the previous event.
     results_[i].elapsed = i == 0 ? 0 : results_[i].time - results_[i - 1].time;
   }

   return std::move(results_);
 }

 #ifdef OS_FUCHSIA
 static inline uint64_t MicrosToTicks(zx_time_t microseconds) {
   static const uint64_t ticks_per_microsecond = zx_ticks_per_second() / 1000000;
   return microseconds * ticks_per_microsecond;
 }

 // ProcessTraceEvents transforms the data received from GetQueryResults into a
 // format more suitable for tracing and logging, though it does not do any
 // tracing or logging on its own.
 //
 // The |timestamps| vector holds an ordered sequence of timestamps. The first
 // and last timestamps represent the beginning and end of the frame. All other
 // timestamps were added by the application.
 //
 // Each of those Vulkan timestamps represents when that GPU work ended. It is
 // possible for multiple timestamps to have the same value due to the specifics
 // of the Vulkan implementation. If this occurs, we interpret those GPU events
 // as occurring during the same period of time, and output a single trace
 // event struct accordingly.
 std::vector<TimestampProfiler::TraceEvent> TimestampProfiler::ProcessTraceEvents(
     const std::vector<TimestampProfiler::Result>& timestamps) {
   std::vector<TimestampProfiler::TraceEvent> traces;

   // We need at least two timestamps to create a TraceEvent with positive
   // duration.
   if (timestamps.size() < 2)
     return traces;

   uint64_t start_ticks = MicrosToTicks(timestamps[0].time);
   uint64_t end_ticks = MicrosToTicks(timestamps[1].time);

   // Create the first trace event.
   std::vector<const char*> name = {timestamps[1].name};
   traces.push_back({start_ticks, end_ticks, name});

   for (size_t i = 2; i < timestamps.size() - 1; i++) {
     uint64_t ticks = MicrosToTicks(timestamps[i].time);

     // If the ticks we see is greater than our last one, we start a new
     // TraceEvent and update our start and end tick values.
     if (ticks > end_ticks) {
       start_ticks = end_ticks;
       end_ticks = ticks;

       std::vector<const char*> names;
       names.push_back(timestamps[i].name);
       traces.push_back({start_ticks, end_ticks, names});
     } else {
       // Otherwise, we are seeing a concurrent event and should simply append
       // to the latest names vector.
       traces.back().names.push_back(timestamps[i].name);
     }
   }

   return traces;
 }

 // This function outputs trace events generated by the application. It is
 // intended to be used in conjunction with the ProcessTraceEvents() method.
 //
 // We utilize virtual duration events to represent this GPU work on a virtual
 // thread (vthread) since it is not local to any CPU thread.
 void TimestampProfiler::TraceGpuQueryResults(
     const std::vector<TimestampProfiler::TraceEvent>& trace_events, uint64_t frame_number,
     uint64_t escher_frame_number, const char* trace_literal, const char* gpu_vthread_literal,
     uint64_t gpu_vthread_id) {
   constexpr static const char* kCategoryLiteral = "gfx";

   // NOTE: If this value changes, you should also change the corresponding
   // kCleanupDelay inside engine.cc.
   zx::duration kCleanupDelay = zx::msec(1);

   // Shift the vthread events by kCleanupDelay / 2. This is specifically chosen
   // because we know CleanupEscher() (and therefore us, too) runs every 1ms,
   // so by setting the "end" to be 0.5ms ago, we know we cannot be off by more than
   // +/- 0.5ms. See fxbug.dev/24648 for more details.
   kCleanupDelay /= 2;

   static const uint64_t kCleanupRatio = zx::msec(1).get() / kCleanupDelay.get();

   static const uint64_t kTicksPerMillisecond = zx_ticks_per_second() / 1000;
   static const uint64_t kTicksOffset = kTicksPerMillisecond / kCleanupRatio;

   uint64_t real_end_ticks = zx_ticks_get() - kTicksOffset;

   // Get the beginning, which is the offset we add to all |elapsed_ticks| to get the
   // shifted start and end.
   uint64_t real_start_ticks = real_end_ticks - (trace_events.back().end_elapsed_ticks);

   // First, create the entire duration event. We can do this by creating an
   // event combining the start of the first event, and the end of the last
   // event.
   TRACE_VTHREAD_DURATION_BEGIN(kCategoryLiteral, trace_literal, gpu_vthread_literal, gpu_vthread_id,
                                real_start_ticks, "Frame number", frame_number,
                                "Escher frame number", escher_frame_number);

   TRACE_VTHREAD_DURATION_END(kCategoryLiteral, trace_literal, gpu_vthread_literal, gpu_vthread_id,
                              real_end_ticks, "Frame number", frame_number, "Escher frame number",
                              escher_frame_number);

   // Now, output the more interesting events added by the application.
   for (size_t i = 0; i < trace_events.size(); ++i) {
     size_t num_concurrent_events = trace_events[i].names.size();
     uint64_t start_ticks = real_start_ticks + trace_events[i].start_elapsed_ticks;
     uint64_t end_ticks = real_start_ticks + trace_events[i].end_elapsed_ticks;

     // Combine all additional events into a single string.
     std::stringstream stream;
     copy(trace_events[i].names.begin() + 1, trace_events[i].names.end(),
          std::ostream_iterator<std::string>(stream, ",\n"));
     std::string additional_events = stream.str();

     TRACE_VTHREAD_DURATION_BEGIN(kCategoryLiteral, trace_events[i].names[0], gpu_vthread_literal,
                                  gpu_vthread_id, start_ticks, "Additional Events",
                                  additional_events);
     TRACE_VTHREAD_DURATION_END(kCategoryLiteral, trace_events[i].names[0], gpu_vthread_literal,
                                gpu_vthread_id, end_ticks, "Additional Events", additional_events);
   }

   // Flow event tracking the progress of a Scenic frame.
   TRACE_VTHREAD_FLOW_STEP(kCategoryLiteral, "scenic_frame", gpu_vthread_literal, gpu_vthread_id,
                           frame_number, real_start_ticks);

   TRACE_VTHREAD_FLOW_STEP(kCategoryLiteral, "scenic_frame", gpu_vthread_literal, gpu_vthread_id,
                           frame_number, real_end_ticks);
 }
 #else
 void TimestampProfiler::TraceGpuQueryResults(
     const std::vector<TimestampProfiler::TraceEvent>& trace_events, uint64_t frame_number,
     uint64_t escher_frame_number, const char* trace_literal, const char* gpu_vthread_literal,
     uint64_t gpu_vthread_id) {}

 std::vector<TimestampProfiler::TraceEvent> TimestampProfiler::ProcessTraceEvents(
     const std::vector<TimestampProfiler::Result>& timestamps) {
   std::vector<TimestampProfiler::TraceEvent> t;
   return t;
 }
 #endif

 void TimestampProfiler::LogGpuQueryResults(
     uint64_t escher_frame_number, const std::vector<TimestampProfiler::Result>& timestamps) {
   FX_LOGS(INFO) << "--------------------------------"
                    "----------------------";
   FX_LOGS(INFO) << "Timestamps for frame #" << escher_frame_number;
   FX_LOGS(INFO) << "total\t | \tsince previous (all "
                    "times in microseconds)";
   FX_LOGS(INFO) << "--------------------------------"
                    "----------------------";
   for (size_t i = 0; i < timestamps.size(); ++i) {
     FX_LOGS(INFO) << timestamps[i].time << " \t | \t" << timestamps[i].elapsed << "   \t"
                   << timestamps[i].name;
   }
   FX_LOGS(INFO) << "--------------------------------"
                    "----------------------";
 }

 TimestampProfiler::QueryRange* TimestampProfiler::ObtainRange(CommandBufferPtr cmd_buf) {
   if (ranges_.empty() || current_pool_index_ == kPoolSize) {
     return CreateRangeAndPool(cmd_buf);
   } else if (ranges_.back().command_buffer != cmd_buf->vk()) {
     return CreateRange(cmd_buf);
   } else {
     auto range = &ranges_.back();
     FX_DCHECK(current_pool_index_ < kPoolSize);
     if (current_pool_index_ != range->start_index + range->count) {
       FX_LOGS(INFO) << current_pool_index_ << "  " << range->start_index << "  " << range->count;
     }
     FX_DCHECK(current_pool_index_ == range->start_index + range->count);
     return range;
   }
 }

 TimestampProfiler::QueryRange* TimestampProfiler::CreateRangeAndPool(CommandBufferPtr cmd_buf) {
   vk::QueryPoolCreateInfo info;
   info.flags = vk::QueryPoolCreateFlags();  // no flags currently exist
   info.queryType = vk::QueryType::eTimestamp;
   info.queryCount = kPoolSize;
   vk::QueryPool pool = ESCHER_CHECKED_VK_RESULT(device_.createQueryPool(info));
   cmd_buf->vk().resetQueryPool(pool, 0, kPoolSize);

   QueryRange range;
   range.pool = pool;
   range.command_buffer = cmd_buf->vk();
   range.start_index = 0;
   range.count = 0;

   current_pool_index_ = 0;
   pools_.push_back(pool);
   ranges_.push_back(range);

   return &ranges_.back();
 }

 TimestampProfiler::QueryRange* TimestampProfiler::CreateRange(CommandBufferPtr cmd_buf) {
   QueryRange& prev = ranges_.back();
   FX_DCHECK(!ranges_.empty() && current_pool_index_ < kPoolSize);
   FX_DCHECK(current_pool_index_ == prev.start_index + prev.count);

   QueryRange range;
   range.pool = prev.pool;
   range.command_buffer = cmd_buf->vk();
   range.start_index = prev.start_index + prev.count;
   range.count = 0;
   FX_DCHECK(range.start_index == current_pool_index_);

   ranges_.push_back(range);
   return &ranges_.back();
 }

 }  // namespace escher
	// Copyright 2017 The Fuchsia Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "src/ui/lib/escher/profiling/timestamp_profiler.h"

	#include "src/ui/lib/escher/impl/command_buffer.h"
	#include "src/ui/lib/escher/impl/vulkan_utils.h"
	#include "src/ui/lib/escher/util/trace_macros.h"

	#ifdef OS_FUCHSIA
	#include <lib/zx/time.h>
	#include <zircon/syscalls.h>
	#endif

	namespace escher {

	static constexpr uint32_t kPoolSize = 20;

	TimestampProfiler::TimestampProfiler(vk::Device device, float timestamp_period)
	: device_(device), timestamp_period_(timestamp_period) {}

	TimestampProfiler::~TimestampProfiler() {
	FX_DCHECK(ranges_.empty() && pools_.empty() && query_count_ == 0 && current_pool_index_ == 0);
	}

	void TimestampProfiler::AddTimestamp(CommandBufferPtr cmd_buf, vk::PipelineStageFlagBits flags,
	const char* name) {
	QueryRange* range = ObtainRange(cmd_buf);
	cmd_buf->vk().writeTimestamp(flags, range->pool, current_pool_index_);
	results_.push_back(Result{0, 0, 0, name});
	++range->count;
	++current_pool_index_;
	++query_count_;
	}

	std::vector<TimestampProfiler::Result> TimestampProfiler::GetQueryResults() {
	uint32_t result_index = 0;
	for (auto& range : ranges_) {
	// We don't wait for results. Crash if results aren't immediately
	// available.
	vk::Result status = device_.getQueryPoolResults(
	range.pool, range.start_index, range.count, range.count * sizeof(Result),
	&results_[result_index], sizeof(Result), vk::QueryResultFlagBits::e64);
	FX_DCHECK(status == vk::Result::eSuccess);

	result_index += range.count;
	}
	FX_CHECK(result_index == query_count_);

	for (auto& pool : pools_) {
	device_.destroyQueryPool(pool);
	}
	ranges_.clear();
	pools_.clear();
	query_count_ = 0;
	current_pool_index_ = 0;

	// \|timestamp_period_\| is the number of nanoseconds per time unit reported by
	// the timer query, so this is the number of microseconds in the same time
	// per the same time unit. We need to use this below because an IEEE double
	// doesn't have enough precision to hold nanoseconds since the epoch.
	const double microsecond_multiplier = timestamp_period_ * 0.001;
	for (size_t i = 0; i < results_.size(); ++i) {
	// Avoid precision issues that we would have if we simply multiplied by
	// timestamp_period_.
	results_[i].raw_nanoseconds =
	1000 * static_cast<uint64_t>(static_cast<double>(results_[i].raw_nanoseconds) *
	microsecond_multiplier);

	// Microseconds since the beginning of this timing query.
	results_[i].time = (results_[i].raw_nanoseconds - results_[0].raw_nanoseconds) / 1000;

	// Microseconds since the previous event.
	results_[i].elapsed = i == 0 ? 0 : results_[i].time - results_[i - 1].time;
	}

	return std::move(results_);
	}

	#ifdef OS_FUCHSIA
	static inline uint64_t MicrosToTicks(zx_time_t microseconds) {
	static const uint64_t ticks_per_microsecond = zx_ticks_per_second() / 1000000;
	return microseconds * ticks_per_microsecond;
	}

	// ProcessTraceEvents transforms the data received from GetQueryResults into a
	// format more suitable for tracing and logging, though it does not do any
	// tracing or logging on its own.
	//
	// The \|timestamps\| vector holds an ordered sequence of timestamps. The first
	// and last timestamps represent the beginning and end of the frame. All other
	// timestamps were added by the application.
	//
	// Each of those Vulkan timestamps represents when that GPU work ended. It is
	// possible for multiple timestamps to have the same value due to the specifics
	// of the Vulkan implementation. If this occurs, we interpret those GPU events
	// as occurring during the same period of time, and output a single trace
	// event struct accordingly.
	std::vector<TimestampProfiler::TraceEvent> TimestampProfiler::ProcessTraceEvents(
	const std::vector<TimestampProfiler::Result>& timestamps) {
	std::vector<TimestampProfiler::TraceEvent> traces;

	// We need at least two timestamps to create a TraceEvent with positive
	// duration.
	if (timestamps.size() < 2)
	return traces;

	uint64_t start_ticks = MicrosToTicks(timestamps[0].time);
	uint64_t end_ticks = MicrosToTicks(timestamps[1].time);

	// Create the first trace event.
	std::vector<const char*> name = {timestamps[1].name};
	traces.push_back({start_ticks, end_ticks, name});

	for (size_t i = 2; i < timestamps.size() - 1; i++) {
	uint64_t ticks = MicrosToTicks(timestamps[i].time);

	// If the ticks we see is greater than our last one, we start a new
	// TraceEvent and update our start and end tick values.
	if (ticks > end_ticks) {
	start_ticks = end_ticks;
	end_ticks = ticks;

	std::vector<const char*> names;
	names.push_back(timestamps[i].name);
	traces.push_back({start_ticks, end_ticks, names});
	} else {
	// Otherwise, we are seeing a concurrent event and should simply append
	// to the latest names vector.
	traces.back().names.push_back(timestamps[i].name);
	}
	}

	return traces;
	}

	// This function outputs trace events generated by the application. It is
	// intended to be used in conjunction with the ProcessTraceEvents() method.
	//
	// We utilize virtual duration events to represent this GPU work on a virtual
	// thread (vthread) since it is not local to any CPU thread.
	void TimestampProfiler::TraceGpuQueryResults(
	const std::vector<TimestampProfiler::TraceEvent>& trace_events, uint64_t frame_number,
	uint64_t escher_frame_number, const char* trace_literal, const char* gpu_vthread_literal,
	uint64_t gpu_vthread_id) {
	constexpr static const char* kCategoryLiteral = "gfx";

	// NOTE: If this value changes, you should also change the corresponding
	// kCleanupDelay inside engine.cc.
	zx::duration kCleanupDelay = zx::msec(1);

	// Shift the vthread events by kCleanupDelay / 2. This is specifically chosen
	// because we know CleanupEscher() (and therefore us, too) runs every 1ms,
	// so by setting the "end" to be 0.5ms ago, we know we cannot be off by more than
	// +/- 0.5ms. See fxbug.dev/24648 for more details.
	kCleanupDelay /= 2;

	static const uint64_t kCleanupRatio = zx::msec(1).get() / kCleanupDelay.get();

	static const uint64_t kTicksPerMillisecond = zx_ticks_per_second() / 1000;
	static const uint64_t kTicksOffset = kTicksPerMillisecond / kCleanupRatio;

	uint64_t real_end_ticks = zx_ticks_get() - kTicksOffset;

	// Get the beginning, which is the offset we add to all \|elapsed_ticks\| to get the
	// shifted start and end.
	uint64_t real_start_ticks = real_end_ticks - (trace_events.back().end_elapsed_ticks);

	// First, create the entire duration event. We can do this by creating an
	// event combining the start of the first event, and the end of the last
	// event.
	TRACE_VTHREAD_DURATION_BEGIN(kCategoryLiteral, trace_literal, gpu_vthread_literal, gpu_vthread_id,
	real_start_ticks, "Frame number", frame_number,
	"Escher frame number", escher_frame_number);

	TRACE_VTHREAD_DURATION_END(kCategoryLiteral, trace_literal, gpu_vthread_literal, gpu_vthread_id,
	real_end_ticks, "Frame number", frame_number, "Escher frame number",
	escher_frame_number);

	// Now, output the more interesting events added by the application.
	for (size_t i = 0; i < trace_events.size(); ++i) {
	size_t num_concurrent_events = trace_events[i].names.size();
	uint64_t start_ticks = real_start_ticks + trace_events[i].start_elapsed_ticks;
	uint64_t end_ticks = real_start_ticks + trace_events[i].end_elapsed_ticks;

	// Combine all additional events into a single string.
	std::stringstream stream;
	copy(trace_events[i].names.begin() + 1, trace_events[i].names.end(),
	std::ostream_iterator<std::string>(stream, ",\n"));
	std::string additional_events = stream.str();

	TRACE_VTHREAD_DURATION_BEGIN(kCategoryLiteral, trace_events[i].names[0], gpu_vthread_literal,
	gpu_vthread_id, start_ticks, "Additional Events",
	additional_events);
	TRACE_VTHREAD_DURATION_END(kCategoryLiteral, trace_events[i].names[0], gpu_vthread_literal,
	gpu_vthread_id, end_ticks, "Additional Events", additional_events);
	}

	// Flow event tracking the progress of a Scenic frame.
	TRACE_VTHREAD_FLOW_STEP(kCategoryLiteral, "scenic_frame", gpu_vthread_literal, gpu_vthread_id,
	frame_number, real_start_ticks);

	TRACE_VTHREAD_FLOW_STEP(kCategoryLiteral, "scenic_frame", gpu_vthread_literal, gpu_vthread_id,
	frame_number, real_end_ticks);
	}
	#else
	void TimestampProfiler::TraceGpuQueryResults(
	const std::vector<TimestampProfiler::TraceEvent>& trace_events, uint64_t frame_number,
	uint64_t escher_frame_number, const char* trace_literal, const char* gpu_vthread_literal,
	uint64_t gpu_vthread_id) {}

	std::vector<TimestampProfiler::TraceEvent> TimestampProfiler::ProcessTraceEvents(
	const std::vector<TimestampProfiler::Result>& timestamps) {
	std::vector<TimestampProfiler::TraceEvent> t;
	return t;
	}
	#endif

	void TimestampProfiler::LogGpuQueryResults(
	uint64_t escher_frame_number, const std::vector<TimestampProfiler::Result>& timestamps) {
	FX_LOGS(INFO) << "--------------------------------"
	"----------------------";
	FX_LOGS(INFO) << "Timestamps for frame #" << escher_frame_number;
	FX_LOGS(INFO) << "total\t \| \tsince previous (all "
	"times in microseconds)";
	FX_LOGS(INFO) << "--------------------------------"
	"----------------------";
	for (size_t i = 0; i < timestamps.size(); ++i) {
	FX_LOGS(INFO) << timestamps[i].time << " \t \| \t" << timestamps[i].elapsed << " \t"
	<< timestamps[i].name;
	}
	FX_LOGS(INFO) << "--------------------------------"
	"----------------------";
	}

	TimestampProfiler::QueryRange* TimestampProfiler::ObtainRange(CommandBufferPtr cmd_buf) {
	if (ranges_.empty() \|\| current_pool_index_ == kPoolSize) {
	return CreateRangeAndPool(cmd_buf);
	} else if (ranges_.back().command_buffer != cmd_buf->vk()) {
	return CreateRange(cmd_buf);
	} else {
	auto range = &ranges_.back();
	FX_DCHECK(current_pool_index_ < kPoolSize);
	if (current_pool_index_ != range->start_index + range->count) {
	FX_LOGS(INFO) << current_pool_index_ << " " << range->start_index << " " << range->count;
	}
	FX_DCHECK(current_pool_index_ == range->start_index + range->count);
	return range;
	}
	}

	TimestampProfiler::QueryRange* TimestampProfiler::CreateRangeAndPool(CommandBufferPtr cmd_buf) {
	vk::QueryPoolCreateInfo info;
	info.flags = vk::QueryPoolCreateFlags(); // no flags currently exist
	info.queryType = vk::QueryType::eTimestamp;
	info.queryCount = kPoolSize;
	vk::QueryPool pool = ESCHER_CHECKED_VK_RESULT(device_.createQueryPool(info));
	cmd_buf->vk().resetQueryPool(pool, 0, kPoolSize);

	QueryRange range;
	range.pool = pool;
	range.command_buffer = cmd_buf->vk();
	range.start_index = 0;
	range.count = 0;

	current_pool_index_ = 0;
	pools_.push_back(pool);
	ranges_.push_back(range);

	return &ranges_.back();
	}

	TimestampProfiler::QueryRange* TimestampProfiler::CreateRange(CommandBufferPtr cmd_buf) {
	QueryRange& prev = ranges_.back();
	FX_DCHECK(!ranges_.empty() && current_pool_index_ < kPoolSize);
	FX_DCHECK(current_pool_index_ == prev.start_index + prev.count);

	QueryRange range;
	range.pool = prev.pool;
	range.command_buffer = cmd_buf->vk();
	range.start_index = prev.start_index + prev.count;
	range.count = 0;
	FX_DCHECK(range.start_index == current_pool_index_);

	ranges_.push_back(range);
	return &ranges_.back();
	}

	} // namespace escher