/*
* Copyright © 2021 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "freedreno_autotune.h"
#include "freedreno_batch.h"
#include "freedreno_util.h"
/**
 * Tracks, for a given batch key (which maps to a FBO/framebuffer state),
 * the recent query results used to decide between GMEM and bypass
 * rendering.
 *
 * ralloc parent is fd_autotune::ht
 */
struct fd_batch_history {
struct fd_batch_key *key;
/* Entry in fd_autotune::lru: */
struct list_head node;
unsigned num_results;
/**
* List of recent fd_batch_result's
*/
struct list_head results;
#define MAX_RESULTS 5
};
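
/* history->results acts as a sliding window: once num_results reaches
 * MAX_RESULTS, process_results() drops the oldest entry from the tail
 * each time a new result lands, so averages are computed over only the
 * most recent results.
 */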
static struct fd_batch_history *
get_history(struct fd_autotune *at, struct fd_batch *batch)
{
struct fd_batch_history *history;
/* Draw batches should still have their key at this point; only nondraw
 * batches are expected to lack one.
 */
assert(batch->key || batch->nondraw);
if (!batch->key)
return NULL;
struct hash_entry *entry =
_mesa_hash_table_search_pre_hashed(at->ht, batch->hash, batch->key);
if (entry) {
history = entry->data;
goto found;
}
history = rzalloc_size(at->ht, sizeof(*history));
history->key = fd_batch_key_clone(history, batch->key);
list_inithead(&history->node);
list_inithead(&history->results);
/* Note: we cap the number of cached GMEM states at 20, so assuming
 * double-buffering, 40 should be a good place to cap cached autotune
 * state:
 */
if (at->ht->entries >= 40) {
struct fd_batch_history *last =
list_last_entry(&at->lru, struct fd_batch_history, node);
_mesa_hash_table_remove_key(at->ht, last->key);
list_del(&last->node);
ralloc_free(last);
}
_mesa_hash_table_insert_pre_hashed(at->ht, batch->hash, history->key,
history);
found:
/* Move to the head of the LRU: */
list_delinit(&history->node);
list_add(&history->node, &at->lru);
return history;
}

static void
result_destructor(void *r)
{
struct fd_batch_result *result = r;
/* Just in case we somehow manage to still be on the pending_results list: */
list_del(&result->node);
}

static struct fd_batch_result *
get_result(struct fd_autotune *at, struct fd_batch_history *history)
{
struct fd_batch_result *result = rzalloc_size(history, sizeof(*result));
result->fence =
++at->fence_counter; /* pre-increment so zero isn't valid fence */
result->idx = at->idx_counter++;
if (at->idx_counter >= ARRAY_SIZE(at->results->result))
at->idx_counter = 0;
result->history = history;
list_addtail(&result->node, &at->pending_results);
ralloc_set_destructor(result, result_destructor);
return result;
}
static void
process_results(struct fd_autotune *at)
{
uint32_t current_fence = at->results->fence;
list_for_each_entry_safe (struct fd_batch_result, result,
&at->pending_results, node) {
if (result->fence > current_fence)
break;
struct fd_batch_history *history = result->history;
result->samples_passed = at->results->result[result->idx].samples_end -
at->results->result[result->idx].samples_start;
list_delinit(&result->node);
list_add(&result->node, &history->results);
if (history->num_results < MAX_RESULTS) {
history->num_results++;
} else {
/* Once above a limit, start popping old results off the
* tail of the list:
*/
struct fd_batch_result *old_result =
list_last_entry(&history->results, struct fd_batch_result, node);
list_delinit(&old_result->node);
ralloc_free(old_result);
}
}
}
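
/* Note that results are harvested lazily: process_results() runs at the
 * top of fd_autotune_use_bypass(), so pending results are consumed the
 * next time a bypass decision is needed, rather than on a timer or
 * fence-signal callback.
 */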
static bool
fallback_use_bypass(struct fd_batch *batch)
{
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
/* Fallback logic if we have no historical data about the rendertarget: */
if (batch->cleared || batch->gmem_reason ||
(batch->num_draws > 5) || (pfb->samples > 1)) {
return false;
}
return true;
}

/**
 * A magic 8-ball that tells the gmem code whether we should do bypass mode
 * for more FPS.
 */
bool
fd_autotune_use_bypass(struct fd_autotune *at, struct fd_batch *batch)
{
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
process_results(at);
/* Only enable on gens that opt in (and actually have sample-passed
 * collection wired up):
 */
if (!batch->ctx->screen->gmem_reason_mask)
return fallback_use_bypass(batch);
if (batch->gmem_reason & ~batch->ctx->screen->gmem_reason_mask)
return fallback_use_bypass(batch);
for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
/* If MSAA render-to-texture (ms-rtt) is involved, force GMEM, as we
 * don't currently implement a temporary render target that we can
 * MSAA-resolve from.
 */
if (pfb->cbufs[i] && pfb->cbufs[i]->nr_samples)
return fallback_use_bypass(batch);
}
struct fd_batch_history *history = get_history(at, batch);
if (!history)
return fallback_use_bypass(batch);
batch->autotune_result = get_result(at, history);
batch->autotune_result->cost = batch->cost;
bool use_bypass = fallback_use_bypass(batch);
if (use_bypass)
return true;
if (history->num_results > 0) {
uint32_t total_samples = 0;
/* TODO: we should account for clears somehow.
 * TODO: should we try to notice if there is a drastic change from
 * frame to frame?
 */
list_for_each_entry (struct fd_batch_result, result, &history->results,
node) {
total_samples += result->samples_passed;
}
float avg_samples = (float)total_samples / (float)history->num_results;
/* A low sample count could mean there was only a clear, or a clear
 * plus draws that touch few or no samples.
 */
if (avg_samples < 500.0f)
return true;
/* Cost-per-sample is an estimate for the average number of reads+
* writes for a given passed sample.
*/
float sample_cost = batch->cost;
sample_cost /= batch->num_draws;
float total_draw_cost = (avg_samples * sample_cost) / batch->num_draws;
DBG("%08x:%u\ttotal_samples=%u, avg_samples=%f, sample_cost=%f, "
"total_draw_cost=%f\n",
batch->hash, batch->num_draws, total_samples, avg_samples,
sample_cost, total_draw_cost);
if (total_draw_cost < 3000.0f)
return true;
}
return use_bypass;
}
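
/*
 * A minimal usage sketch (illustrative only; the caller names below are
 * assumptions, not part of this file).  The gmem code would consult the
 * autotuner when deciding how to flush a batch:
 *
 *    if (fd_autotune_use_bypass(&ctx->autotune, batch)) {
 *       render_sysmem(batch);    // draw directly to system memory
 *    } else {
 *       render_tiles(batch);     // binned GMEM rendering
 *    }
 *
 * batch->autotune_result is populated as a side effect; the per-gen
 * backend is expected to use it to emit the sample-count queries whose
 * results feed back into process_results() above.
 */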
void
fd_autotune_init(struct fd_autotune *at, struct fd_device *dev)
{
at->ht =
_mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
list_inithead(&at->lru);
at->results_mem = fd_bo_new(dev, sizeof(struct fd_autotune_results),
0, "autotune");
at->results = fd_bo_map(at->results_mem);
list_inithead(&at->pending_results);
}

void
fd_autotune_fini(struct fd_autotune *at)
{
_mesa_hash_table_destroy(at->ht, NULL);
fd_bo_del(at->results_mem);
}