blob: d9ebaa36784bb56e7a58d1ccadeaf755450b0074 [file] [log] [blame]
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include <stdlib.h>
#include "dev/intel_debug.h"
#include "intel_common.h"
#include "intel_engine.h"
#include "util/compiler.h"
/* Updates intel_device_info fields that has dependencies on intel/common
* functions.
*/
void intel_common_update_device_info(int fd, struct intel_device_info *devinfo)
{
struct intel_query_engine_info *engine_info;
enum intel_engine_class klass;
engine_info = intel_engine_get_info(fd, devinfo->kmd_type);
if (!engine_info)
return;
devinfo->has_compute_engine = intel_engines_count(engine_info,
INTEL_ENGINE_CLASS_COMPUTE);
for (klass = 0; klass < INTEL_ENGINE_CLASS_INVALID; klass++)
devinfo->engine_class_supported_count[klass] =
intel_engines_supported_count(fd, devinfo, engine_info, klass);
free(engine_info);
}
/* Computes the async compute thread limit / throttle values for a compute
 * workgroup made of hw_threads_in_wg HW threads.
 *
 * The three results are written through the ret_* pointers and are HW
 * values, not thread counts.
 *
 * When the workgroup uses neither SLM nor barriers, or when VRT is in use
 * (verx10 >= 300 and DEBUG_NO_VRT not set), the spec recommended SW values
 * are returned: pixel = 2, z_pass = 0, np_z = 0. Otherwise the values are
 * looked up from the spec tables based on hw_threads_in_wg.
 */
void
intel_compute_engine_async_threads_limit(const struct intel_device_info *devinfo,
                                         uint32_t hw_threads_in_wg,
                                         bool slm_or_barrier_enabled,
                                         uint8_t *ret_pixel_async_compute_thread_limit,
                                         uint8_t *ret_z_pass_async_compute_thread_limit,
                                         uint8_t *ret_np_z_async_throttle_settings)
{
   /* Spec recommended SW values.
    * IMPORTANT: the values stored in these variables are HW values.
    */
   uint8_t pixel_limit = 2;
   uint8_t z_pass_limit = 0;
   uint8_t np_z_throttle = 0;

   /* When VRT is enabled the async thread limits have no effect. */
   const bool vrt_active = devinfo->verx10 >= 300 && !INTEL_DEBUG(DEBUG_NO_VRT);

   if (slm_or_barrier_enabled && !vrt_active) {
      /* The spec gives a table of Throttle value | SIMD | MAX API
       * threads(LWS), with HW threads = MAX API threads(LWS) / SIMD.
       *
       * The "Max N" labels below are the table rows. For verx10 >= 200 the
       * HW thread thresholds equal N; for older platforms every threshold
       * is exactly twice as large, hence the scale factor.
       */
      const uint32_t scale = devinfo->verx10 >= 200 ? 1 : 2;
      const uint32_t threads = hw_threads_in_wg;

      /* Pixel async compute thread limit. The minimum the HW offers is
       * "Max 2", but the spec recommended value (Max 8) is used for the
       * smallest workgroups as well.
       */
      if (threads <= 8 * scale)
         pixel_limit = 2; /* Max 8 */
      else if (threads <= 16 * scale)
         pixel_limit = 3; /* Max 16 */
      else if (threads <= 24 * scale)
         pixel_limit = 4; /* Max 24 */
      else if (threads <= 32 * scale)
         pixel_limit = 5; /* Max 32 */
      else if (threads <= 40 * scale)
         pixel_limit = 6; /* Max 40 */
      else if (threads <= 48 * scale)
         pixel_limit = 7; /* Max 48 */
      else
         pixel_limit = 0; /* No limit applied */

      /* Z pass async compute thread limit. Lower settings (Max 32/40/48/56)
       * exist, but the spec recommended value (Max 60) is used for
       * everything that fits in it.
       */
      if (threads <= 60 * scale)
         z_pass_limit = 0; /* Max 60 */
      else
         z_pass_limit = 1; /* Max 64/No limit applied */

      /* NP Z async throttle settings. */
      if (threads <= 32 * scale)
         np_z_throttle = 1; /* Max 32 */
      else if (threads <= 40 * scale)
         np_z_throttle = 2; /* Max 40 */
      else if (threads <= 48 * scale)
         np_z_throttle = 3; /* Max 48 */
      else
         /* Use the same settings as the Pixel shader Async compute setting,
          * for values >= async compute settings disables the limits
          */
         np_z_throttle = 0;

      /* np_z == 0 means "follow the pixel setting", which must only be
       * selected when the pixel limit itself is disabled.
       */
      assert(np_z_throttle != 0 || pixel_limit == 0);
   }

   *ret_pixel_async_compute_thread_limit = pixel_limit;
   *ret_z_pass_async_compute_thread_limit = z_pass_limit;
   *ret_np_z_async_throttle_settings = np_z_throttle;
}
/* Maps the number of HW threads in a workgroup to a thread group dispatch
 * size value:
 *
 *    1..16 HW threads  -> 0
 *   17..32 HW threads  -> 1
 *   anything else      -> 2 (this includes 0)
 *
 * The mapping was calculated for the case where overdispatch is disabled.
 * Per the original note, when "compute overdispatch disable" is set to 1,
 * TG Size 1 has to be used; this function does not look at that setting,
 * so handling it is up to the caller.
 */
int
intel_compute_threads_group_dispatch_size(uint32_t hw_threads_in_wg)
{
   if (hw_threads_in_wg >= 1 && hw_threads_in_wg <= 16)
      return 0;

   if (hw_threads_in_wg >= 17 && hw_threads_in_wg <= 32)
      return 1;

   return 2;
}