src/intel/common/intel_common.c - third_party/mesa - Git at Google

 /*
  * Copyright 2024 Intel Corporation
  * SPDX-License-Identifier: MIT
  */

 #include <stdlib.h>

 #include "dev/intel_debug.h"

 #include "intel_common.h"

 #include "intel_engine.h"

 #include "util/compiler.h"

 /* Updates intel_device_info fields that has dependencies on intel/common
  * functions.
  */
 void intel_common_update_device_info(int fd, struct intel_device_info *devinfo)
 {
    struct intel_query_engine_info *engine_info;
    enum intel_engine_class klass;

    engine_info = intel_engine_get_info(fd, devinfo->kmd_type);
    if (!engine_info)
       return;

    devinfo->has_compute_engine = intel_engines_count(engine_info,
                                                      INTEL_ENGINE_CLASS_COMPUTE);

    for (klass = 0; klass < INTEL_ENGINE_CLASS_INVALID; klass++)
       devinfo->engine_class_supported_count[klass] =
          intel_engines_supported_count(fd, devinfo, engine_info, klass);

    free(engine_info);
 }

 /* Picks the async compute thread-limit settings for a workgroup that uses
  * hw_threads_in_wg HW threads.
  *
  * The three results are written through the ret_* pointers and are HW
  * (register encoding) values, not thread counts.
  *
  * When slm_or_barrier_enabled is false, or when VRT is in effect
  * (devinfo->verx10 >= 300 and DEBUG_NO_VRT is not set), the spec recommended
  * SW defaults are returned unchanged: pixel = 2, z_pass = 0, np_z = 0.
  */
 void
 intel_compute_engine_async_threads_limit(const struct intel_device_info *devinfo,
                                          uint32_t hw_threads_in_wg,
                                          bool slm_or_barrier_enabled,
                                          uint8_t *ret_pixel_async_compute_thread_limit,
                                          uint8_t *ret_z_pass_async_compute_thread_limit,
                                          uint8_t *ret_np_z_async_throttle_settings)
 {
    /* Spec recommended SW values.
     * IMPORTANT: the values stored in these variables are HW values.
     */
    uint8_t pixel_limit = 2;
    uint8_t z_pass_limit = 0;
    uint8_t np_z_throttle = 0;
    const bool vrt_active = devinfo->verx10 >= 300 && !INTEL_DEBUG(DEBUG_NO_VRT);

    /* When VRT is enabled the async thread limits have no effect, so the
     * defaults above are only replaced when it is off and SLM/barriers are
     * in use.
     */
    if (slm_or_barrier_enabled && !vrt_active) {
       /* The spec gives a table of Throttle value | SIMD | MAX API
        * threads(LWS), with HW threads = MAX API threads(LWS) / SIMD.
        *
        * The thresholds used for verx10 < 200 are exactly twice the ones
        * used for verx10 >= 200, so a single set of thresholds is scaled.
        */
       const uint32_t scale = devinfo->verx10 >= 200 ? 1 : 2;

       /* Pixel async compute thread limit: "Max 8" .. "Max 48" map to
        * 2 .. 7. The smallest bucket ("Max 2") deliberately reuses the spec
        * recommended value 2. Anything larger means no limit (0).
        */
       if (hw_threads_in_wg <= 8 * scale)
          pixel_limit = 2;
       else if (hw_threads_in_wg <= 16 * scale)
          pixel_limit = 3;
       else if (hw_threads_in_wg <= 24 * scale)
          pixel_limit = 4;
       else if (hw_threads_in_wg <= 32 * scale)
          pixel_limit = 5;
       else if (hw_threads_in_wg <= 40 * scale)
          pixel_limit = 6;
       else if (hw_threads_in_wg <= 48 * scale)
          pixel_limit = 7;
       else
          pixel_limit = 0;

       /* Z pass async compute thread limit: every bucket up to "Max 60"
        * uses the spec recommended value 0; above that, 1 is programmed.
        */
       z_pass_limit = hw_threads_in_wg <= 60 * scale ? 0 : 1;

       /* NP Z async throttle settings: "Max 32" -> 1, "Max 40" -> 2,
        * "Max 48" -> 3. Otherwise 0, which uses the same settings as the
        * pixel shader async compute setting.
        */
       if (hw_threads_in_wg <= 32 * scale)
          np_z_throttle = 1;
       else if (hw_threads_in_wg <= 40 * scale)
          np_z_throttle = 2;
       else if (hw_threads_in_wg <= 48 * scale)
          np_z_throttle = 3;
       else
          np_z_throttle = 0;

       /* np_z may only defer to the pixel setting when that one is
        * unlimited as well.
        */
       assert(np_z_throttle != 0 || pixel_limit == 0);
    }

    *ret_pixel_async_compute_thread_limit = pixel_limit;
    *ret_z_pass_async_compute_thread_limit = z_pass_limit;
    *ret_np_z_async_throttle_settings = np_z_throttle;
 }

 /* Maps the number of HW threads in a workgroup to the thread-group dispatch
  * size setting: 0 for 1..16 threads, 1 for 17..32 threads and 2 for every
  * other value (including 0).
  */
 int
 intel_compute_threads_group_dispatch_size(uint32_t hw_threads_in_wg)
 {
    /* The following values were calculated based on overdispatch being
     * disabled. In case compute overdispatch disabled is set to 1, then we
     * need to use TG Size 1.
     */
    if (hw_threads_in_wg >= 1 && hw_threads_in_wg <= 16)
       return 0;

    if (hw_threads_in_wg >= 17 && hw_threads_in_wg <= 32)
       return 1;

    return 2;
 }
	/*
	* Copyright 2024 Intel Corporation
	* SPDX-License-Identifier: MIT
	*/

	#include <stdlib.h>

	#include "dev/intel_debug.h"

	#include "intel_common.h"

	#include "intel_engine.h"

	#include "util/compiler.h"

	/* Updates intel_device_info fields that has dependencies on intel/common
	* functions.
	*/
	void intel_common_update_device_info(int fd, struct intel_device_info *devinfo)
	{
	struct intel_query_engine_info *engine_info;
	enum intel_engine_class klass;

	engine_info = intel_engine_get_info(fd, devinfo->kmd_type);
	if (!engine_info)
	return;

	devinfo->has_compute_engine = intel_engines_count(engine_info,
	INTEL_ENGINE_CLASS_COMPUTE);

	for (klass = 0; klass < INTEL_ENGINE_CLASS_INVALID; klass++)
	devinfo->engine_class_supported_count[klass] =
	intel_engines_supported_count(fd, devinfo, engine_info, klass);

	free(engine_info);
	}

	/* Computes the async compute thread-limit settings for a workgroup made of
	 * hw_threads_in_wg HW threads.
	 *
	 * Results are written through the three ret_* pointers and are HW values.
	 *
	 * If slm_or_barrier_enabled is false, or VRT is in effect
	 * (devinfo->verx10 >= 300 and DEBUG_NO_VRT is not set), the spec
	 * recommended SW defaults are returned: pixel = 2, z_pass = 0, np_z = 0.
	 * Otherwise the values are looked up by hw_threads_in_wg, with separate
	 * tables for verx10 >= 200 and for older hardware.
	 */
	void
	intel_compute_engine_async_threads_limit(const struct intel_device_info *devinfo,
	                                         uint32_t hw_threads_in_wg,
	                                         bool slm_or_barrier_enabled,
	                                         uint8_t *ret_pixel_async_compute_thread_limit,
	                                         uint8_t *ret_z_pass_async_compute_thread_limit,
	                                         uint8_t *ret_np_z_async_throttle_settings)
	{
	   /* Spec recommended SW values.
	    * IMPORTANT: the values stored in these variables are HW values.
	    */
	   uint8_t pixel_async_compute_thread_limit = 2;
	   uint8_t z_pass_async_compute_thread_limit = 0;
	   uint8_t np_z_async_throttle_settings = 0;
	   bool has_vrt = devinfo->verx10 >= 300 && !INTEL_DEBUG(DEBUG_NO_VRT);

	   /* When VRT is enabled async threads limits don't have effect */
	   if (!slm_or_barrier_enabled || has_vrt) {
	      *ret_pixel_async_compute_thread_limit = pixel_async_compute_thread_limit;
	      *ret_z_pass_async_compute_thread_limit = z_pass_async_compute_thread_limit;
	      *ret_np_z_async_throttle_settings = np_z_async_throttle_settings;
	      return;
	   }

	   if (devinfo->verx10 >= 200) {
	      /* The spec gives us a table with
	       * Throttle value | SIMD | MAX API threads(LWS).
	       * HW threads = MAX API threads(LWS) / SIMD
	       */
	      switch (hw_threads_in_wg) {
	      case 0 ... 2:
	         /* Minimum is Max 2 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 3 ... 8:
	         /* Max 8 */
	         pixel_async_compute_thread_limit = 2;
	         break;
	      case 9 ... 16:
	         /* Max 16 */
	         pixel_async_compute_thread_limit = 3;
	         break;
	      case 17 ... 24:
	         /* Max 24 */
	         pixel_async_compute_thread_limit = 4;
	         break;
	      case 25 ... 32:
	         /* Max 32 */
	         pixel_async_compute_thread_limit = 5;
	         break;
	      case 33 ... 40:
	         /* Max 40 */
	         pixel_async_compute_thread_limit = 6;
	         break;
	      case 41 ... 48:
	         /* Max 48 */
	         pixel_async_compute_thread_limit = 7;
	         break;
	      default:
	         /* No limit applied */
	         pixel_async_compute_thread_limit = 0;
	      }

	      switch (hw_threads_in_wg) {
	      case 0 ... 32:
	         /* Minimum is Max 32 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 33 ... 40:
	         /* Minimum is Max 40 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 41 ... 48:
	         /* Minimum is Max 48 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 49 ... 56:
	         /* Minimum is Max 56 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 57 ... 60:
	         /* Max 60 */
	         z_pass_async_compute_thread_limit = 0;
	         break;
	      default:
	         /* No limit applied */
	         z_pass_async_compute_thread_limit = 1;
	      }

	      switch (hw_threads_in_wg) {
	      case 0 ... 32:
	         /* Max 32 */
	         np_z_async_throttle_settings = 1;
	         break;
	      case 33 ... 40:
	         /* Max 40 */
	         np_z_async_throttle_settings = 2;
	         break;
	      case 41 ... 48:
	         /* Max 48 */
	         np_z_async_throttle_settings = 3;
	         break;
	      default:
	         /* Use the same settings as the Pixel shader Async compute setting,
	          * for values >= async compute settings disables the limits
	          */
	         np_z_async_throttle_settings = 0;
	      }
	   } else {
	      /* Same buckets as above; the HW thread ranges used for
	       * verx10 < 200 are twice as wide.
	       */
	      switch (hw_threads_in_wg) {
	      case 0 ... 4:
	         /* Minimum is Max 2 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 5 ... 16:
	         /* Max 8 */
	         pixel_async_compute_thread_limit = 2;
	         break;
	      case 17 ... 32:
	         /* Max 16 */
	         pixel_async_compute_thread_limit = 3;
	         break;
	      case 33 ... 48:
	         /* Max 24 */
	         pixel_async_compute_thread_limit = 4;
	         break;
	      case 49 ... 64:
	         /* Max 32 */
	         pixel_async_compute_thread_limit = 5;
	         break;
	      case 65 ... 80:
	         /* Max 40 */
	         pixel_async_compute_thread_limit = 6;
	         break;
	      case 81 ... 96:
	         /* Max 48 */
	         pixel_async_compute_thread_limit = 7;
	         break;
	      default:
	         /* No limit applied */
	         pixel_async_compute_thread_limit = 0;
	      }

	      switch (hw_threads_in_wg) {
	      case 0 ... 64:
	         /* Minimum is Max 32 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 65 ... 80:
	         /* Minimum is Max 40 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 81 ... 96:
	         /* Minimum is Max 48 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 97 ... 112:
	         /* Minimum is Max 56 but lets use spec recommended value below */
	         FALLTHROUGH;
	      case 113 ... 120:
	         /* Max 60 */
	         z_pass_async_compute_thread_limit = 0;
	         break;
	      default:
	         /* Max 64/No limit applied */
	         z_pass_async_compute_thread_limit = 1;
	      }

	      switch (hw_threads_in_wg) {
	      case 0 ... 64:
	         /* Max 32 */
	         np_z_async_throttle_settings = 1;
	         break;
	      case 65 ... 80:
	         /* Max 40 */
	         np_z_async_throttle_settings = 2;
	         break;
	      case 81 ... 96:
	         /* Max 48 */
	         np_z_async_throttle_settings = 3;
	         break;
	      default:
	         /* Use the same settings as the Pixel shader Async compute setting,
	          * for values >= async compute settings disables the limits
	          */
	         np_z_async_throttle_settings = 0;
	      }
	   }

	   /* np_z may only fall back to the pixel setting (0) when the pixel
	    * limit itself is disabled (0).
	    */
	   assert(np_z_async_throttle_settings != 0 || pixel_async_compute_thread_limit == 0);
	   *ret_pixel_async_compute_thread_limit = pixel_async_compute_thread_limit;
	   *ret_z_pass_async_compute_thread_limit = z_pass_async_compute_thread_limit;
	   *ret_np_z_async_throttle_settings = np_z_async_throttle_settings;
	}

	/* Returns the thread-group dispatch size setting for a workgroup with
	 * hw_threads_in_wg HW threads: 0 for 1..16, 1 for 17..32, and 2 for any
	 * other count (0 included).
	 */
	int
	intel_compute_threads_group_dispatch_size(uint32_t hw_threads_in_wg)
	{
	   /* The following values were calculated based on overdispatch being
	    * disabled. In case compute overdispatch disabled is set to 1, then we
	    * need to use TG Size 1.
	    */
	   const bool outside_table = hw_threads_in_wg == 0 || hw_threads_in_wg > 32;

	   if (outside_table)
	      return 2;

	   return hw_threads_in_wg <= 16 ? 0 : 1;
	}