radeonsi: implement user_data_amd for 5, 6, and 7 components correctly

NIR can't handle those component counts, so we have to split the user
data into 2 SGPR vectors, each with at most 4 components.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28725>
diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
index c6f6e3f..01238a1 100644
--- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
+++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c
@@ -744,10 +744,17 @@
          s->gsvs_ring[stream_id] : nir_undef(b, 4, 32);
       break;
    }
-   case nir_intrinsic_load_user_data_amd:
-      replacement = ac_nir_load_arg(b, &args->ac, args->cs_user_data);
-      replacement = nir_pad_vector(b, replacement, 8);
+   case nir_intrinsic_load_user_data_amd: {
+      nir_def *low_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[0]);
+      replacement = nir_pad_vector(b, low_vec4, 8);
+
+      if (args->cs_user_data[1].used && intrin->def.num_components > 4) {
+         nir_def *high_vec4 = ac_nir_load_arg(b, &args->ac, args->cs_user_data[1]);
+         for (unsigned i = 0; i < high_vec4->num_components; i++)
+            replacement = nir_vector_insert_imm(b, replacement, nir_channel(b, high_vec4, i), 4 + i);
+      }
       break;
+   }
    default:
       return false;
    }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b4d8ace..4c96a07 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -677,7 +677,12 @@
       unsigned cs_user_data_dwords =
          shader->selector->info.base.cs.user_data_components_amd;
       if (cs_user_data_dwords) {
-         ac_add_arg(&args->ac, AC_ARG_SGPR, cs_user_data_dwords, AC_ARG_INT, &args->cs_user_data);
+         ac_add_arg(&args->ac, AC_ARG_SGPR, MIN2(cs_user_data_dwords, 4), AC_ARG_INT,
+                    &args->cs_user_data[0]);
+         if (cs_user_data_dwords > 4) {
+            ac_add_arg(&args->ac, AC_ARG_SGPR, cs_user_data_dwords - 4, AC_ARG_INT,
+                       &args->cs_user_data[1]);
+         }
       }
 
       /* Some descriptors can be in user SGPRs. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 8e989fe..30df288 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -75,7 +75,7 @@
    struct ac_arg color_start;
    /* CS */
    struct ac_arg block_size;
-   struct ac_arg cs_user_data;
+   struct ac_arg cs_user_data[2];
    struct ac_arg cs_shaderbuf[3];
    struct ac_arg cs_image[3];
 };