src/tools/miri/src/intrinsics/simd.rs - third_party/rust - Git at Google

 use rustc_apfloat::{Float, Round};
 use rustc_middle::ty::layout::{HasParamEnv, LayoutOf};
 use rustc_middle::{mir, ty, ty::FloatTy};
 use rustc_span::{sym, Symbol};
 use rustc_target::abi::{Endian, HasDataLayout};

 use crate::helpers::{bool_to_simd_element, check_arg_count, simd_element_to_bool, ToHost, ToSoft};
 use crate::*;

 /// Selects which of the two extrema an operation should compute.
 ///
 /// This lets the `fmin`/`fmax` and `reduce_min`/`reduce_max` intrinsics share a single code
 /// path that is parameterized by the direction of the comparison.
 #[derive(Copy, Clone)]
 pub(crate) enum MinMax {
     /// Compute the minimum.
     Min,
     /// Compute the maximum.
     Max,
 }

 // The impl body is empty: every method of the extension trait comes with a default body, so
 // this line alone makes them available on the interpreter context.
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
 pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     /// Calls the simd intrinsic `intrinsic`; the `simd_` prefix has already been removed.
     /// Returns an `EmulateItemResult` describing whether the intrinsic was handled.
     fn emulate_simd_intrinsic(
         &mut self,
         intrinsic_name: &str,
         generic_args: ty::GenericArgsRef<'tcx>,
         args: &[OpTy<'tcx, Provenance>],
         dest: &MPlaceTy<'tcx, Provenance>,
     ) -> InterpResult<'tcx, EmulateItemResult> {
         let this = self.eval_context_mut();
         match intrinsic_name {
             #[rustfmt::skip]
             | "neg"
             | "fabs"
             | "ceil"
             | "floor"
             | "round"
             | "trunc"
             | "fsqrt"
             | "fsin"
             | "fcos"
             | "fexp"
             | "fexp2"
             | "flog"
             | "flog2"
             | "flog10"
             | "ctlz"
             | "cttz"
             | "bswap"
             | "bitreverse"
             => {
                 let [op] = check_arg_count(args)?;
                 let (op, op_len) = this.operand_to_simd(op)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, op_len);

                 #[derive(Copy, Clone)]
                 enum Op<'a> {
                     MirOp(mir::UnOp),
                     Abs,
                     Round(rustc_apfloat::Round),
                     Numeric(Symbol),
                     HostOp(&'a str),
                 }
                 let which = match intrinsic_name {
                     "neg" => Op::MirOp(mir::UnOp::Neg),
                     "fabs" => Op::Abs,
                     "ceil" => Op::Round(rustc_apfloat::Round::TowardPositive),
                     "floor" => Op::Round(rustc_apfloat::Round::TowardNegative),
                     "round" => Op::Round(rustc_apfloat::Round::NearestTiesToAway),
                     "trunc" => Op::Round(rustc_apfloat::Round::TowardZero),
                     "ctlz" => Op::Numeric(sym::ctlz),
                     "cttz" => Op::Numeric(sym::cttz),
                     "bswap" => Op::Numeric(sym::bswap),
                     "bitreverse" => Op::Numeric(sym::bitreverse),
                     _ => Op::HostOp(intrinsic_name),
                 };

                 for i in 0..dest_len {
                     let op = this.read_immediate(&this.project_index(&op, i)?)?;
                     let dest = this.project_index(&dest, i)?;
                     let val = match which {
                         Op::MirOp(mir_op) => {
                             // This already does NaN adjustments
                             this.wrapping_unary_op(mir_op, &op)?.to_scalar()
                         }
                         Op::Abs => {
                             // Works for f32 and f64.
                             let ty::Float(float_ty) = op.layout.ty.kind() else {
                                 span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name)
                             };
                             let op = op.to_scalar();
                             // "Bitwise" operation, no NaN adjustments
                             match float_ty {
                                 FloatTy::F16 => unimplemented!("f16_f128"),
                                 FloatTy::F32 => Scalar::from_f32(op.to_f32()?.abs()),
                                 FloatTy::F64 => Scalar::from_f64(op.to_f64()?.abs()),
                                 FloatTy::F128 => unimplemented!("f16_f128"),
                             }
                         }
                         Op::HostOp(host_op) => {
                             let ty::Float(float_ty) = op.layout.ty.kind() else {
                                 span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name)
                             };
                             // Using host floats (but it's fine, these operations do not have guaranteed precision).
                             match float_ty {
                                 FloatTy::F16 => unimplemented!("f16_f128"),
                                 FloatTy::F32 => {
                                     let f = op.to_scalar().to_f32()?;
                                     let f_host = f.to_host();
                                     let res = match host_op {
                                         "fsqrt" => f_host.sqrt(), // FIXME Using host floats, this should use full-precision soft-floats
                                         "fsin" => f_host.sin(),
                                         "fcos" => f_host.cos(),
                                         "fexp" => f_host.exp(),
                                         "fexp2" => f_host.exp2(),
                                         "flog" => f_host.ln(),
                                         "flog2" => f_host.log2(),
                                         "flog10" => f_host.log10(),
                                         _ => bug!(),
                                     };
                                     let res = res.to_soft();
                                     let res = this.adjust_nan(res, &[f]);
                                     Scalar::from(res)
                                 }
                                 FloatTy::F64 => {
                                     let f = op.to_scalar().to_f64()?;
                                     let f_host = f.to_host();
                                     let res = match host_op {
                                         "fsqrt" => f_host.sqrt(),
                                         "fsin" => f_host.sin(),
                                         "fcos" => f_host.cos(),
                                         "fexp" => f_host.exp(),
                                         "fexp2" => f_host.exp2(),
                                         "flog" => f_host.ln(),
                                         "flog2" => f_host.log2(),
                                         "flog10" => f_host.log10(),
                                         _ => bug!(),
                                     };
                                     let res = res.to_soft();
                                     let res = this.adjust_nan(res, &[f]);
                                     Scalar::from(res)
                                 }
                                 FloatTy::F128 => unimplemented!("f16_f128"),
                             }
                         }
                         Op::Round(rounding) => {
                             let ty::Float(float_ty) = op.layout.ty.kind() else {
                                 span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name)
                             };
                             match float_ty {
                                 FloatTy::F16 => unimplemented!("f16_f128"),
                                 FloatTy::F32 => {
                                     let f = op.to_scalar().to_f32()?;
                                     let res = f.round_to_integral(rounding).value;
                                     let res = this.adjust_nan(res, &[f]);
                                     Scalar::from_f32(res)
                                 }
                                 FloatTy::F64 => {
                                     let f = op.to_scalar().to_f64()?;
                                     let res = f.round_to_integral(rounding).value;
                                     let res = this.adjust_nan(res, &[f]);
                                     Scalar::from_f64(res)
                                 }
                                 FloatTy::F128 => unimplemented!("f16_f128"),
                             }
                         }
                         Op::Numeric(name) => {
                             this.numeric_intrinsic(name, op.to_scalar(), op.layout, op.layout)?
                         }
                     };
                     this.write_scalar(val, &dest)?;
                 }
             }
             #[rustfmt::skip]
             | "add"
             | "sub"
             | "mul"
             | "div"
             | "rem"
             | "shl"
             | "shr"
             | "and"
             | "or"
             | "xor"
             | "eq"
             | "ne"
             | "lt"
             | "le"
             | "gt"
             | "ge"
             | "fmax"
             | "fmin"
             | "saturating_add"
             | "saturating_sub"
             | "arith_offset"
             => {
                 use mir::BinOp;

                 let [left, right] = check_arg_count(args)?;
                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, left_len);
                 assert_eq!(dest_len, right_len);

                 enum Op {
                     MirOp(BinOp),
                     SaturatingOp(BinOp),
                     FMinMax(MinMax),
                     WrappingOffset,
                 }
                 let which = match intrinsic_name {
                     "add" => Op::MirOp(BinOp::Add),
                     "sub" => Op::MirOp(BinOp::Sub),
                     "mul" => Op::MirOp(BinOp::Mul),
                     "div" => Op::MirOp(BinOp::Div),
                     "rem" => Op::MirOp(BinOp::Rem),
                     "shl" => Op::MirOp(BinOp::Shl),
                     "shr" => Op::MirOp(BinOp::Shr),
                     "and" => Op::MirOp(BinOp::BitAnd),
                     "or" => Op::MirOp(BinOp::BitOr),
                     "xor" => Op::MirOp(BinOp::BitXor),
                     "eq" => Op::MirOp(BinOp::Eq),
                     "ne" => Op::MirOp(BinOp::Ne),
                     "lt" => Op::MirOp(BinOp::Lt),
                     "le" => Op::MirOp(BinOp::Le),
                     "gt" => Op::MirOp(BinOp::Gt),
                     "ge" => Op::MirOp(BinOp::Ge),
                     "fmax" => Op::FMinMax(MinMax::Max),
                     "fmin" => Op::FMinMax(MinMax::Min),
                     "saturating_add" => Op::SaturatingOp(BinOp::Add),
                     "saturating_sub" => Op::SaturatingOp(BinOp::Sub),
                     "arith_offset" => Op::WrappingOffset,
                     _ => unreachable!(),
                 };

                 for i in 0..dest_len {
                     let left = this.read_immediate(&this.project_index(&left, i)?)?;
                     let right = this.read_immediate(&this.project_index(&right, i)?)?;
                     let dest = this.project_index(&dest, i)?;
                     let val = match which {
                         Op::MirOp(mir_op) => {
                             // This does NaN adjustments.
                             let (val, overflowed) = this.overflowing_binary_op(mir_op, &left, &right)?;
                             if matches!(mir_op, BinOp::Shl | BinOp::Shr) {
                                 // Shifts have extra UB as SIMD operations that the MIR binop does not have.
                                 // See <https://github.com/rust-lang/rust/issues/91237>.
                                 if overflowed {
                                     let r_val = right.to_scalar().to_bits(right.layout.size)?;
                                     throw_ub_format!("overflowing shift by {r_val} in `simd_{intrinsic_name}` in SIMD lane {i}");
                                 }
                             }
                             if matches!(mir_op, BinOp::Eq | BinOp::Ne | BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge) {
                                 // Special handling for boolean-returning operations
                                 assert_eq!(val.layout.ty, this.tcx.types.bool);
                                 let val = val.to_scalar().to_bool().unwrap();
                                 bool_to_simd_element(val, dest.layout.size)
                             } else {
                                 assert_ne!(val.layout.ty, this.tcx.types.bool);
                                 assert_eq!(val.layout.ty, dest.layout.ty);
                                 val.to_scalar()
                             }
                         }
                         Op::SaturatingOp(mir_op) => {
                             this.saturating_arith(mir_op, &left, &right)?
                         }
                         Op::WrappingOffset => {
                             let ptr = left.to_scalar().to_pointer(this)?;
                             let offset_count = right.to_scalar().to_target_isize(this)?;
                             let pointee_ty = left.layout.ty.builtin_deref(true).unwrap().ty;

                             let pointee_size = i64::try_from(this.layout_of(pointee_ty)?.size.bytes()).unwrap();
                             let offset_bytes = offset_count.wrapping_mul(pointee_size);
                             let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, this);
                             Scalar::from_maybe_pointer(offset_ptr, this)
                         }
                         Op::FMinMax(op) => {
                             this.fminmax_op(op, &left, &right)?
                         }
                     };
                     this.write_scalar(val, &dest)?;
                 }
             }
             "fma" => {
                 let [a, b, c] = check_arg_count(args)?;
                 let (a, a_len) = this.operand_to_simd(a)?;
                 let (b, b_len) = this.operand_to_simd(b)?;
                 let (c, c_len) = this.operand_to_simd(c)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, a_len);
                 assert_eq!(dest_len, b_len);
                 assert_eq!(dest_len, c_len);

                 for i in 0..dest_len {
                     let a = this.read_scalar(&this.project_index(&a, i)?)?;
                     let b = this.read_scalar(&this.project_index(&b, i)?)?;
                     let c = this.read_scalar(&this.project_index(&c, i)?)?;
                     let dest = this.project_index(&dest, i)?;

                     // Works for f32 and f64.
                     // FIXME: using host floats to work around https://github.com/rust-lang/miri/issues/2468.
                     let ty::Float(float_ty) = dest.layout.ty.kind() else {
                         span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name)
                     };
                     let val = match float_ty {
                         FloatTy::F16 => unimplemented!("f16_f128"),
                         FloatTy::F32 => {
                             let a = a.to_f32()?;
                             let b = b.to_f32()?;
                             let c = c.to_f32()?;
                             let res = a.to_host().mul_add(b.to_host(), c.to_host()).to_soft();
                             let res = this.adjust_nan(res, &[a, b, c]);
                             Scalar::from(res)
                         }
                         FloatTy::F64 => {
                             let a = a.to_f64()?;
                             let b = b.to_f64()?;
                             let c = c.to_f64()?;
                             let res = a.to_host().mul_add(b.to_host(), c.to_host()).to_soft();
                             let res = this.adjust_nan(res, &[a, b, c]);
                             Scalar::from(res)
                         }
                         FloatTy::F128 => unimplemented!("f16_f128"),
                     };
                     this.write_scalar(val, &dest)?;
                 }
             }
             #[rustfmt::skip]
             | "reduce_and"
             | "reduce_or"
             | "reduce_xor"
             | "reduce_any"
             | "reduce_all"
             | "reduce_max"
             | "reduce_min" => {
                 use mir::BinOp;

                 let [op] = check_arg_count(args)?;
                 let (op, op_len) = this.operand_to_simd(op)?;

                 let imm_from_bool =
                     |b| ImmTy::from_scalar(Scalar::from_bool(b), this.machine.layouts.bool);

                 enum Op {
                     MirOp(BinOp),
                     MirOpBool(BinOp),
                     MinMax(MinMax),
                 }
                 let which = match intrinsic_name {
                     "reduce_and" => Op::MirOp(BinOp::BitAnd),
                     "reduce_or" => Op::MirOp(BinOp::BitOr),
                     "reduce_xor" => Op::MirOp(BinOp::BitXor),
                     "reduce_any" => Op::MirOpBool(BinOp::BitOr),
                     "reduce_all" => Op::MirOpBool(BinOp::BitAnd),
                     "reduce_max" => Op::MinMax(MinMax::Max),
                     "reduce_min" => Op::MinMax(MinMax::Min),
                     _ => unreachable!(),
                 };

                 // Initialize with first lane, then proceed with the rest.
                 let mut res = this.read_immediate(&this.project_index(&op, 0)?)?;
                 if matches!(which, Op::MirOpBool(_)) {
                     // Convert to `bool` scalar.
                     res = imm_from_bool(simd_element_to_bool(res)?);
                 }
                 for i in 1..op_len {
                     let op = this.read_immediate(&this.project_index(&op, i)?)?;
                     res = match which {
                         Op::MirOp(mir_op) => {
                             this.wrapping_binary_op(mir_op, &res, &op)?
                         }
                         Op::MirOpBool(mir_op) => {
                             let op = imm_from_bool(simd_element_to_bool(op)?);
                             this.wrapping_binary_op(mir_op, &res, &op)?
                         }
                         Op::MinMax(mmop) => {
                             if matches!(res.layout.ty.kind(), ty::Float(_)) {
                                 ImmTy::from_scalar(this.fminmax_op(mmop, &res, &op)?, res.layout)
                             } else {
                                 // Just boring integers, so no NaNs to worry about
                                 let mirop = match mmop {
                                     MinMax::Min => BinOp::Le,
                                     MinMax::Max => BinOp::Ge,
                                 };
                                 if this.wrapping_binary_op(mirop, &res, &op)?.to_scalar().to_bool()? {
                                     res
                                 } else {
                                     op
                                 }
                             }
                         }
                     };
                 }
                 this.write_immediate(*res, dest)?;
             }
             #[rustfmt::skip]
             | "reduce_add_ordered"
             | "reduce_mul_ordered" => {
                 use mir::BinOp;

                 let [op, init] = check_arg_count(args)?;
                 let (op, op_len) = this.operand_to_simd(op)?;
                 let init = this.read_immediate(init)?;

                 let mir_op = match intrinsic_name {
                     "reduce_add_ordered" => BinOp::Add,
                     "reduce_mul_ordered" => BinOp::Mul,
                     _ => unreachable!(),
                 };

                 let mut res = init;
                 for i in 0..op_len {
                     let op = this.read_immediate(&this.project_index(&op, i)?)?;
                     res = this.wrapping_binary_op(mir_op, &res, &op)?;
                 }
                 this.write_immediate(*res, dest)?;
             }
             "select" => {
                 let [mask, yes, no] = check_arg_count(args)?;
                 let (mask, mask_len) = this.operand_to_simd(mask)?;
                 let (yes, yes_len) = this.operand_to_simd(yes)?;
                 let (no, no_len) = this.operand_to_simd(no)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, mask_len);
                 assert_eq!(dest_len, yes_len);
                 assert_eq!(dest_len, no_len);

                 for i in 0..dest_len {
                     let mask = this.read_immediate(&this.project_index(&mask, i)?)?;
                     let yes = this.read_immediate(&this.project_index(&yes, i)?)?;
                     let no = this.read_immediate(&this.project_index(&no, i)?)?;
                     let dest = this.project_index(&dest, i)?;

                     let val = if simd_element_to_bool(mask)? { yes } else { no };
                     this.write_immediate(*val, &dest)?;
                 }
             }
             // Variant of `select` that takes a bitmask rather than a "vector of bool".
             "select_bitmask" => {
                 let [mask, yes, no] = check_arg_count(args)?;
                 let (yes, yes_len) = this.operand_to_simd(yes)?;
                 let (no, no_len) = this.operand_to_simd(no)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;
                 let bitmask_len = dest_len.next_multiple_of(8);

                 // The mask must be an integer or an array.
                 assert!(
                     mask.layout.ty.is_integral()
                         || matches!(mask.layout.ty.kind(), ty::Array(elemty, _) if elemty == &this.tcx.types.u8)
                 );
                 assert!(bitmask_len <= 64);
                 assert_eq!(bitmask_len, mask.layout.size.bits());
                 assert_eq!(dest_len, yes_len);
                 assert_eq!(dest_len, no_len);
                 let dest_len = u32::try_from(dest_len).unwrap();
                 let bitmask_len = u32::try_from(bitmask_len).unwrap();

                 // To read the mask, we transmute it to an integer.
                 // That does the right thing wrt endianness.
                 let mask_ty = this.machine.layouts.uint(mask.layout.size).unwrap();
                 let mask = mask.transmute(mask_ty, this)?;
                 let mask: u64 = this.read_scalar(&mask)?.to_bits(mask_ty.size)?.try_into().unwrap();

                 for i in 0..dest_len {
                     let bit_i = simd_bitmask_index(i, dest_len, this.data_layout().endian);
                     let mask = mask & 1u64.checked_shl(bit_i).unwrap();
                     let yes = this.read_immediate(&this.project_index(&yes, i.into())?)?;
                     let no = this.read_immediate(&this.project_index(&no, i.into())?)?;
                     let dest = this.project_index(&dest, i.into())?;

                     let val = if mask != 0 { yes } else { no };
                     this.write_immediate(*val, &dest)?;
                 }
                 for i in dest_len..bitmask_len {
                     // If the mask is "padded", ensure that padding is all-zero.
                     // This deliberately does not use `simd_bitmask_index`; these bits are outside
                     // the bitmask. It does not matter in which order we check them.
                     let mask = mask & 1u64.checked_shl(i).unwrap();
                     if mask != 0 {
                         throw_ub_format!(
                             "a SIMD bitmask less than 8 bits long must be filled with 0s for the remaining bits"
                         );
                     }
                 }
             }
             // Converts a "vector of bool" into a bitmask.
             "bitmask" => {
                 let [op] = check_arg_count(args)?;
                 let (op, op_len) = this.operand_to_simd(op)?;
                 let bitmask_len = op_len.next_multiple_of(8);

                 // Returns either an unsigned integer or array of `u8`.
                 assert!(
                     dest.layout.ty.is_integral()
                         || matches!(dest.layout.ty.kind(), ty::Array(elemty, _) if elemty == &this.tcx.types.u8)
                 );
                 assert!(bitmask_len <= 64);
                 assert_eq!(bitmask_len, dest.layout.size.bits());
                 let op_len = u32::try_from(op_len).unwrap();

                 let mut res = 0u64;
                 for i in 0..op_len {
                     let op = this.read_immediate(&this.project_index(&op, i.into())?)?;
                     if simd_element_to_bool(op)? {
                         res |= 1u64
                             .checked_shl(simd_bitmask_index(i, op_len, this.data_layout().endian))
                             .unwrap();
                     }
                 }
                 // We have to change the type of the place to be able to write `res` into it. This
                 // transmutes the integer to an array, which does the right thing wrt endianness.
                 let dest =
                     dest.transmute(this.machine.layouts.uint(dest.layout.size).unwrap(), this)?;
                 this.write_int(res, &dest)?;
             }
             "cast" | "as" | "cast_ptr" | "expose_provenance" | "with_exposed_provenance" => {
                 let [op] = check_arg_count(args)?;
                 let (op, op_len) = this.operand_to_simd(op)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, op_len);

                 let unsafe_cast = intrinsic_name == "cast";
                 let safe_cast = intrinsic_name == "as";
                 let ptr_cast = intrinsic_name == "cast_ptr";
                 let expose_cast = intrinsic_name == "expose_provenance";
                 let from_exposed_cast = intrinsic_name == "with_exposed_provenance";

                 for i in 0..dest_len {
                     let op = this.read_immediate(&this.project_index(&op, i)?)?;
                     let dest = this.project_index(&dest, i)?;

                     let val = match (op.layout.ty.kind(), dest.layout.ty.kind()) {
                         // Int-to-(int|float): always safe
                         (ty::Int(_) | ty::Uint(_), ty::Int(_) | ty::Uint(_) | ty::Float(_))
                             if safe_cast || unsafe_cast =>
                             this.int_to_int_or_float(&op, dest.layout)?,
                         // Float-to-float: always safe
                         (ty::Float(_), ty::Float(_)) if safe_cast || unsafe_cast =>
                             this.float_to_float_or_int(&op, dest.layout)?,
                         // Float-to-int in safe mode
                         (ty::Float(_), ty::Int(_) | ty::Uint(_)) if safe_cast =>
                             this.float_to_float_or_int(&op, dest.layout)?,
                         // Float-to-int in unchecked mode
                         (ty::Float(_), ty::Int(_) | ty::Uint(_)) if unsafe_cast => {
                             this.float_to_int_checked(&op, dest.layout, Round::TowardZero)?
                                 .ok_or_else(|| {
                                     err_ub_format!(
                                         "`simd_cast` intrinsic called on {op} which cannot be represented in target type `{:?}`",
                                         dest.layout.ty
                                     )
                                 })?
                         }
                         // Ptr-to-ptr cast
                         (ty::RawPtr(..), ty::RawPtr(..)) if ptr_cast =>
                             this.ptr_to_ptr(&op, dest.layout)?,
                         // Ptr/Int casts
                         (ty::RawPtr(..), ty::Int(_) | ty::Uint(_)) if expose_cast =>
                             this.pointer_expose_provenance_cast(&op, dest.layout)?,
                         (ty::Int(_) | ty::Uint(_), ty::RawPtr(..)) if from_exposed_cast =>
                             this.pointer_with_exposed_provenance_cast(&op, dest.layout)?,
                         // Error otherwise
                         _ =>
                             throw_unsup_format!(
                                 "Unsupported SIMD cast from element type {from_ty} to {to_ty}",
                                 from_ty = op.layout.ty,
                                 to_ty = dest.layout.ty,
                             ),
                     };
                     this.write_immediate(*val, &dest)?;
                 }
             }
             "shuffle_generic" => {
                 let [left, right] = check_arg_count(args)?;
                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 let index = generic_args[2]
                     .expect_const()
                     .eval(*this.tcx, this.param_env(), this.tcx.span)
                     .unwrap()
                     .unwrap_branch();
                 let index_len = index.len();

                 assert_eq!(left_len, right_len);
                 assert_eq!(index_len as u64, dest_len);

                 for i in 0..dest_len {
                     let src_index: u64 = index[usize::try_from(i).unwrap()]
                         .unwrap_leaf()
                         .try_to_u32()
                         .unwrap()
                         .into();
                     let dest = this.project_index(&dest, i)?;

                     let val = if src_index < left_len {
                         this.read_immediate(&this.project_index(&left, src_index)?)?
                     } else if src_index < left_len.checked_add(right_len).unwrap() {
                         let right_idx = src_index.checked_sub(left_len).unwrap();
                         this.read_immediate(&this.project_index(&right, right_idx)?)?
                     } else {
                         throw_ub_format!(
                             "`simd_shuffle_generic` index {src_index} is out-of-bounds for 2 vectors with length {dest_len}"
                         );
                     };
                     this.write_immediate(*val, &dest)?;
                 }
             }
             "shuffle" => {
                 let [left, right, index] = check_arg_count(args)?;
                 let (left, left_len) = this.operand_to_simd(left)?;
                 let (right, right_len) = this.operand_to_simd(right)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 // `index` is an array, not a SIMD type
                 let ty::Array(_, index_len) = index.layout.ty.kind() else {
                     span_bug!(
                         this.cur_span(),
                         "simd_shuffle index argument has non-array type {}",
                         index.layout.ty
                     )
                 };
                 let index_len = index_len.eval_target_usize(*this.tcx, this.param_env());

                 assert_eq!(left_len, right_len);
                 assert_eq!(index_len, dest_len);

                 for i in 0..dest_len {
                     let src_index: u64 = this
                         .read_immediate(&this.project_index(index, i)?)?
                         .to_scalar()
                         .to_u32()?
                         .into();
                     let dest = this.project_index(&dest, i)?;

                     let val = if src_index < left_len {
                         this.read_immediate(&this.project_index(&left, src_index)?)?
                     } else if src_index < left_len.checked_add(right_len).unwrap() {
                         let right_idx = src_index.checked_sub(left_len).unwrap();
                         this.read_immediate(&this.project_index(&right, right_idx)?)?
                     } else {
                         throw_ub_format!(
                             "`simd_shuffle` index {src_index} is out-of-bounds for 2 vectors with length {dest_len}"
                         );
                     };
                     this.write_immediate(*val, &dest)?;
                 }
             }
             "gather" => {
                 let [passthru, ptrs, mask] = check_arg_count(args)?;
                 let (passthru, passthru_len) = this.operand_to_simd(passthru)?;
                 let (ptrs, ptrs_len) = this.operand_to_simd(ptrs)?;
                 let (mask, mask_len) = this.operand_to_simd(mask)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, passthru_len);
                 assert_eq!(dest_len, ptrs_len);
                 assert_eq!(dest_len, mask_len);

                 for i in 0..dest_len {
                     let passthru = this.read_immediate(&this.project_index(&passthru, i)?)?;
                     let ptr = this.read_immediate(&this.project_index(&ptrs, i)?)?;
                     let mask = this.read_immediate(&this.project_index(&mask, i)?)?;
                     let dest = this.project_index(&dest, i)?;

                     let val = if simd_element_to_bool(mask)? {
                         let place = this.deref_pointer(&ptr)?;
                         this.read_immediate(&place)?
                     } else {
                         passthru
                     };
                     this.write_immediate(*val, &dest)?;
                 }
             }
             "scatter" => {
                 let [value, ptrs, mask] = check_arg_count(args)?;
                 let (value, value_len) = this.operand_to_simd(value)?;
                 let (ptrs, ptrs_len) = this.operand_to_simd(ptrs)?;
                 let (mask, mask_len) = this.operand_to_simd(mask)?;

                 assert_eq!(ptrs_len, value_len);
                 assert_eq!(ptrs_len, mask_len);

                 for i in 0..ptrs_len {
                     let value = this.read_immediate(&this.project_index(&value, i)?)?;
                     let ptr = this.read_immediate(&this.project_index(&ptrs, i)?)?;
                     let mask = this.read_immediate(&this.project_index(&mask, i)?)?;

                     if simd_element_to_bool(mask)? {
                         let place = this.deref_pointer(&ptr)?;
                         this.write_immediate(*value, &place)?;
                     }
                 }
             }
             "masked_load" => {
                 let [mask, ptr, default] = check_arg_count(args)?;
                 let (mask, mask_len) = this.operand_to_simd(mask)?;
                 let ptr = this.read_pointer(ptr)?;
                 let (default, default_len) = this.operand_to_simd(default)?;
                 let (dest, dest_len) = this.mplace_to_simd(dest)?;

                 assert_eq!(dest_len, mask_len);
                 assert_eq!(dest_len, default_len);

                 for i in 0..dest_len {
                     let mask = this.read_immediate(&this.project_index(&mask, i)?)?;
                     let default = this.read_immediate(&this.project_index(&default, i)?)?;
                     let dest = this.project_index(&dest, i)?;

                     let val = if simd_element_to_bool(mask)? {
                         // Size * u64 is implemented as always checked
                         #[allow(clippy::arithmetic_side_effects)]
                         let ptr = ptr.wrapping_offset(dest.layout.size * i, this);
                         let place = this.ptr_to_mplace(ptr, dest.layout);
                         this.read_immediate(&place)?
                     } else {
                         default
                     };
                     this.write_immediate(*val, &dest)?;
                 }
             }
             "masked_store" => {
                 let [mask, ptr, vals] = check_arg_count(args)?;
                 let (mask, mask_len) = this.operand_to_simd(mask)?;
                 let ptr = this.read_pointer(ptr)?;
                 let (vals, vals_len) = this.operand_to_simd(vals)?;

                 assert_eq!(mask_len, vals_len);

                 for i in 0..vals_len {
                     let mask = this.read_immediate(&this.project_index(&mask, i)?)?;
                     let val = this.read_immediate(&this.project_index(&vals, i)?)?;

                     if simd_element_to_bool(mask)? {
                         // Size * u64 is implemented as always checked
                         #[allow(clippy::arithmetic_side_effects)]
                         let ptr = ptr.wrapping_offset(val.layout.size * i, this);
                         let place = this.ptr_to_mplace(ptr, val.layout);
                         this.write_immediate(*val, &place)?
                     };
                 }
             }

             _ => return Ok(EmulateItemResult::NotSupported),
         }
         Ok(EmulateItemResult::NeedsJumping)
     }

     /// Computes the floating-point minimum or maximum (selected by `op`) of `left` and `right`.
     ///
     /// Both operands must have the same type (asserted) and that type must be a float;
     /// anything else is treated as an interpreter bug. Only `f32` and `f64` are handled,
     /// `f16` and `f128` currently hit `unimplemented!`.
     ///
     /// The raw `min`/`max` result is passed through `adjust_nan` together with both inputs
     /// before being wrapped into a `Scalar` of the operand's float type.
     fn fminmax_op(
         &self,
         op: MinMax,
         left: &ImmTy<'tcx, Provenance>,
         right: &ImmTy<'tcx, Provenance>,
     ) -> InterpResult<'tcx, Scalar<Provenance>> {
         let this = self.eval_context_ref();
         assert_eq!(left.layout.ty, right.layout.ty);
         // This helper serves both `Min` and `Max`, so the diagnostic must not single out one.
         let ty::Float(float_ty) = left.layout.ty.kind() else {
             bug!("fmin/fmax operand is not a float")
         };
         let left = left.to_scalar();
         let right = right.to_scalar();
         Ok(match float_ty {
             FloatTy::F16 => unimplemented!("f16_f128"),
             FloatTy::F32 => {
                 let left = left.to_f32()?;
                 let right = right.to_f32()?;
                 let res = match op {
                     MinMax::Min => left.min(right),
                     MinMax::Max => left.max(right),
                 };
                 // Both inputs are handed over alongside the result.
                 let res = this.adjust_nan(res, &[left, right]);
                 Scalar::from_f32(res)
             }
             FloatTy::F64 => {
                 let left = left.to_f64()?;
                 let right = right.to_f64()?;
                 let res = match op {
                     MinMax::Min => left.min(right),
                     MinMax::Max => left.max(right),
                 };
                 // Both inputs are handed over alongside the result.
                 let res = this.adjust_nan(res, &[left, right]);
                 Scalar::from_f64(res)
             }
             FloatTy::F128 => unimplemented!("f16_f128"),
         })
     }
 }

 /// Maps lane index `idx` of a vector with `vec_len` lanes to the index to use for the given
 /// `endianness` (judging by the name, a bit position within a SIMD bitmask).
 ///
 /// Little-endian leaves the index unchanged; big-endian reverses the order, so lane `0`
 /// maps to `vec_len - 1` and the last lane maps to `0`.
 ///
 /// # Panics
 ///
 /// Panics if `idx` is not strictly smaller than `vec_len`.
 fn simd_bitmask_index(idx: u32, vec_len: u32, endianness: Endian) -> u32 {
     assert!(idx < vec_len);
     match endianness {
         // `idx < vec_len` was asserted above, so neither subtraction can underflow.
         #[allow(clippy::arithmetic_side_effects)]
         Endian::Big => {
             let last_lane = vec_len - 1;
             last_lane - idx
         }
         Endian::Little => idx,
     }
 }