blob: bc2cbd965200cf647d3a83e296a134c4347ce13b [file] [log] [blame]
-- Demonstrates the scalar register (s0), a new ARF introduced with Xe3 that
-- holds scalar values and accepts only a restricted set of operations.

-- NOTE(review): presumably restricts this example to version 30 hardware
-- (Xe3); confirm against the definition of check_ver.
check_ver(30)

-- Assembly for the demo. The text is kept flush-left (closing bracket
-- included) so the string handed to execute stays exactly as written.
local scalar_demo_asm = [[
// The new scalar register s0 has 32 bytes.
//
// Only MOV can have scalar as destination. Offsets in scalar
// must be aligned to word boundary. This non-overlapping MOVs
// will fill the first 16 bytes.
mov(1) s0<1>UW 0x1111UW;
mov(1) r2<1>UW 0x2222UW;
mov(1) s0.1<1>UW r2<0,1,0>UW {I@1};
mov(1) s0.1<1>UD 0x33333333UD {I@1};
mov(4) r4<1>UW 0x4444UW;
mov(1) s0.1<1>UQ r4<0,1,0>UQ {I@1};
// Scalar content can be broadcasted back into GRFs.
mov(16) r20<1>UD s0.0<0,1,0>UD;
mov(16) r32<1>UD s0.1<0,1,0>UD;
mov(16) r96<1>UD s0.2<0,1,0>UD;
mov(16) r64<1>UD s0.3<0,1,0>UD;
// Scalar can store a list of register numbers to be used as source for
// SEND. Note the scalar will contain the hex value for r11 and r20.
// And the scalar register is indexed as bytes in r[...] syntax, unlike
// in the MOV.
mov(16) r11<1>UD 0x30000000UD;
mov(1) s0.4<1>UD 0x0000140bUD;
send(16) nullUD r[s0.16] 0x04000504 0x00000000 ugm MsgDesc: () { I@1 $1 };
// A larger SEND, note that registers for the payload don't need to be
// contiguous, the hardware will gather them together.
add(16) r11<1>UD r11<1,1,0>UD 0x4UD {A@1, $1.src};
mov(1) s0.4<1>UD 0x4060200bUD;
send(16) nullUD r[s0.16] 0x08002504 0x00000000 ugm MsgDesc: ( ) { I@1 $1 };
@eot
]]

-- Run the shader; `r` holds whatever execute returns and is checked below.
local r = execute({ src = scalar_demo_asm })
-- Compare the first four entries of the result (indices 0..3) against the
-- values the shader is expected to have stored: the dwords it broadcast from
-- s0.0 through s0.3. Prints "OK" when all match; prints "FAIL" and stops the
-- script at the first mismatch.
--
-- `expected` is declared local so the script does not leak a global.
local expected = {[0] = 0x22221111, 0x33333333, 0x44444444, 0x44444444}

-- NOTE(review): dump(t, 4) presumably prints the first four entries of t;
-- confirm against the definition of dump.
print("result")
dump(r, 4)

print("expected")
dump(expected, 4)

for i = 0, 3 do
  if r[i] ~= expected[i] then
    print("FAIL")
    return
  end
end

print("OK")