diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bf16.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/bf16.ll | 73 |
1 files changed, 28 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 98658834e897..bf4302c156d8 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -5678,22 +5678,18 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) { ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x4 -; GFX11-NEXT: scratch_store_b128 off, v[18:21], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[10:13], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[6:9], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[2:5], s0 -; GFX11-NEXT: scratch_store_b16 off, v1, s0 offset:128 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 +; GFX11-NEXT: s_clause 0x5 +; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[30:33], s1 -; GFX11-NEXT: scratch_store_b128 off, v[26:29], s2 -; GFX11-NEXT: scratch_store_b128 off, v[22:25], s3 -; GFX11-NEXT: scratch_store_b128 off, v[14:17], s0 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96 +; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0 %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1 @@ -8827,19 +8823,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: global_load_u16 v32, v[1:2], off offset:54 ; GFX11-NEXT: global_load_u16 v33, v[1:2], off offset:58 ; GFX11-NEXT: global_load_u16 v1, v[1:2], off offset:62 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 -; GFX11-NEXT: s_add_i32 s8, s0, 0x70 -; GFX11-NEXT: s_add_i32 s9, s0, 0x60 -; GFX11-NEXT: s_add_i32 s10, s0, 0x50 -; GFX11-NEXT: s_add_i32 s11, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(31) ; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v3 ; GFX11-NEXT: s_waitcnt vmcnt(30) @@ -8936,23 +8919,23 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: v_cvt_f64_f32_e32 v[5:6], v5 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[3:4], v2 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[1:2], v37 -; GFX11-NEXT: scratch_store_b128 off, v[96:99], s1 -; GFX11-NEXT: scratch_store_b128 off, v[84:87], s2 -; GFX11-NEXT: scratch_store_b128 off, v[80:83], s3 -; GFX11-NEXT: scratch_store_b128 off, v[68:71], s4 -; GFX11-NEXT: scratch_store_b128 off, v[64:67], s5 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s6 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s7 -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s9 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s10 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s11 -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_store_b128 v0, v[96:99], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[84:87], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[80:83], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[68:71], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[64:67], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] %load = load <32 x bfloat>, ptr addrspace(1) %ptr %fpext = fpext <32 x bfloat> %load to <32 x double> |