summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/bf16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bf16.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/bf16.ll73
1 files changed, 28 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 98658834e897..bf4302c156d8 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -5678,22 +5678,18 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_clause 0x4
-; GFX11-NEXT: scratch_store_b128 off, v[18:21], s0 offset:64
-; GFX11-NEXT: scratch_store_b128 off, v[10:13], s0 offset:32
-; GFX11-NEXT: scratch_store_b128 off, v[6:9], s0 offset:16
-; GFX11-NEXT: scratch_store_b128 off, v[2:5], s0
-; GFX11-NEXT: scratch_store_b16 off, v1, s0 offset:128
-; GFX11-NEXT: s_add_i32 s1, s0, 0x70
-; GFX11-NEXT: s_add_i32 s2, s0, 0x60
-; GFX11-NEXT: s_add_i32 s3, s0, 0x50
-; GFX11-NEXT: s_add_i32 s0, s0, 48
+; GFX11-NEXT: s_clause 0x5
+; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
+; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
+; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[30:33], s1
-; GFX11-NEXT: scratch_store_b128 off, v[26:29], s2
-; GFX11-NEXT: scratch_store_b128 off, v[22:25], s3
-; GFX11-NEXT: scratch_store_b128 off, v[14:17], s0
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
+; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
%ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
@@ -8827,19 +8823,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX11-NEXT: global_load_u16 v32, v[1:2], off offset:54
; GFX11-NEXT: global_load_u16 v33, v[1:2], off offset:58
; GFX11-NEXT: global_load_u16 v1, v[1:2], off offset:62
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_add_i32 s1, s0, 0xf0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s3, s0, 0xd0
-; GFX11-NEXT: s_add_i32 s4, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s5, s0, 0xb0
-; GFX11-NEXT: s_add_i32 s6, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s7, s0, 0x90
-; GFX11-NEXT: s_add_i32 s8, s0, 0x70
-; GFX11-NEXT: s_add_i32 s9, s0, 0x60
-; GFX11-NEXT: s_add_i32 s10, s0, 0x50
-; GFX11-NEXT: s_add_i32 s11, s0, 48
; GFX11-NEXT: s_waitcnt vmcnt(31)
; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v3
; GFX11-NEXT: s_waitcnt vmcnt(30)
@@ -8936,23 +8919,23 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX11-NEXT: v_cvt_f64_f32_e32 v[5:6], v5
; GFX11-NEXT: v_cvt_f64_f32_e32 v[3:4], v2
; GFX11-NEXT: v_cvt_f64_f32_e32 v[1:2], v37
-; GFX11-NEXT: scratch_store_b128 off, v[96:99], s1
-; GFX11-NEXT: scratch_store_b128 off, v[84:87], s2
-; GFX11-NEXT: scratch_store_b128 off, v[80:83], s3
-; GFX11-NEXT: scratch_store_b128 off, v[68:71], s4
-; GFX11-NEXT: scratch_store_b128 off, v[64:67], s5
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s6
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s7
-; GFX11-NEXT: scratch_store_b128 off, v[33:36], s0 offset:128
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s9
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s10
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s11
-; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
+; GFX11-NEXT: s_clause 0xf
+; GFX11-NEXT: scratch_store_b128 v0, v[96:99], off offset:240
+; GFX11-NEXT: scratch_store_b128 v0, v[84:87], off offset:224
+; GFX11-NEXT: scratch_store_b128 v0, v[80:83], off offset:208
+; GFX11-NEXT: scratch_store_b128 v0, v[68:71], off offset:192
+; GFX11-NEXT: scratch_store_b128 v0, v[64:67], off offset:176
+; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160
+; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144
+; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
%load = load <32 x bfloat>, ptr addrspace(1) %ptr
%fpext = fpext <32 x bfloat> %load to <32 x double>