summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll582
1 files changed, 226 insertions, 356 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index c1d682689903..3b078c41f4a8 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -1989,256 +1989,138 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX11-NEXT: s_mov_b32 s2, s0
; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_clause 0x7
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:1024
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:512
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:256
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:128
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:64
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:32
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:16
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
-; GFX11-NEXT: s_add_i32 s1, s0, 0x7f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x7e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x7d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x7c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x7b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x7a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x790
-; GFX11-NEXT: s_add_i32 s2, s0, 0x780
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x770
-; GFX11-NEXT: s_add_i32 s2, s0, 0x760
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x750
-; GFX11-NEXT: s_add_i32 s2, s0, 0x740
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x730
-; GFX11-NEXT: s_add_i32 s2, s0, 0x720
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x710
-; GFX11-NEXT: s_add_i32 s2, s0, 0x700
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x6f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x6e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x6d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x6c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x6b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x6a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x690
-; GFX11-NEXT: s_add_i32 s2, s0, 0x680
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x670
-; GFX11-NEXT: s_add_i32 s2, s0, 0x660
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x650
-; GFX11-NEXT: s_add_i32 s2, s0, 0x640
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x630
-; GFX11-NEXT: s_add_i32 s2, s0, 0x620
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x610
-; GFX11-NEXT: s_add_i32 s2, s0, 0x600
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x5f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x5e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x5d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x5c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x5b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x5a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x590
-; GFX11-NEXT: s_add_i32 s2, s0, 0x580
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x570
-; GFX11-NEXT: s_add_i32 s2, s0, 0x560
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x550
-; GFX11-NEXT: s_add_i32 s2, s0, 0x540
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x530
-; GFX11-NEXT: s_add_i32 s2, s0, 0x520
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x510
-; GFX11-NEXT: s_add_i32 s2, s0, 0x500
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x4f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x4e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x4d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x4c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x4b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x4a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x490
-; GFX11-NEXT: s_add_i32 s2, s0, 0x480
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x470
-; GFX11-NEXT: s_add_i32 s2, s0, 0x460
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x450
-; GFX11-NEXT: s_add_i32 s2, s0, 0x440
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x430
-; GFX11-NEXT: s_add_i32 s2, s0, 0x420
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x410
-; GFX11-NEXT: s_add_i32 s2, s0, 0x3f0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x3e0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x3d0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x3c0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x3b0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x3a0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x390
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x380
-; GFX11-NEXT: s_add_i32 s2, s0, 0x370
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x360
-; GFX11-NEXT: s_add_i32 s2, s0, 0x350
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x340
-; GFX11-NEXT: s_add_i32 s2, s0, 0x330
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x320
-; GFX11-NEXT: s_add_i32 s2, s0, 0x310
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x300
-; GFX11-NEXT: s_add_i32 s2, s0, 0x2f0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x2e0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x2d0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x2c0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x2b0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x2a0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x290
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x280
-; GFX11-NEXT: s_add_i32 s2, s0, 0x270
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x260
-; GFX11-NEXT: s_add_i32 s2, s0, 0x250
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x240
-; GFX11-NEXT: s_add_i32 s2, s0, 0x230
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x220
-; GFX11-NEXT: s_add_i32 s2, s0, 0x210
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x1f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x1e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x1d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x1c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x1b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x1a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x190
-; GFX11-NEXT: s_add_i32 s2, s0, 0x180
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x170
-; GFX11-NEXT: s_add_i32 s2, s0, 0x160
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x150
-; GFX11-NEXT: s_add_i32 s2, s0, 0x140
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x130
-; GFX11-NEXT: s_add_i32 s2, s0, 0x120
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x110
-; GFX11-NEXT: s_add_i32 s2, s0, 0xf0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xd0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xb0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x90
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x70
-; GFX11-NEXT: s_add_i32 s2, s0, 0x60
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x50
-; GFX11-NEXT: s_add_i32 s0, s0, 48
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2032
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2016
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2000
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1984
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1968
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1952
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1936
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1920
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1904
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1888
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1872
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1856
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1840
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1824
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1808
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1792
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1776
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1760
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1744
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1728
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1712
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1696
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1680
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1664
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1648
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1632
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1616
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1600
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1584
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1568
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1552
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1536
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1520
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1504
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1488
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1472
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1456
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1440
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1424
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1408
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1392
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1376
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1360
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1344
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1328
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1312
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1296
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1280
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1264
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1248
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1232
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1216
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1200
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1184
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1168
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1152
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1136
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1120
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1104
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1088
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1072
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1056
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1040
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1024
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1008
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:992
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:976
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:960
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:944
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:928
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:912
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:896
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:880
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:864
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:848
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:832
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:816
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:800
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:784
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:768
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:752
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:736
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:720
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:704
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:688
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:672
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:656
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:640
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:624
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:608
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:592
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:576
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:560
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:544
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:528
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:512
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:496
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:480
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:464
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:448
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:432
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:416
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:400
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:384
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:368
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:352
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:336
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:320
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:304
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:288
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:272
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:256
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:224
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:208
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:192
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:176
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:160
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:144
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:80
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
ret <512 x i32> zeroinitializer
@@ -2636,7 +2518,6 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
; GFX11-LABEL: return_72xi32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_clause 0xc
; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212
; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208
@@ -2651,93 +2532,82 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172
; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168
; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164
-; GFX11-NEXT: s_clause 0x14
-; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:32
-; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:28
-; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:24
-; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
-; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
-; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
-; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
-; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
-; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:80
-; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:76
-; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:72
-; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:96
-; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:92
-; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:88
-; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:112
-; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:108
-; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:104
-; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:128
-; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:124
-; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:120
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64
+; GFX11-NEXT: s_clause 0x11
+; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16
+; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:12
+; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:8
+; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:32
+; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:28
+; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:24
+; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:48
+; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:44
+; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:40
+; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:64
+; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:60
+; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:56
+; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:80
+; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76
+; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:72
+; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:96
+; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:92
+; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:88
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: scratch_load_b32 v23, off, s32 offset:112
+; GFX11-NEXT: scratch_load_b32 v22, off, s32 offset:108
+; GFX11-NEXT: scratch_load_b32 v21, off, s32 offset:104
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64
; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:144
-; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:140
-; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:136
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32
+; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:128
+; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:124
+; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:120
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:160
-; GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:156
-; GFX11-NEXT: scratch_load_b32 v10, off, s32 offset:152
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16
+; GFX11-NEXT: scratch_load_b32 v15, off, s32 offset:144
+; GFX11-NEXT: scratch_load_b32 v14, off, s32 offset:140
+; GFX11-NEXT: scratch_load_b32 v13, off, s32 offset:136
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
; GFX11-NEXT: s_clause 0xd
-; GFX11-NEXT: scratch_load_b32 v8, off, s32 offset:16
-; GFX11-NEXT: scratch_load_b32 v7, off, s32 offset:12
-; GFX11-NEXT: scratch_load_b32 v6, off, s32 offset:8
-; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:4
-; GFX11-NEXT: scratch_load_b32 v9, off, s32 offset:148
-; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:132
-; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:116
-; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:100
-; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84
-; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:68
-; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
-; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
-; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:160
+; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:156
+; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:152
+; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:148
+; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:132
+; GFX11-NEXT: scratch_load_b32 v16, off, s32 offset:116
+; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:100
+; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:84
+; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68
+; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:52
+; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:36
+; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v32, off, s32
-; GFX11-NEXT: s_add_i32 s1, s0, 0x110
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x100
-; GFX11-NEXT: s_add_i32 s3, s0, 0xf0
-; GFX11-NEXT: s_add_i32 s34, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s35, s0, 0xd0
-; GFX11-NEXT: s_add_i32 s36, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s37, s0, 0xb0
-; GFX11-NEXT: s_add_i32 s38, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s39, s0, 0x90
-; GFX11-NEXT: s_add_i32 s40, s0, 0x70
-; GFX11-NEXT: s_add_i32 s41, s0, 0x60
-; GFX11-NEXT: s_add_i32 s42, s0, 0x50
-; GFX11-NEXT: s_add_i32 s43, s0, 48
; GFX11-NEXT: s_waitcnt vmcnt(10)
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[60:63], off offset:272
; GFX11-NEXT: s_waitcnt vmcnt(9)
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s1
+; GFX11-NEXT: scratch_store_b128 v0, v[12:15], off offset:256
; GFX11-NEXT: s_waitcnt vmcnt(8)
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s2
+; GFX11-NEXT: scratch_store_b128 v0, v[16:19], off offset:240
; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: scratch_store_b128 off, v[60:63], s3
+; GFX11-NEXT: scratch_store_b128 v0, v[20:23], off offset:224
; GFX11-NEXT: s_waitcnt vmcnt(6)
-; GFX11-NEXT: scratch_store_b128 off, v[56:59], s34
+; GFX11-NEXT: scratch_store_b128 v0, v[56:59], off offset:208
; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: scratch_store_b128 off, v[41:44], s35
+; GFX11-NEXT: scratch_store_b128 v0, v[41:44], off offset:192
; GFX11-NEXT: s_waitcnt vmcnt(4)
-; GFX11-NEXT: scratch_store_b128 off, v[37:40], s36
+; GFX11-NEXT: scratch_store_b128 v0, v[37:40], off offset:176
; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s37
+; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160
; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s38
+; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144
; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: scratch_store_b128 off, v[33:36], s39
+; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s40
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s41
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s42
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s43
+; GFX11-NEXT: s_clause 0x3
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT: s_clause 0xc
; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:164
; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:168
@@ -3306,7 +3176,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-LABEL: call_72xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s46, s33
+; GFX11-NEXT: s_mov_b32 s34, s33
; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00
@@ -3353,11 +3223,11 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: s_add_i32 s0, s32, 32
; GFX11-NEXT: s_add_i32 s1, s32, 16
+; GFX11-NEXT: s_add_i32 s2, s33, 0x200
+; GFX11-NEXT: v_writelane_b32 v60, s30, 0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
-; GFX11-NEXT: s_add_i32 s0, s33, 0x200
-; GFX11-NEXT: v_writelane_b32 v60, s30, 0
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0
+; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
@@ -3373,14 +3243,14 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
-; GFX11-NEXT: s_mov_b32 s45, return_72xi32@abs32@hi
-; GFX11-NEXT: s_mov_b32 s44, return_72xi32@abs32@lo
+; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
+; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45]
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624
; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640
-; GFX11-NEXT: s_add_i32 s0, s32, 0xa0
+; GFX11-NEXT: s_add_i32 s2, s32, 0xa0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: v_mov_b32_e32 v32, v48
; GFX11-NEXT: s_clause 0x9
@@ -3431,38 +3301,38 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6
; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9
; GFX11-NEXT: v_mov_b32_e32 v9, v20
-; GFX11-NEXT: scratch_store_b32 off, v11, s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x90
+; GFX11-NEXT: scratch_store_b32 off, v11, s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x90
; GFX11-NEXT: v_mov_b32_e32 v11, v22
-; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x80
+; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x80
; GFX11-NEXT: v_mov_b32_e32 v5, v16
-; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
+; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2
; GFX11-NEXT: v_mov_b32_e32 v0, 24
-; GFX11-NEXT: s_add_i32 s0, s32, 0x70
+; GFX11-NEXT: s_add_i32 s2, s32, 0x70
; GFX11-NEXT: v_mov_b32_e32 v6, v17
-; GFX11-NEXT: scratch_store_b128 off, v[12:15], s0
+; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2
; GFX11-NEXT: v_mov_b32_e32 v13, v24
-; GFX11-NEXT: s_add_i32 s0, s32, 0x6c
+; GFX11-NEXT: s_add_i32 s2, s32, 0x6c
; GFX11-NEXT: v_mov_b32_e32 v7, v18
-; GFX11-NEXT: scratch_store_b32 off, v0, s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x60
+; GFX11-NEXT: scratch_store_b32 off, v0, s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x60
; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26
-; GFX11-NEXT: scratch_store_b96 off, v[56:58], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x50
+; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x50
; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45
-; GFX11-NEXT: scratch_store_b128 off, v[40:43], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 64
+; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 64
; GFX11-NEXT: v_mov_b32_e32 v14, v25
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 48
+; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 48
; GFX11-NEXT: v_mov_b32_e32 v16, v27
-; GFX11-NEXT: scratch_store_b128 off, v[36:39], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 32
+; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 32
; GFX11-NEXT: v_mov_b32_e32 v30, v46
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 16
-; GFX11-NEXT: scratch_store_b128 off, v[32:35], s0
+; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 16
+; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2
; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, 42
@@ -3470,10 +3340,10 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572
; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556
; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540
-; GFX11-NEXT: s_add_i32 s0, s33, 0x400
+; GFX11-NEXT: s_add_i32 s2, s33, 0x400
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45]
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_clause 0xb
; GFX11-NEXT: scratch_load_b32 v59, off, s33
; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4
@@ -3493,7 +3363,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_addk_i32 s32, 0xf600
-; GFX11-NEXT: s_mov_b32 s33, s46
+; GFX11-NEXT: s_mov_b32 s33, s34
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry: