diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll | 582 |
1 files changed, 226 insertions, 356 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index c1d682689903..3b078c41f4a8 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -1989,256 +1989,138 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 { ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x7 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:1024 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:512 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:256 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x790 -; GFX11-NEXT: s_add_i32 s2, s0, 0x780 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x770 -; GFX11-NEXT: s_add_i32 s2, s0, 0x760 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x750 -; GFX11-NEXT: s_add_i32 s2, s0, 0x740 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x730 -; GFX11-NEXT: s_add_i32 s2, s0, 0x720 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x710 -; GFX11-NEXT: s_add_i32 s2, s0, 0x700 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x690 -; GFX11-NEXT: s_add_i32 s2, s0, 0x680 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x670 -; GFX11-NEXT: s_add_i32 s2, s0, 0x660 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x650 -; GFX11-NEXT: s_add_i32 s2, s0, 0x640 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x630 -; GFX11-NEXT: s_add_i32 s2, s0, 0x620 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x610 -; GFX11-NEXT: s_add_i32 s2, s0, 0x600 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x590 -; GFX11-NEXT: s_add_i32 s2, s0, 0x580 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x570 -; GFX11-NEXT: s_add_i32 s2, s0, 0x560 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x550 -; GFX11-NEXT: s_add_i32 s2, s0, 0x540 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x530 -; GFX11-NEXT: s_add_i32 s2, s0, 0x520 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x510 -; GFX11-NEXT: s_add_i32 s2, s0, 0x500 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x490 -; GFX11-NEXT: s_add_i32 s2, s0, 0x480 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x470 -; GFX11-NEXT: s_add_i32 s2, s0, 0x460 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x450 -; GFX11-NEXT: s_add_i32 s2, s0, 0x440 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x430 -; GFX11-NEXT: s_add_i32 s2, s0, 0x420 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x410 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x390 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x380 -; GFX11-NEXT: s_add_i32 s2, s0, 0x370 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x360 -; GFX11-NEXT: s_add_i32 s2, s0, 0x350 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x340 -; GFX11-NEXT: s_add_i32 s2, s0, 0x330 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x320 -; GFX11-NEXT: s_add_i32 s2, s0, 0x310 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x300 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x290 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x280 -; GFX11-NEXT: s_add_i32 s2, s0, 0x270 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x260 -; GFX11-NEXT: s_add_i32 s2, s0, 0x250 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x240 -; GFX11-NEXT: s_add_i32 s2, s0, 0x230 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x220 -; GFX11-NEXT: s_add_i32 s2, s0, 0x210 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x190 -; GFX11-NEXT: s_add_i32 s2, s0, 0x180 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x170 -; GFX11-NEXT: s_add_i32 s2, s0, 0x160 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x150 -; GFX11-NEXT: s_add_i32 s2, s0, 0x140 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x130 -; GFX11-NEXT: s_add_i32 s2, s0, 0x120 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: s_add_i32 s2, s0, 0xf0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xd0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xb0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x90 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2032 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2016 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2000 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1984 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1968 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1952 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1936 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1920 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1904 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1888 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1872 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1856 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1840 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1824 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1808 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1792 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1776 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1760 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1744 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1728 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1712 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1696 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1680 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1664 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1648 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1632 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1616 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1600 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1584 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1568 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1552 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1536 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1520 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1504 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1488 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1472 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1456 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1440 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1424 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1408 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1392 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1376 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1360 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1344 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1328 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1312 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1296 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1280 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1264 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1248 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1232 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1216 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1200 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1184 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1168 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1152 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1136 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1120 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1104 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1088 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1072 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1056 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1040 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1024 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1008 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:992 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:976 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:960 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:944 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:928 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:912 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:896 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:880 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:864 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:848 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:832 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:816 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:800 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:784 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:768 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:752 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:736 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:720 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:704 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:688 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:672 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:656 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:640 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:624 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:608 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:592 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:576 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:560 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:544 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:528 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:512 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:496 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:480 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:464 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:448 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:432 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:416 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:400 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:384 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:368 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:352 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:336 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:320 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:304 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:288 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:272 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:256 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: ret <512 x i32> zeroinitializer @@ -2636,7 +2518,6 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-LABEL: return_72xi32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212 ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208 @@ -2651,93 +2532,82 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172 ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168 ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164 -; GFX11-NEXT: s_clause 0x14 -; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:124 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:120 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 +; GFX11-NEXT: s_clause 0x11 +; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16 +; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:12 +; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:8 +; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:32 +; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:24 +; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:48 +; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:40 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:64 +; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:60 +; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:56 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:80 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:72 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:96 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:92 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:88 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_load_b32 v23, off, s32 offset:112 +; GFX11-NEXT: scratch_load_b32 v22, off, s32 offset:108 +; GFX11-NEXT: scratch_load_b32 v21, off, s32 offset:104 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:136 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 +; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:128 +; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:124 +; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:120 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v10, off, s32 offset:152 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 +; GFX11-NEXT: scratch_load_b32 v15, off, s32 offset:144 +; GFX11-NEXT: scratch_load_b32 v14, off, s32 offset:140 +; GFX11-NEXT: scratch_load_b32 v13, off, s32 offset:136 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: s_clause 0xd -; GFX11-NEXT: scratch_load_b32 v8, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v7, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v6, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v9, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:160 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:156 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:152 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:148 +; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:132 +; GFX11-NEXT: scratch_load_b32 v16, off, s32 offset:116 +; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:100 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:84 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 +; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:36 +; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x100 -; GFX11-NEXT: s_add_i32 s3, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s34, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s35, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s36, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s37, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s38, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s39, s0, 0x90 -; GFX11-NEXT: s_add_i32 s40, s0, 0x70 -; GFX11-NEXT: s_add_i32 s41, s0, 0x60 -; GFX11-NEXT: s_add_i32 s42, s0, 0x50 -; GFX11-NEXT: s_add_i32 s43, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(10) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[60:63], off offset:272 ; GFX11-NEXT: s_waitcnt vmcnt(9) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[12:15], off offset:256 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[16:19], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[60:63], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[20:23], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[56:59], s34 +; GFX11-NEXT: scratch_store_b128 v0, v[56:59], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[41:44], s35 +; GFX11-NEXT: scratch_store_b128 v0, v[41:44], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[37:40], s36 +; GFX11-NEXT: scratch_store_b128 v0, v[37:40], off offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s37 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s38 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s39 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s40 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s41 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s42 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s43 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:164 ; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:168 @@ -3306,7 +3176,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-LABEL: call_72xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s46, s33 +; GFX11-NEXT: s_mov_b32 s34, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00 @@ -3353,11 +3223,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 +; GFX11-NEXT: s_add_i32 s2, s33, 0x200 +; GFX11-NEXT: v_writelane_b32 v60, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 -; GFX11-NEXT: s_add_i32 s0, s33, 0x200 -; GFX11-NEXT: v_writelane_b32 v60, s30, 0 -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0 +; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0 @@ -3373,14 +3243,14 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0 ; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0 ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 -; GFX11-NEXT: s_mov_b32 s45, return_72xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s44, return_72xi32@abs32@lo +; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v60, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624 ; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640 -; GFX11-NEXT: s_add_i32 s0, s32, 0xa0 +; GFX11-NEXT: s_add_i32 s2, s32, 0xa0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_mov_b32_e32 v32, v48 ; GFX11-NEXT: s_clause 0x9 @@ -3431,38 +3301,38 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6 ; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9 ; GFX11-NEXT: v_mov_b32_e32 v9, v20 -; GFX11-NEXT: scratch_store_b32 off, v11, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x90 +; GFX11-NEXT: scratch_store_b32 off, v11, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x90 ; GFX11-NEXT: v_mov_b32_e32 v11, v22 -; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x80 +; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x80 ; GFX11-NEXT: v_mov_b32_e32 v5, v16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-NEXT: v_mov_b32_e32 v0, 24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x70 +; GFX11-NEXT: s_add_i32 s2, s32, 0x70 ; GFX11-NEXT: v_mov_b32_e32 v6, v17 -; GFX11-NEXT: scratch_store_b128 off, v[12:15], s0 +; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2 ; GFX11-NEXT: v_mov_b32_e32 v13, v24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x6c +; GFX11-NEXT: s_add_i32 s2, s32, 0x6c ; GFX11-NEXT: v_mov_b32_e32 v7, v18 -; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x60 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x60 ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26 -; GFX11-NEXT: scratch_store_b96 off, v[56:58], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x50 +; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x50 ; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45 -; GFX11-NEXT: scratch_store_b128 off, v[40:43], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 64 ; GFX11-NEXT: v_mov_b32_e32 v14, v25 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 48 +; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 48 ; GFX11-NEXT: v_mov_b32_e32 v16, v27 -; GFX11-NEXT: scratch_store_b128 off, v[36:39], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 32 +; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 32 ; GFX11-NEXT: v_mov_b32_e32 v30, v46 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 16 -; GFX11-NEXT: scratch_store_b128 off, v[32:35], s0 +; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 16 +; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2 ; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, 42 @@ -3470,10 +3340,10 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572 ; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556 ; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540 -; GFX11-NEXT: s_add_i32 s0, s33, 0x400 +; GFX11-NEXT: s_add_i32 s2, s33, 0x400 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v0, s0 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_load_b32 v59, off, s33 ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4 @@ -3493,7 +3363,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0xf600 -; GFX11-NEXT: s_mov_b32 s33, s46 +; GFX11-NEXT: s_mov_b32 s33, s34 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: |