diff options
author | choikwa <5455710+choikwa@users.noreply.github.com> | 2024-04-16 06:04:37 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-16 06:04:37 -0400 |
commit | 422bf13f336923da89055f8e70e49e7e9ced2c70 (patch) | |
tree | 3939a6b5250a44534a01ebb05598ab04a9a8cffa | |
parent | ca4cf973279a3991248056a73bcb2bac8b37d035 (diff) |
[AMDGPU] In VectorLegalizer::Expand, if UnrollVectorOp returns Load, … (#88475)
…return only Load since other output is chain.
Added testcase that showed mismatched expected arity when Load and chain
were returned as separate items after
003b58f65bdd5d9c7d0c1b355566c9ef430c0e7d
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/build_vector.ll | 97 |
2 files changed, 105 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 57a3f6a65e00..7a9cfdf5c3fd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1159,8 +1159,14 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { } SDValue Unrolled = DAG.UnrollVectorOp(Node); - for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) - Results.push_back(Unrolled.getValue(I)); + if (Node->getNumValues() == 1) { + Results.push_back(Unrolled); + } else { + assert(Node->getNumValues() == Unrolled->getNumValues() && + "VectorLegalizer Expand returned wrong number of results!"); + for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) + Results.push_back(Unrolled.getValue(I)); + } } SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { diff --git a/llvm/test/CodeGen/AMDGPU/build_vector.ll b/llvm/test/CodeGen/AMDGPU/build_vector.ll index 37412ac3aa54..99755133f36d 100644 --- a/llvm/test/CodeGen/AMDGPU/build_vector.ll +++ b/llvm/test/CodeGen/AMDGPU/build_vector.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8,GFX678,ALL ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,GFX1011,ALL ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX1011,ALL +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,ALL ; ALL-LABEL: {{^}}build_vector2: ; R600: MOV @@ -96,3 +97,99 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 store <2 x i16> %ins.1, ptr addrspace(1) %out ret void } + +; R600-LABEL: build_v2i32_from_v4i16_shuffle: +; R600: ; %bb.0: ; %entry +; R600-NEXT: ALU 0, @10, KC0[], KC1[] +; R600-NEXT: TEX 1 @6 +; R600-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: Fetch clause starting at 6: +; R600-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3 +; R600-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 +; R600-NEXT: ALU clause starting at 10: +; R600-NEXT: MOV * T0.X, 0.0, +; R600-NEXT: ALU clause starting at 11: +; R600-NEXT: LSHL * T0.Y, T1.X, literal.x, +; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; R600-NEXT: LSHL T0.X, T0.X, literal.x, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; R600-NEXT: 16(2.242078e-44), 2(2.802597e-45) +; +; GFX6-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s4, s0 +; GFX6-NEXT: s_mov_b32 s5, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v1, s3 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX6-NEXT: s_endpgm +; +; GFX8-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s6, -1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, s0 +; GFX8-NEXT: s_mov_b32 s5, s1 +; GFX8-NEXT: s_lshl_b32 s0, s3, 16 +; GFX8-NEXT: s_lshl_b32 s1, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_mov_b32_e32 v1, s0 +; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX8-NEXT: s_endpgm +; +; GFX10-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-NEXT: v_mov_b32_e32 v1, s3 +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX940-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX940-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NEXT: s_lshl_b32 s3, s3, 16 +; GFX940-NEXT: s_lshl_b32 s2, s2, 16 +; GFX940-NEXT: v_mov_b32_e32 v0, s2 +; GFX940-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 +; GFX940-NEXT: s_endpgm +define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) { +entry: + %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2> + %zextended = zext <2 x i16> %shuf to <2 x i32> + %shifted = shl <2 x i32> %zextended, <i32 16, i32 16> + store <2 x i32> %shifted, ptr addrspace(1) %out + ret void +} |