author     Wang Pengcheng <wangpengcheng.pp@bytedance.com>   2024-03-29 15:38:39 +0800
committer  GitHub <noreply@github.com>                       2024-03-29 15:38:39 +0800
commit     610b9e23c5a3040aacc6fe85de8694f80bf5bdf5 (patch)
tree       bf9689459ca1a6a311a21a4b4e718c975f022dd9
parent     e1873d99729012191c72d532226771139b6e8519 (diff)
[SDAG] Use shifts if ISD::MUL is illegal when lowering ISD::CTPOP (#86505)

When the multiply at the end of the bit-counting CTPOP expansion is not
legal for the target, lower it as a chain of shifts and adds instead.
This avoids emitting libcalls such as __mulsi3/__muldi3.

Fixes #86205
18 files changed, 839 insertions(+), 1120 deletions(-)
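
Note: the expansion being changed here is the standard SWAR popcount, which
normally finishes by multiplying the per-byte counts with the splat mask
0x0101...01 and shifting the sum down from the top byte. The patch keeps that
multiply when it is legal and otherwise replaces it with a doubling
shift-and-add chain. The following standalone C++ sketch is illustrative
only, not code from the patch: popcount32, its test loop, and the 2654435761
scatter constant are invented for the example; the two finishing steps are
the ones the patch chooses between.

#include <cassert>
#include <cstdint>
#include <cstdio>

// SWAR popcount of a 32-bit value, with the two possible finishing steps:
// the multiply the expansion used unconditionally before this patch, and
// the shift-and-add chain it now falls back to when the multiply is illegal.
static uint32_t popcount32(uint32_t V, bool UseMul) {
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit sums
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // per-byte sums
  if (UseMul)
    return (V * 0x01010101u) >> 24; // needs a legal multiply
  // Multiplier-free finish: for Shift = 8, 16, ... < 32, V += V << Shift.
  // Afterwards the top byte holds the sum of all four byte counts.
  for (unsigned Shift = 8; Shift < 32; Shift *= 2)
    V += V << Shift;
  return V >> 24;
}

int main() {
  for (uint32_t I = 0; I < (1u << 16); ++I) {
    uint32_t X = I * 2654435761u; // scatter test values over the full range
    assert(popcount32(X, true) == popcount32(X, false));
  }
  std::puts("multiply and shift-add finishes agree");
}

For Len-bit values the chain costs log2(Len/8) shift+add pairs (two for i32,
three for i64), which is exactly what the new loops below emit; in the RISC-V
tests this turns each call __mulsi3/__muldi3 into a short run of slli/add.
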
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 4981d7b80b0b..c00fe1f8ff54 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -6389,12 +6389,26 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
     // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
     // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
-    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
 
     // Shift count result from 8 high bits to low bits.
     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
-    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+    auto IsMulSupported = [this](const LLT Ty) {
+      auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
+      return Action == Legal || Action == WidenScalar || Action == Custom;
+    };
+    if (IsMulSupported(Ty)) {
+      auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
+      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+    } else {
+      auto ResTmp = B8Count;
+      for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
+        auto ShiftC = B.buildConstant(Ty, Shift);
+        auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
+        ResTmp = B.buildAdd(Ty, ResTmp, Shl);
+      }
+      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
+    }
 
     MI.eraseFromParent();
     return Legalized;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b66e155..962f0d98e3be 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8710,11 +8710,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   }
 
   // v = (v * 0x01010101...) >> (Len - 8)
-  SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
-  return DAG.getNode(ISD::SRL, dl, VT,
-                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
-                     DAG.getConstant(Len - 8, dl, ShVT));
+  SDValue V;
+  if (isOperationLegalOrCustomOrPromote(
+          ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+    SDValue Mask01 =
+        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
+  } else {
+    V = Op;
+    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+      V = DAG.getNode(ISD::ADD, dl, VT, V,
+                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
+    }
+  }
+  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
 }
 
 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
@@ -8767,10 +8777,22 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
     return Op;
 
   // v = (v * 0x01010101...) >> (Len - 8)
-  SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
-  return DAG.getNode(ISD::VP_LSHR, dl, VT,
-                     DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+  SDValue V;
+  if (isOperationLegalOrCustomOrPromote(
+          ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
+    SDValue Mask01 =
+        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+    V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
+  } else {
+    V = Op;
+    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
+      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
+      V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
+                      DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
+                      Mask, VL);
+    }
+  }
+  return DAG.getNode(ISD::VP_LSHR, dl, VT, V,
                      DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
index d2352be81503..27f2f0bafa95 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctpop-no-implicit-float.mir
@@ -37,6 +37,7 @@ body: |
     ; CHECK-NEXT: %ctpop:_(s32) = G_LSHR [[MUL]], [[C7]](s64)
     ; CHECK-NEXT: $w0 = COPY %ctpop(s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ;
     ; CHECK-CSSC-LABEL: name: s32
     ; CHECK-CSSC: liveins: $w0
     ; CHECK-CSSC-NEXT: {{ $}}
@@ -77,11 +78,12 @@ body: |
     ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095
     ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]]
     ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673
-    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
     ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]]
     ; CHECK-NEXT: %ctpop:_(s64) = G_LSHR [[MUL]], [[C7]](s64)
     ; CHECK-NEXT: $x0 = COPY %ctpop(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    ;
     ; CHECK-CSSC-LABEL: name: s64
     ; CHECK-CSSC: liveins: $x0
     ; CHECK-CSSC-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir
index 4c0b3c617721..f518e9ec9e58 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctpop.mir
@@ -29,8 +29,8 @@ body: |
     ; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
     ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
     ; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
-    ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
     ; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
     ; MIPS32-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32)
     ; MIPS32-NEXT: $v0 = COPY [[LSHR3]](s32)
     ; MIPS32-NEXT: RetRA implicit $v0
@@ -70,8 +70,8 @@ body: |
     ; MIPS32-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135
     ; MIPS32-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]]
     ; MIPS32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009
-    ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
     ; MIPS32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    ; MIPS32-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]]
     ; MIPS32-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32)
     ; MIPS32-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32)
     ; MIPS32-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]]
diff --git
a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir index a890a411544e..354fc109a463 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv32.mir @@ -50,8 +50,8 @@ body: | ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32) ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] @@ -129,8 +129,8 @@ body: | ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32) @@ -201,8 +201,8 @@ body: | ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C10]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C12]](s32) ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[LSHR8]] @@ -267,8 +267,8 @@ body: | ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C13]](s32) ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[LSHR8]] @@ -306,8 +306,8 @@ body: | ; RV32I-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C26]] ; RV32I-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C28]](s32) ; RV32I-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[LSHR17]] @@ -389,8 +389,8 @@ body: | ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C13]] ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL 
[[AND8]], [[C14]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND8]], [[C14]] ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C15]](s32) ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C16]], [[LSHR6]] @@ -468,8 +468,8 @@ body: | ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C15]] ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C16]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C18]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C17]](s32) @@ -540,8 +540,8 @@ body: | ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C10]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C11]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C12]](s32) ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C13]], [[LSHR8]] @@ -606,8 +606,8 @@ body: | ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C11]] ; RV32I-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C12]] ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C13]](s32) ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C14]], [[LSHR8]] @@ -645,8 +645,8 @@ body: | ; RV32I-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD4]], [[C26]] ; RV32I-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C27]] ; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C28]](s32) ; RV32I-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C29]], [[LSHR17]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir index add8a565202d..38a4b9c6dae3 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctlz-rv64.mir @@ -283,8 +283,8 @@ body: | ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C11]] ; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C13]](s64) 
; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C14]], [[LSHR9]] @@ -583,8 +583,8 @@ body: | ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C11]] ; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C12]] ; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C13]](s64) ; RV64I-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C14]], [[LSHR9]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir index d4eb5ebc2e29..c64669cb7341 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv32.mir @@ -35,8 +35,8 @@ body: | ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -90,8 +90,8 @@ body: | ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND5]], [[C8]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C10]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C9]](s32) @@ -143,8 +143,8 @@ body: | ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -190,8 +190,8 @@ body: | ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C5]] ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND3]], [[C6]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C7]](s32) ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C8]](s32) @@ -210,8 +210,8 @@ body: | ; RV32I-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C13]] ; RV32I-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 
- ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C14]] ; RV32I-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND7]], [[C14]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C15]](s32) ; RV32I-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[LSHR3]] ; RV32I-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir index e2434ba9301c..196b367e5927 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctpop-rv64.mir @@ -205,8 +205,8 @@ body: | ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C5]] ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND3]], [[C6]] ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C7]](s64) ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir index 19555a702b73..372becaf08d9 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv32.mir @@ -39,8 +39,8 @@ body: | ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -98,8 +98,8 @@ body: | ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32) @@ -155,8 +155,8 @@ body: | ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C8]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -208,8 +208,8 @@ body: | ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = 
G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[C10]] @@ -234,8 +234,8 @@ body: | ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C17]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C19]](s32) ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD3]], [[LSHR7]] ; RV32I-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -304,8 +304,8 @@ body: | ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C10]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -363,8 +363,8 @@ body: | ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND6]], [[C9]] ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[MUL]], [[C11]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[C10]](s32) @@ -420,8 +420,8 @@ body: | ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C6]] ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C7]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C8]](s32) ; RV32I-NEXT: $x10 = COPY [[LSHR3]](s32) ; RV32I-NEXT: PseudoRET implicit $x10 @@ -473,8 +473,8 @@ body: | ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C7]] ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND4]], [[C8]] ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C9]](s32) ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[C10]] @@ -499,8 +499,8 @@ body: | ; RV32I-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C17]] ; RV32I-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 16843009 - ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 
+ ; RV32I-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND9]], [[C18]] ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C19]](s32) ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD3]], [[LSHR7]] ; RV32I-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir index e030e3ce2a80..e51a2143efd0 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-cttz-rv64.mir @@ -221,8 +221,8 @@ body: | ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ADD2]], [[C6]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64) ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 @@ -457,8 +457,8 @@ body: | ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[ADD2]], [[C6]] ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 72340172838076673 - ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND4]], [[C7]] ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[MUL]], [[C8]](s64) ; RV64I-NEXT: $x10 = COPY [[LSHR3]](s64) ; RV64I-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index 455e6e54c9b3..549d531e829e 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -1160,8 +1160,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: beqz a0, .LBB10_2 ; RV32I-NEXT: # %bb.1: # %cond.false -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -1189,12 +1187,11 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB10_2: ; RV32I-NEXT: li a0, 32 @@ -1205,8 +1202,6 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV64I-NEXT: sext.w a1, a0 ; RV64I-NEXT: beqz a1, .LBB10_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -1232,14 +1227,13 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: 
slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB10_2: ; RV64I-NEXT: li a0, 32 @@ -1354,19 +1348,16 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: test_ctlz_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -1377,28 +1368,26 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -1409,43 +1398,27 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB11_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB11_3 
-; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB11_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctlz_i64: ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB11_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -1481,14 +1454,13 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB11_2: ; RV64I-NEXT: li a0, 64 @@ -1831,8 +1803,6 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind { define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32I-LABEL: test_ctlz_i32_zero_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -1860,18 +1830,15 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctlz_i32_zero_undef: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -1897,14 +1864,13 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i32_zero_undef: @@ -2005,19 +1971,16 @@ define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind { define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV32I-LABEL: test_ctlz_i64_zero_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB15_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -2028,28 +1991,26 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB15_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -2060,41 +2021,25 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB15_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB15_3 -; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB15_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded 
Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctlz_i64_zero_undef: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -2130,14 +2075,13 @@ define i64 @test_ctlz_i64_zero_undef(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctlz_i64_zero_undef: @@ -2464,8 +2408,6 @@ define i16 @test_ctpop_i16(i16 %a) nounwind { define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32I-LABEL: test_ctpop_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 ; RV32I-NEXT: addi a2, a2, 1365 @@ -2482,18 +2424,15 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctpop_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -2508,14 +2447,13 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctpop_i32: @@ -2578,8 +2516,6 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; ; RV32XTHEADBB-LABEL: test_ctpop_i32: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi sp, sp, -16 -; RV32XTHEADBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32XTHEADBB-NEXT: srli a1, a0, 1 ; RV32XTHEADBB-NEXT: lui a2, 349525 ; RV32XTHEADBB-NEXT: addi a2, a2, 1365 @@ -2596,18 +2532,15 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV32XTHEADBB-NEXT: lui a1, 61681 ; RV32XTHEADBB-NEXT: addi a1, a1, -241 ; RV32XTHEADBB-NEXT: and a0, a0, a1 -; RV32XTHEADBB-NEXT: lui a1, 4112 -; RV32XTHEADBB-NEXT: addi a1, a1, 257 -; RV32XTHEADBB-NEXT: call __mulsi3 +; RV32XTHEADBB-NEXT: slli a1, a0, 8 +; RV32XTHEADBB-NEXT: add a0, a0, a1 +; RV32XTHEADBB-NEXT: slli a1, a0, 16 +; RV32XTHEADBB-NEXT: add a0, a0, a1 ; RV32XTHEADBB-NEXT: srli a0, a0, 24 -; RV32XTHEADBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: addi sp, 
sp, 16 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: test_ctpop_i32: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: addi sp, sp, -16 -; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64XTHEADBB-NEXT: srli a1, a0, 1 ; RV64XTHEADBB-NEXT: lui a2, 349525 ; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 @@ -2622,14 +2555,13 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { ; RV64XTHEADBB-NEXT: srli a1, a0, 4 ; RV64XTHEADBB-NEXT: add a0, a0, a1 ; RV64XTHEADBB-NEXT: lui a1, 61681 -; RV64XTHEADBB-NEXT: addiw a1, a1, -241 +; RV64XTHEADBB-NEXT: addi a1, a1, -241 ; RV64XTHEADBB-NEXT: and a0, a0, a1 -; RV64XTHEADBB-NEXT: lui a1, 4112 -; RV64XTHEADBB-NEXT: addiw a1, a1, 257 -; RV64XTHEADBB-NEXT: call __muldi3 +; RV64XTHEADBB-NEXT: slli a1, a0, 8 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: slli a1, a0, 16 +; RV64XTHEADBB-NEXT: add a0, a0, a1 ; RV64XTHEADBB-NEXT: srliw a0, a0, 24 -; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64XTHEADBB-NEXT: addi sp, sp, 16 ; RV64XTHEADBB-NEXT: ret %1 = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %1 @@ -2638,65 +2570,48 @@ define i32 @test_ctpop_i32(i32 %a) nounwind { define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV32I-LABEL: test_ctpop_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s2, a2, 1365 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi s3, a0, 819 -; RV32I-NEXT: and a0, a1, s3 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a4, a1, a2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s4, a1, -241 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s0, 1 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub s0, s0, a0 -; RV32I-NEXT: and a0, s0, s3 -; RV32I-NEXT: srli s0, s0, 2 -; RV32I-NEXT: and a1, s0, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a5, a1, 8 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: slli a5, a1, 16 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sub a0, a0, a3 +; RV32I-NEXT: and a3, a0, a2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: 
slli a2, a0, 8 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 16 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add a0, a0, s5 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_ctpop_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -2719,14 +2634,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_ctpop_i64: @@ -2814,65 +2728,48 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; ; RV32XTHEADBB-LABEL: test_ctpop_i64: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi sp, sp, -32 -; RV32XTHEADBB-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32XTHEADBB-NEXT: mv s0, a0 -; RV32XTHEADBB-NEXT: srli a0, a1, 1 -; RV32XTHEADBB-NEXT: lui a2, 349525 -; RV32XTHEADBB-NEXT: addi s2, a2, 1365 -; RV32XTHEADBB-NEXT: and a0, a0, s2 -; RV32XTHEADBB-NEXT: sub a1, a1, a0 -; RV32XTHEADBB-NEXT: lui a0, 209715 -; RV32XTHEADBB-NEXT: addi s3, a0, 819 -; RV32XTHEADBB-NEXT: and a0, a1, s3 +; RV32XTHEADBB-NEXT: srli a2, a1, 1 +; RV32XTHEADBB-NEXT: lui a3, 349525 +; RV32XTHEADBB-NEXT: addi a3, a3, 1365 +; RV32XTHEADBB-NEXT: and a2, a2, a3 +; RV32XTHEADBB-NEXT: sub a1, a1, a2 +; RV32XTHEADBB-NEXT: lui a2, 209715 +; RV32XTHEADBB-NEXT: addi a2, a2, 819 +; RV32XTHEADBB-NEXT: and a4, a1, a2 ; RV32XTHEADBB-NEXT: srli a1, a1, 2 -; RV32XTHEADBB-NEXT: and a1, a1, s3 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: srli a1, a0, 4 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: lui a1, 61681 -; RV32XTHEADBB-NEXT: addi s4, a1, -241 -; RV32XTHEADBB-NEXT: and a0, a0, s4 -; RV32XTHEADBB-NEXT: lui a1, 4112 -; RV32XTHEADBB-NEXT: addi s1, a1, 257 -; RV32XTHEADBB-NEXT: mv a1, s1 -; RV32XTHEADBB-NEXT: call __mulsi3 -; RV32XTHEADBB-NEXT: srli s5, a0, 24 -; RV32XTHEADBB-NEXT: srli a0, s0, 1 -; RV32XTHEADBB-NEXT: and a0, a0, s2 -; RV32XTHEADBB-NEXT: sub s0, s0, a0 -; RV32XTHEADBB-NEXT: and a0, s0, s3 -; RV32XTHEADBB-NEXT: srli s0, s0, 2 -; RV32XTHEADBB-NEXT: and a1, s0, s3 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: srli a1, a0, 4 -; RV32XTHEADBB-NEXT: add a0, a0, a1 -; RV32XTHEADBB-NEXT: 
and a0, a0, s4 -; RV32XTHEADBB-NEXT: mv a1, s1 -; RV32XTHEADBB-NEXT: call __mulsi3 +; RV32XTHEADBB-NEXT: and a1, a1, a2 +; RV32XTHEADBB-NEXT: add a1, a4, a1 +; RV32XTHEADBB-NEXT: srli a4, a1, 4 +; RV32XTHEADBB-NEXT: add a1, a1, a4 +; RV32XTHEADBB-NEXT: lui a4, 61681 +; RV32XTHEADBB-NEXT: addi a4, a4, -241 +; RV32XTHEADBB-NEXT: and a1, a1, a4 +; RV32XTHEADBB-NEXT: slli a5, a1, 8 +; RV32XTHEADBB-NEXT: add a1, a1, a5 +; RV32XTHEADBB-NEXT: slli a5, a1, 16 +; RV32XTHEADBB-NEXT: add a1, a1, a5 +; RV32XTHEADBB-NEXT: srli a1, a1, 24 +; RV32XTHEADBB-NEXT: srli a5, a0, 1 +; RV32XTHEADBB-NEXT: and a3, a5, a3 +; RV32XTHEADBB-NEXT: sub a0, a0, a3 +; RV32XTHEADBB-NEXT: and a3, a0, a2 +; RV32XTHEADBB-NEXT: srli a0, a0, 2 +; RV32XTHEADBB-NEXT: and a0, a0, a2 +; RV32XTHEADBB-NEXT: add a0, a3, a0 +; RV32XTHEADBB-NEXT: srli a2, a0, 4 +; RV32XTHEADBB-NEXT: add a0, a0, a2 +; RV32XTHEADBB-NEXT: and a0, a0, a4 +; RV32XTHEADBB-NEXT: slli a2, a0, 8 +; RV32XTHEADBB-NEXT: add a0, a0, a2 +; RV32XTHEADBB-NEXT: slli a2, a0, 16 +; RV32XTHEADBB-NEXT: add a0, a0, a2 ; RV32XTHEADBB-NEXT: srli a0, a0, 24 -; RV32XTHEADBB-NEXT: add a0, a0, s5 +; RV32XTHEADBB-NEXT: add a0, a0, a1 ; RV32XTHEADBB-NEXT: li a1, 0 -; RV32XTHEADBB-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32XTHEADBB-NEXT: addi sp, sp, 32 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: test_ctpop_i64: ; RV64XTHEADBB: # %bb.0: -; RV64XTHEADBB-NEXT: addi sp, sp, -16 -; RV64XTHEADBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64XTHEADBB-NEXT: srli a1, a0, 1 ; RV64XTHEADBB-NEXT: lui a2, 349525 ; RV64XTHEADBB-NEXT: addiw a2, a2, 1365 @@ -2895,14 +2792,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind { ; RV64XTHEADBB-NEXT: slli a2, a1, 32 ; RV64XTHEADBB-NEXT: add a1, a1, a2 ; RV64XTHEADBB-NEXT: and a0, a0, a1 -; RV64XTHEADBB-NEXT: lui a1, 4112 -; RV64XTHEADBB-NEXT: addiw a1, a1, 257 -; RV64XTHEADBB-NEXT: slli a2, a1, 32 -; RV64XTHEADBB-NEXT: add a1, a1, a2 -; RV64XTHEADBB-NEXT: call __muldi3 +; RV64XTHEADBB-NEXT: slli a1, a0, 8 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: slli a1, a0, 16 +; RV64XTHEADBB-NEXT: add a0, a0, a1 +; RV64XTHEADBB-NEXT: slli a1, a0, 32 +; RV64XTHEADBB-NEXT: add a0, a0, a1 ; RV64XTHEADBB-NEXT: srli a0, a0, 56 -; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64XTHEADBB-NEXT: addi sp, sp, 16 ; RV64XTHEADBB-NEXT: ret %1 = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll index adf614435b31..9ae30e646fdb 100644 --- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll +++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll @@ -602,19 +602,16 @@ define signext i32 @ctlz(i64 %b) nounwind { ; ; RV32I-LABEL: ctlz: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB7_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -625,28 +622,26 @@ define signext i32 @ctlz(i64 %b) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: andi a0, a0, 63 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -657,41 +652,25 @@ define signext i32 @ctlz(i64 %b) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB7_2 -; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi s1, a0, 32 -; RV32I-NEXT: j .LBB7_3 -; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: srli s1, s1, 24 -; RV32I-NEXT: .LBB7_3: # %entry -; RV32I-NEXT: andi a0, s1, 63 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: andi a0, a0, 63 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -727,15 +706,14 @@ define signext i32 
@ctlz(i64 %b) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 2 ; RV64I-NEXT: srli a0, a0, 58 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index 3731b9719445..b45ab135fa1c 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %cond.false -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -40,12 +38,11 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: li a0, 32 @@ -64,19 +61,16 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: ctlz_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -87,28 +81,26 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 
-; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -119,35 +111,21 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB1_3 -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32XTHEADBB-LABEL: ctlz_i64: diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 36c107061795..7e6c3f9c87d2 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -11,8 +11,6 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: beqz a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: # %cond.false -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 @@ -40,12 +38,11 @@ define i32 @ctlz_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB0_2: ; RV32I-NEXT: li a0, 32 @@ -64,19 +61,16 @@ declare i64 @llvm.ctlz.i64(i64, i1) define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-LABEL: ctlz_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw 
s6, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: lui a2, 349525 +; RV32I-NEXT: addi a4, a2, 1365 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a3, a2, 819 +; RV32I-NEXT: lui a2, 61681 +; RV32I-NEXT: addi a2, a2, -241 +; RV32I-NEXT: bnez a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -87,28 +81,26 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s4, a2, 1365 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s5, a1, 819 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s6, a1, -241 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s3, a1, 257 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: or a0, s2, a0 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: addi a0, a0, 32 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: srli a0, a1, 1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 4 @@ -119,35 +111,21 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: not a0, a0 ; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: and a1, a1, s4 +; RV32I-NEXT: and a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: and a1, a0, s5 +; RV32I-NEXT: and a1, a0, a3 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s5 +; RV32I-NEXT: and a0, a0, a3 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: srli a1, a0, 4 ; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s6 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: bnez s0, .LBB1_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: addi a0, a0, 32 -; RV32I-NEXT: j .LBB1_3 -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: srli a0, s1, 24 -; RV32I-NEXT: .LBB1_3: ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctlz_i64: @@ -275,8 +253,6 @@ declare i32 @llvm.ctpop.i32(i32) define i32 @ctpop_i32(i32 %a) nounwind { ; RV32I-LABEL: ctpop_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: srli a1, a0, 1 ; RV32I-NEXT: lui a2, 349525 
; RV32I-NEXT: addi a2, a2, 1365 @@ -293,12 +269,11 @@ define i32 @ctpop_i32(i32 %a) nounwind { ; RV32I-NEXT: lui a1, 61681 ; RV32I-NEXT: addi a1, a1, -241 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi a1, a1, 257 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_i32: @@ -390,58 +365,42 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind { ; RV32I-LABEL: ctpop_v2i32: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s4, a1, 819 -; RV32I-NEXT: and a1, a0, s4 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a4, a0, a2 ; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s5, a1, -241 -; RV32I-NEXT: and a0, a0, s5 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s2, a0, 24 -; RV32I-NEXT: srli a0, s0, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub s0, s0, a0 -; RV32I-NEXT: and a0, s0, s4 -; RV32I-NEXT: srli s0, s0, 2 -; RV32I-NEXT: and a1, s0, s4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s5 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli a1, a0, 24 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a4, a0 +; RV32I-NEXT: srli a4, a0, 4 +; RV32I-NEXT: add a0, a0, a4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: slli a5, a0, 8 +; RV32I-NEXT: add a0, a0, a5 +; RV32I-NEXT: slli a5, a0, 16 +; RV32I-NEXT: add a0, a0, a5 +; RV32I-NEXT: srli a0, a0, 24 +; RV32I-NEXT: srli a5, a1, 1 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: and a3, a1, a2 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: srli a2, a1, 4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a2, a1, 8 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: 
slli a2, a1, 16 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: srli a1, a1, 24 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_v2i32: @@ -558,59 +517,44 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { ; RV32I-LABEL: ctpop_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: srli a0, a1, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s2, a2, 1365 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub a1, a1, a0 -; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi s3, a0, 819 -; RV32I-NEXT: and a0, a1, s3 +; RV32I-NEXT: srli a2, a1, 1 +; RV32I-NEXT: lui a3, 349525 +; RV32I-NEXT: addi a3, a3, 1365 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: lui a2, 209715 +; RV32I-NEXT: addi a2, a2, 819 +; RV32I-NEXT: and a4, a1, a2 ; RV32I-NEXT: srli a1, a1, 2 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s4, a1, -241 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s0, 1 -; RV32I-NEXT: and a0, a0, s2 -; RV32I-NEXT: sub s0, s0, a0 -; RV32I-NEXT: and a0, s0, s3 -; RV32I-NEXT: srli s0, s0, 2 -; RV32I-NEXT: and a1, s0, s3 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: srli a4, a1, 4 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: lui a4, 61681 +; RV32I-NEXT: addi a4, a4, -241 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: slli a5, a1, 8 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: slli a5, a1, 16 +; RV32I-NEXT: add a1, a1, a5 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: srli a5, a0, 1 +; RV32I-NEXT: and a3, a5, a3 +; RV32I-NEXT: sub a0, a0, a3 +; RV32I-NEXT: and a3, a0, a2 +; RV32I-NEXT: srli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: srli a2, a0, 4 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: slli a2, a0, 16 +; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add a0, a0, s5 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: li a1, 0 -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_i64: @@ -738,99 +682,82 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV32I-LABEL: ctpop_v2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -48 -; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; 
RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a0, 4(a1) -; RV32I-NEXT: lw s2, 8(a1) -; RV32I-NEXT: lw s5, 12(a1) -; RV32I-NEXT: lw s6, 0(a1) -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: lui a2, 349525 -; RV32I-NEXT: addi s3, a2, 1365 -; RV32I-NEXT: and a1, a1, s3 -; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi s4, a1, 819 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: lui a1, 61681 -; RV32I-NEXT: addi s7, a1, -241 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: lui a1, 4112 -; RV32I-NEXT: addi s1, a1, 257 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s8, a0, 24 -; RV32I-NEXT: srli a0, s6, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s6, a0 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add s8, a0, s8 -; RV32I-NEXT: srli a0, s5, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s5, a0 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli s5, a0, 24 -; RV32I-NEXT: srli a0, s2, 1 -; RV32I-NEXT: and a0, a0, s3 -; RV32I-NEXT: sub a0, s2, a0 -; RV32I-NEXT: and a1, a0, s4 -; RV32I-NEXT: srli a0, a0, 2 -; RV32I-NEXT: and a0, a0, s4 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: srli a1, a0, 4 -; RV32I-NEXT: add a0, a0, a1 -; RV32I-NEXT: and a0, a0, s7 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __mulsi3 -; RV32I-NEXT: srli a0, a0, 24 -; RV32I-NEXT: add a0, a0, s5 -; RV32I-NEXT: sw zero, 12(s0) -; RV32I-NEXT: sw zero, 4(s0) -; RV32I-NEXT: sw a0, 8(s0) -; RV32I-NEXT: sw s8, 0(s0) -; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: srli a5, a3, 1 +; RV32I-NEXT: lui a6, 349525 +; RV32I-NEXT: addi a6, a6, 1365 +; RV32I-NEXT: and a5, a5, a6 +; RV32I-NEXT: sub a3, a3, a5 +; RV32I-NEXT: lui a5, 209715 +; RV32I-NEXT: addi a5, a5, 819 +; RV32I-NEXT: and a7, a3, a5 +; RV32I-NEXT: srli a3, a3, 2 +; RV32I-NEXT: and a3, a3, a5 +; RV32I-NEXT: add a3, a7, a3 +; RV32I-NEXT: srli a7, a3, 4 +; RV32I-NEXT: add 
a3, a3, a7 +; RV32I-NEXT: lui a7, 61681 +; RV32I-NEXT: addi a7, a7, -241 +; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: slli t0, a3, 8 +; RV32I-NEXT: add a3, a3, t0 +; RV32I-NEXT: slli t0, a3, 16 +; RV32I-NEXT: add a3, a3, t0 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: srli t0, a1, 1 +; RV32I-NEXT: and t0, t0, a6 +; RV32I-NEXT: sub a1, a1, t0 +; RV32I-NEXT: and t0, a1, a5 +; RV32I-NEXT: srli a1, a1, 2 +; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: add a1, t0, a1 +; RV32I-NEXT: srli t0, a1, 4 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: and a1, a1, a7 +; RV32I-NEXT: slli t0, a1, 8 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: slli t0, a1, 16 +; RV32I-NEXT: add a1, a1, t0 +; RV32I-NEXT: srli a1, a1, 24 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: srli a3, a4, 1 +; RV32I-NEXT: and a3, a3, a6 +; RV32I-NEXT: sub a4, a4, a3 +; RV32I-NEXT: and a3, a4, a5 +; RV32I-NEXT: srli a4, a4, 2 +; RV32I-NEXT: and a4, a4, a5 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: srli a4, a3, 4 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: and a3, a3, a7 +; RV32I-NEXT: slli a4, a3, 8 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a3, 16 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: srli a4, a2, 1 +; RV32I-NEXT: and a4, a4, a6 +; RV32I-NEXT: sub a2, a2, a4 +; RV32I-NEXT: and a4, a2, a5 +; RV32I-NEXT: srli a2, a2, 2 +; RV32I-NEXT: and a2, a2, a5 +; RV32I-NEXT: add a2, a4, a2 +; RV32I-NEXT: srli a4, a2, 4 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: and a2, a2, a7 +; RV32I-NEXT: slli a4, a2, 8 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: slli a4, a2, 16 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: srli a2, a2, 24 +; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop_v2i64: diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll index 73bfc6480b4d..acd63f24bb8f 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll @@ -317,8 +317,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -354,14 +352,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll index 7feef4dad411..b0e447b71178 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll @@ -307,8 +307,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; 
RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -344,14 +342,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 @@ -623,8 +620,6 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-LABEL: ctpop_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -647,14 +642,13 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i64: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index 1f62ea9f5681..6cdab888ffcd 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -38,14 +36,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: li a0, 32 @@ -66,8 +63,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -93,14 +88,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: 
lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: j .LBB1_3 ; RV64I-NEXT: .LBB1_2: ; RV64I-NEXT: li a0, 32 @@ -125,50 +119,45 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; RV64I-LABEL: log2_ceil_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: addiw a0, a0, -1 -; RV64I-NEXT: li s0, 32 -; RV64I-NEXT: li a1, 32 -; RV64I-NEXT: beqz a0, .LBB2_2 +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: li a2, 32 +; RV64I-NEXT: beqz a1, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a2, a1, 1 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a1, a0, 24 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 24 ; RV64I-NEXT: .LBB2_2: # %cond.end -; RV64I-NEXT: sub a0, s0, a1 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: log2_ceil_i32: @@ -189,48 +178,42 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findLastSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: srliw a0, a0, 1 -; RV64I-NEXT: or a0, s0, a0 -; RV64I-NEXT: 
srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: xori a0, a0, 31 -; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 24 +; RV64I-NEXT: xori a1, a1, 31 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: findLastSet_i32: @@ -256,10 +239,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srliw a0, a0, 1 ; RV64I-NEXT: beqz a0, .LBB4_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: .cfi_def_cfa_offset 16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -285,14 +264,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB4_2: ; RV64I-NEXT: li a0, 32 @@ -317,8 +295,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, 
a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -354,14 +330,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 2269d8d04c9c..4d5ef5db8605 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -11,8 +11,6 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -38,14 +36,13 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: li a0, 32 @@ -64,8 +61,6 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -91,14 +86,13 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: j .LBB1_3 ; RV64I-NEXT: .LBB1_2: ; RV64I-NEXT: li a0, 32 @@ -121,50 +115,45 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { ; RV64I-LABEL: log2_ceil_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: addiw a0, a0, -1 -; RV64I-NEXT: li s0, 32 -; RV64I-NEXT: li a1, 32 -; RV64I-NEXT: beqz a0, .LBB2_2 +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: li a0, 32 +; RV64I-NEXT: li a2, 32 +; RV64I-NEXT: beqz a1, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: srliw a1, a0, 1 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or 
a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a2, a1, 1 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a1, a0, 24 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 24 ; RV64I-NEXT: .LBB2_2: # %cond.end -; RV64I-NEXT: sub a0, s0, a1 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: log2_ceil_i32: @@ -183,48 +172,42 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind { define signext i32 @findLastSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findLastSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: srliw a0, a0, 1 -; RV64I-NEXT: or a0, s0, a0 -; RV64I-NEXT: srliw a1, a0, 2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 4 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 8 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srliw a1, a0, 16 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: not a0, a0 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: srliw a1, a0, 1 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: srliw a2, a1, 2 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 4 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 8 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srliw a2, a1, 16 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: srli a2, a1, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; 
RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: add a0, a2, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: xori a0, a0, 31 -; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: addi a2, a2, -241 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 24 +; RV64I-NEXT: xori a1, a1, 31 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: findLastSet_i32: @@ -248,10 +231,6 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srliw a0, a0, 1 ; RV64I-NEXT: beqz a0, .LBB4_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: .cfi_def_cfa_offset 16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: srliw a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srliw a1, a0, 2 @@ -277,14 +256,13 @@ define i32 @ctlz_lshr_i32(i32 signext %a) { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB4_2: ; RV64I-NEXT: li a0, 32 @@ -307,8 +285,6 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srli a1, a0, 2 @@ -344,14 +320,13 @@ define i64 @ctlz_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 64 @@ -544,8 +519,6 @@ declare i32 @llvm.ctpop.i32(i32) define signext i32 @ctpop_i32(i32 signext %a) nounwind { ; RV64I-LABEL: ctpop_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -560,14 +533,13 @@ define signext i32 @ctpop_i32(i32 
signext %a) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i32: @@ -657,8 +629,6 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind { define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64I-LABEL: ctpop_i32_load: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 @@ -674,14 +644,13 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind { ; RV64I-NEXT: srli a1, a0, 4 ; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: addi a1, a1, -241 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srliw a0, a0, 24 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i32_load: @@ -699,58 +668,42 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind { ; RV64I-LABEL: ctpop_v2i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw s3, a2, 1365 -; RV64I-NEXT: and a1, a1, s3 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw s4, a1, 819 -; RV64I-NEXT: and a1, a0, s4 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: and a4, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, s4 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw s5, a1, -241 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s1, a1, 257 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw s2, a0, 24 -; RV64I-NEXT: srli a0, s0, 1 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: sub s0, s0, a0 -; RV64I-NEXT: and a0, s0, s4 -; RV64I-NEXT: srli s0, s0, 2 -; RV64I-NEXT: and a1, s0, s4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a1, a0, 24 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: 
ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: srli a4, a0, 4 +; RV64I-NEXT: add a0, a0, a4 +; RV64I-NEXT: lui a4, 61681 +; RV64I-NEXT: addi a4, a4, -241 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: slli a5, a0, 8 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: slli a5, a0, 16 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: srli a5, a1, 1 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srliw a1, a1, 24 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_v2i32: @@ -875,8 +828,6 @@ declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-LABEL: ctpop_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srli a1, a0, 1 ; RV64I-NEXT: lui a2, 349525 ; RV64I-NEXT: addiw a2, a2, 1365 @@ -899,14 +850,13 @@ define i64 @ctpop_i64(i64 %a) nounwind { ; RV64I-NEXT: slli a2, a1, 32 ; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw a1, a1, 257 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add a1, a1, a2 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 16 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: srli a0, a0, 56 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_i64: @@ -998,66 +948,52 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; RV64I-LABEL: ctpop_v2i64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -64 -; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s5, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: srli a1, a0, 1 -; RV64I-NEXT: lui a2, 349525 -; RV64I-NEXT: addiw a2, a2, 1365 -; RV64I-NEXT: slli a3, a2, 32 -; RV64I-NEXT: add s3, a2, a3 -; RV64I-NEXT: and a1, a1, s3 -; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add s4, a1, a2 -; RV64I-NEXT: and a1, a0, s4 +; RV64I-NEXT: srli a2, a0, 1 +; RV64I-NEXT: lui a3, 349525 +; RV64I-NEXT: addiw a3, a3, 1365 +; RV64I-NEXT: slli a4, a3, 32 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: lui a2, 209715 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a4, a2, 32 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: and a4, a0, a2 ; RV64I-NEXT: srli a0, a0, 2 -; RV64I-NEXT: and a0, a0, s4 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: 
add a0, a0, a1 -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: srli a4, a0, 4 +; RV64I-NEXT: add a0, a0, a4 +; RV64I-NEXT: lui a4, 61681 +; RV64I-NEXT: addiw a4, a4, -241 +; RV64I-NEXT: slli a5, a4, 32 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: and a0, a0, a4 +; RV64I-NEXT: slli a5, a0, 8 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: slli a5, a0, 16 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: slli a5, a0, 32 +; RV64I-NEXT: add a0, a0, a5 +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: srli a5, a1, 1 +; RV64I-NEXT: and a3, a5, a3 +; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: and a3, a1, a2 +; RV64I-NEXT: srli a1, a1, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: srli a2, a1, 4 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: and a1, a1, a4 +; RV64I-NEXT: slli a2, a1, 8 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a1, 16 +; RV64I-NEXT: add a1, a1, a2 ; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: add s5, a1, a2 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: lui a1, 4112 -; RV64I-NEXT: addiw s1, a1, 257 -; RV64I-NEXT: slli a1, s1, 32 -; RV64I-NEXT: add s1, s1, a1 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srli s2, a0, 56 -; RV64I-NEXT: srli a0, s0, 1 -; RV64I-NEXT: and a0, a0, s3 -; RV64I-NEXT: sub s0, s0, a0 -; RV64I-NEXT: and a0, s0, s4 -; RV64I-NEXT: srli s0, s0, 2 -; RV64I-NEXT: and a1, s0, s4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 4 -; RV64I-NEXT: add a0, a0, a1 -; RV64I-NEXT: and a0, a0, s5 -; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srli a1, a0, 56 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s4, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s5, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 56 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop_v2i64:
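
Editor's note: every RISC-V test update above exercises the same scalar rewrite. After the usual 0x55/0x33/0x0F bit-twiddling steps reduce the input to per-byte population counts, the horizontal byte sum was previously formed by multiplying with 0x01010101 (materialized in the old checks via lui 4112 / addi 257) and shifting the total down from the top byte; on rv32i/rv64i, where there is no hardware multiply, that became a __mulsi3/__muldi3 libcall with the full spill/reload prologue seen in the removed lines. The new checks instead form the byte sum with a shift-add ladder. A minimal C++ sketch of the two forms, assuming only the standard headers (function names here are illustrative, not taken from the patch):

#include <cstdint>

// Horizontal byte-sum via multiply: after the reduction steps, each byte of v
// holds its own popcount; multiplying by 0x01010101 accumulates all four byte
// counts into the top byte. On rv32i this multiply lowers to a __mulsi3 call.
uint32_t popcount32_mul(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                  // 2-bit field counts
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);  // 4-bit field counts
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                  // per-byte counts
  return (v * 0x01010101u) >> 24;                    // sum lands in top byte
}

// Multiply-free horizontal byte-sum: the shift-add ladder that replaces the
// libcall, matching the slli/add pairs in the updated checks above.
uint32_t popcount32_shift(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;
  v += v << 8;   // top byte now holds b3+b2
  v += v << 16;  // top byte now holds b3+b2+b1+b0
  return v >> 24;
}

The 64-bit checks show the same ladder with one extra rung (slli 32 / add, then srli 56), and in general the ladder needs log2(Len/8) shift-add pairs, so it remains a short inline sequence at every width these tests cover.
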