diff options
author | Philip Reames <preames@rivosinc.com> | 2024-03-28 16:34:04 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-28 16:34:04 -0700 |
commit | 9ea0396f1608681e524e1159bfa8568f3ccfbb99 (patch) | |
tree | c0b159bde8a20d0036b6982cb9058292c24e07b9 | |
parent | bbbcc1d99d08855069f4501c896c43a6d4d7b598 (diff) |
[RISCV] Extend pattern matches involving shNadd to support disjoint or (#87001)
I tried to add representative tests without duplicating the complete
coverage. If there are other tests you'd like to see, let me know.
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 50 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rv64zba.ll | 89 |
2 files changed, 114 insertions, 25 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 549bc039fabb..434b071e628a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -676,38 +676,38 @@ let Predicates = [HasStdExtZba] in { foreach i = {1,2,3} in { defvar shxadd = !cast<Instruction>("SH"#i#"ADD"); - def : Pat<(XLenVT (add_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)), + def : Pat<(XLenVT (add_like_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)), (shxadd GPR:$rs1, GPR:$rs2)>; defvar pat = !cast<ComplexPattern>("sh"#i#"add_op"); // More complex cases use a ComplexPattern. - def : Pat<(XLenVT (add_non_imm12 pat:$rs1, GPR:$rs2)), + def : Pat<(XLenVT (add_like_non_imm12 pat:$rs1, GPR:$rs2)), (shxadd pat:$rs1, GPR:$rs2)>; } -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), (SH1ADD (XLenVT (SH1ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2), (SH1ADD (XLenVT (SH2ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2), (SH1ADD (XLenVT (SH3ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2), (SH2ADD (XLenVT (SH1ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2), (SH2ADD (XLenVT (SH2ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2), (SH2ADD (XLenVT (SH3ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2), 
+def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2), (SH3ADD (XLenVT (SH1ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2), (SH3ADD (XLenVT (SH2ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2), +def : Pat<(add_like (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2), (SH3ADD (XLenVT (SH3ADD GPR:$rs1, GPR:$rs1)), GPR:$rs2)>; -def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy4:$i), +def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy4:$i), (SH2ADD (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i))), GPR:$r)>; -def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy8:$i), +def : Pat<(add_like (XLenVT GPR:$r), CSImm12MulBy8:$i), (SH3ADD (XLenVT (ADDI (XLenVT X0), (SimmShiftRightBy3XForm CSImm12MulBy8:$i))), GPR:$r)>; @@ -762,37 +762,37 @@ def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>; foreach i = {1,2,3} in { defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW"); - def : Pat<(i64 (add_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))), + def : Pat<(i64 (add_like_non_imm12 (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))), (shxadd_uw GPR:$rs1, GPR:$rs2)>; } -def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))), (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))), (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))), (SH3ADD_UW GPR:$rs1, 
GPR:$rs2)>; // More complex cases use a ComplexPattern. foreach i = {1,2,3} in { defvar pat = !cast<ComplexPattern>("sh"#i#"add_uw_op"); - def : Pat<(i64 (add_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))), + def : Pat<(i64 (add_like_non_imm12 pat:$rs1, (XLenVT GPR:$rs2))), (!cast<Instruction>("SH"#i#"ADD_UW") pat:$rs1, GPR:$rs2)>; } -def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD (XLenVT (SRLIW GPR:$rs1, 1)), GPR:$rs2)>; -def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))), (SH2ADD (XLenVT (SRLIW GPR:$rs1, 2)), GPR:$rs2)>; -def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))), (SH3ADD (XLenVT (SRLIW GPR:$rs1, 3)), GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
-def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD_UW (XLenVT (SRLI GPR:$rs1, 1)), GPR:$rs2)>; -def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))), (SH2ADD_UW (XLenVT (SRLI GPR:$rs1, 2)), GPR:$rs2)>; -def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))), +def : Pat<(i64 (add_like_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))), (SH3ADD_UW (XLenVT (SRLI GPR:$rs1, 3)), GPR:$rs2)>; def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)), @@ -879,7 +879,7 @@ def : Pat<(zext GPR:$src), (ADD_UW GPR:$src, (XLenVT X0))>; foreach i = {1,2,3} in { defvar shxadd = !cast<Instruction>("SH"#i#"ADD"); - def : Pat<(i32 (add_non_imm12 (shl GPR:$rs1, (i64 i)), GPR:$rs2)), + def : Pat<(i32 (add_like_non_imm12 (shl GPR:$rs1, (i64 i)), GPR:$rs2)), (shxadd GPR:$rs1, GPR:$rs2)>; } } diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index d9d83633a853..c81c6aeaab89 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -209,6 +209,24 @@ define i64 @sh1adduw_2(i64 %0, i64 %1) { ret i64 %5 } +define i64 @sh1adduw_3(i64 %0, i64 %1) { +; RV64I-LABEL: sh1adduw_3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 31 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh1adduw_3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add.uw a0, a0, a1 +; RV64ZBA-NEXT: ret + %3 = shl i64 %0, 1 + %4 = and i64 %3, 8589934590 + %5 = or disjoint i64 %4, %1 + ret i64 %5 +} + define signext i32 @sh2adduw(i32 signext %0, ptr %1) { ; RV64I-LABEL: sh2adduw: ; RV64I: # %bb.0: @@ -247,6 +265,24 @@ define i64 @sh2adduw_2(i64 %0, i64 %1) { ret i64 %5 } +define i64 @sh2adduw_3(i64 %0, i64 %1) { +; RV64I-LABEL: sh2adduw_3: 
+; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 30 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh2adduw_3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add.uw a0, a0, a1 +; RV64ZBA-NEXT: ret + %3 = shl i64 %0, 2 + %4 = and i64 %3, 17179869180 + %5 = or disjoint i64 %4, %1 + ret i64 %5 +} + define i64 @sh3adduw(i32 signext %0, ptr %1) { ; RV64I-LABEL: sh3adduw: ; RV64I: # %bb.0: @@ -285,6 +321,24 @@ define i64 @sh3adduw_2(i64 %0, i64 %1) { ret i64 %5 } +define i64 @sh3adduw_3(i64 %0, i64 %1) { +; RV64I-LABEL: sh3adduw_3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 29 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: sh3adduw_3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add.uw a0, a0, a1 +; RV64ZBA-NEXT: ret + %3 = shl i64 %0, 3 + %4 = and i64 %3, 34359738360 + %5 = or disjoint i64 %4, %1 + ret i64 %5 +} + ; Type legalization inserts a sext_inreg after the first add. That add will be ; selected as sh2add which does not sign extend. SimplifyDemandedBits is unable ; to remove the sext_inreg because it has multiple uses. 
The ashr will use the @@ -335,6 +389,24 @@ define i64 @addmul6(i64 %a, i64 %b) { ret i64 %d } +define i64 @disjointormul6(i64 %a, i64 %b) { +; RV64I-LABEL: disjointormul6: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 6 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: disjointormul6: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a0, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 6 + %d = or disjoint i64 %c, %b + ret i64 %d +} + define i64 @addmul10(i64 %a, i64 %b) { ; RV64I-LABEL: addmul10: ; RV64I: # %bb.0: @@ -1099,6 +1171,23 @@ define i64 @add4104(i64 %a) { ret i64 %c } +define i64 @add4104_2(i64 %a) { +; RV64I-LABEL: add4104_2: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, 8 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: add4104_2: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: li a1, 1026 +; RV64ZBA-NEXT: sh2add a0, a1, a0 +; RV64ZBA-NEXT: ret + %c = or disjoint i64 %a, 4104 + ret i64 %c +} + define i64 @add8208(i64 %a) { ; RV64I-LABEL: add8208: ; RV64I: # %bb.0: |