author     David Green <david.green@arm.com>   2023-11-08 15:50:32 +0000
committer  GitHub <noreply@github.com>         2023-11-08 15:50:32 +0000
commit     3ccbd68480d0b8f777502300024725b9fc4ed81f (patch)
tree       772b2b3908b4d62e37d6b5eca65202d009d540b6
parent     98d8b688bd351850ca99d3e5e9dfdf582c83c4f4 (diff)
[InstCombine] Handle scalable geps in EmitGEPOffset (#71565)
This adds handling for scalable vectors in emitGEPOffset. The issue was noticed in some tests that Biplob was creating, so it may be unlikely to come up much in practice. I've attempted to add test coverage for the various places emitGEPOffset is called. The vscale intrinsics will currently be emitted multiple times, relying on later CSE to combine them.
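For illustration, the offset of a GEP over a scalable type is now expanded as the element's known-minimum size multiplied by vscale and by the index. A minimal sketch of the resulting IR for a <vscale x 4 x i32> element type (value names are illustrative; the multiply by the constant size is shown after later instcombine has folded it to a shift, as in the tests below):

  ; offset of: getelementptr <vscale x 4 x i32>, ptr %p, i64 %i
  %vs  = call i64 @llvm.vscale.i64()   ; runtime vscale
  %sz  = shl i64 %vs, 4                ; 16 bytes (known-min size of <vscale x 4 x i32>) * vscale
  %off = mul nsw i64 %sz, %i           ; total byte offset fed to the icmp/sub folds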
-rw-r--r--  llvm/lib/Analysis/Local.cpp                    11
-rw-r--r--  llvm/test/Transforms/InstCombine/icmp-gep.ll   70
-rw-r--r--  llvm/test/Transforms/InstCombine/sub-gep.ll    61
3 files changed, 138 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Analysis/Local.cpp b/llvm/lib/Analysis/Local.cpp
index 672e6a190a57..ded600766384 100644
--- a/llvm/lib/Analysis/Local.cpp
+++ b/llvm/lib/Analysis/Local.cpp
@@ -45,7 +45,8 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
++i, ++GTI) {
Value *Op = *i;
- uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+ TypeSize TSize = DL.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t Size = TSize.getKnownMinValue() & PtrSizeMask;
if (Constant *OpC = dyn_cast<Constant>(Op)) {
if (OpC->isZeroValue())
continue;
@@ -70,10 +71,12 @@ Value *llvm::emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL,
// Convert to correct type.
if (Op->getType() != IntIdxTy)
Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName() + ".c");
- if (Size != 1) {
+ if (Size != 1 || TSize.isScalable()) {
// We'll let instcombine(mul) convert this to a shl if possible.
- Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size),
- GEP->getName() + ".idx", false /*NUW*/,
+ auto *ScaleC = ConstantInt::get(IntIdxTy, Size);
+ Value *Scale =
+ !TSize.isScalable() ? ScaleC : Builder->CreateVScale(ScaleC);
+ Op = Builder->CreateMul(Op, Scale, GEP->getName() + ".idx", false /*NUW*/,
isInBounds /*NSW*/);
}
AddOffset(Op);
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 5cc6d9f80bac..7d266b7b246a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -396,3 +396,73 @@ define i1 @test61_as1(ptr addrspace(1) %foo, i16 %i, i16 %j) {
ret i1 %cmp
; Don't transform non-inbounds GEPs.
}
+
+define i1 @test_scalable_same(ptr %x) {
+; CHECK-LABEL: @test_scalable_same(
+; CHECK-NEXT: ret i1 false
+;
+ %a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
+ %b = getelementptr inbounds <vscale x 4 x i8>, ptr %x, i64 8
+ %c = icmp ugt ptr %a, %b
+ ret i1 %c
+}
+
+define i1 @test_scalable_x(ptr %x) {
+; CHECK-LABEL: @test_scalable_x(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[A_IDX_MASK:%.*]] = and i64 [[TMP1]], 576460752303423487
+; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[A_IDX_MASK]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
+ %c = icmp eq ptr %a, %x
+ ret i1 %c
+}
+
+define i1 @test_scalable_xc(ptr %x) {
+; CHECK-LABEL: @test_scalable_xc(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[A_IDX_MASK:%.*]] = and i64 [[TMP1]], 576460752303423487
+; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[A_IDX_MASK]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
+ %c = icmp eq ptr %x, %a
+ ret i1 %c
+}
+
+define i1 @test_scalable_xy(ptr %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test_scalable_xy(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP2]], [[J:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 4
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP4]], [[I:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[GEP2_IDX]], [[GEP1_IDX]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %bit = addrspacecast ptr %foo to ptr addrspace(3)
+ %gep1 = getelementptr inbounds <vscale x 4 x i32>, ptr addrspace(3) %bit, i64 %i
+ %gep2 = getelementptr inbounds <vscale x 4 x i8>, ptr %foo, i64 %j
+ %cast1 = addrspacecast ptr addrspace(3) %gep1 to ptr
+ %cmp = icmp ult ptr %cast1, %gep2
+ ret i1 %cmp
+}
+
+define i1 @test_scalable_ij(ptr %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test_scalable_ij(
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP2]], [[I:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 2
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP4]], [[J:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep1 = getelementptr inbounds <vscale x 4 x i32>, ptr %foo, i64 %i
+ %gep2 = getelementptr inbounds <vscale x 4 x i8>, ptr %foo, i64 %j
+ %cmp = icmp ult ptr %gep1, %gep2
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index a8f8a82e675b..af5fa5352ace 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -370,6 +370,67 @@ define i64 @gep_diff_with_bitcast(ptr %p, i64 %idx) {
ret i64 %i6
}

+define i64 @sub_scalable(ptr noundef %val1) {
+; CHECK-LABEL: @sub_scalable(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: ret i64 [[TMP1]]
+;
+entry:
+ %gep1 = getelementptr <vscale x 4 x i32>, ptr %val1, i64 1
+ %sub.ptr.lhs.cast.i = ptrtoint ptr %gep1 to i64
+ %sub.ptr.rhs.cast.i = ptrtoint ptr %val1 to i64
+ %sub.ptr.sub.i = sub i64 %sub.ptr.lhs.cast.i, %sub.ptr.rhs.cast.i
+ ret i64 %sub.ptr.sub.i
+}
+
+define i64 @sub_scalable2(ptr noundef %val1) {
+; CHECK-LABEL: @sub_scalable2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[TMP2]], 5
+; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[TMP1]], [[GEP2_IDX]]
+; CHECK-NEXT: ret i64 [[GEPDIFF]]
+;
+entry:
+ %gep1 = getelementptr <vscale x 4 x i32>, ptr %val1, i64 1
+ %sub.ptr.lhs.cast.i = ptrtoint ptr %gep1 to i64
+ %gep2 = getelementptr <vscale x 4 x i32>, ptr %val1, i64 2
+ %sub.ptr.rhs.cast.i = ptrtoint ptr %gep2 to i64
+ %sub.ptr.sub.i = sub i64 %sub.ptr.lhs.cast.i, %sub.ptr.rhs.cast.i
+ ret i64 %sub.ptr.sub.i
+}
+
+define i64 @nullptrtoint_scalable_c() {
+; CHECK-LABEL: @nullptrtoint_scalable_c(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[PTR_IDX:%.*]] = shl i64 [[TMP0]], 7
+; CHECK-NEXT: ret i64 [[PTR_IDX]]
+;
+entry:
+ %ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 8
+ %ret = ptrtoint ptr %ptr to i64
+ ret i64 %ret
+}
+
+define i64 @nullptrtoint_scalable_x(i64 %x) {
+; CHECK-LABEL: @nullptrtoint_scalable_x(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[PTR_IDX:%.*]] = mul nsw i64 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: ret i64 [[PTR_IDX]]
+;
+entry:
+ %ptr = getelementptr inbounds <vscale x 4 x i32>, ptr null, i64 %x
+ %ret = ptrtoint ptr %ptr to i64
+ ret i64 %ret
+}
+
define i1 @_gep_phi1(ptr noundef %str1) {
; CHECK-LABEL: @_gep_phi1(
; CHECK-NEXT: entry: