diff options
author | Florian Hahn <flo@fhahn.com> | 2024-05-03 18:54:00 +0100 |
---|---|---|
committer | Florian Hahn <flo@fhahn.com> | 2024-05-03 18:54:00 +0100 |
commit | 401ecb4ccc2a319e55155b6a8558aa3478e5405e (patch) | |
tree | f5397000f512edf30add1b836c614151da128583 | |
parent | 76508dce4380e0cea2ecb396200a161f7dbefd0b (diff) |
[LV] Add test showing miscompile with store reductions and RT checks.
Add a new test showing how a loop gets vectorized incorrectly with an
invariant store reduction where the same location is also read, when
vectorizing with runtime checks.
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll | 53 |
1 files changed, 44 insertions, 9 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index 5584aa969367..2eda68742094 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope !0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 @@ -118,6 +118,41 @@ exit: ret void } +; Check that if we have a read from an invariant address, we do not vectorize, +; even if we vectorize with runtime checks. The test below is a variant of +; @reduc_store_load with a non-constant dependence distance, resulting in +; vectorization with runtime checks. +; +; FIXME: currently this gets vectorized incorrectly. 
+; CHECK-LABEL: @reduc_store_load_with_non_constant_distance_dependence +; CHECK: vector.body: +define void @reduc_store_load_with_non_constant_distance_dependence(ptr %dst, ptr noalias %dst.2, i64 %off) { +entry: + %gep.dst = getelementptr inbounds i32, ptr %dst, i64 42 + %dst.2.off = getelementptr inbounds i32, ptr %dst.2, i64 %off + store i32 0, ptr %gep.dst, align 4 + br label %for.body + +for.body: + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %gep.src = getelementptr inbounds i32, ptr %dst.2, i64 %iv + %0 = load i32, ptr %gep.src, align 4 + %iv.off = mul i64 %iv, 2 + %add = add nsw i32 %sum, %0 + %lv = load i32, ptr %gep.dst + store i32 %add, ptr %gep.dst, align 4 + %gep.src.2 = getelementptr inbounds i32, ptr %dst.2.off, i64 %iv + store i32 %lv, ptr %gep.src.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} + + ; Final value is not guaranteed to be stored in an invariant address. ; We don't vectorize in that case. 
; @@ -186,10 +221,10 @@ for.end: ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !12 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !12 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2 @@ -204,10 +239,10 @@ for.end: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP19]], align 4, !alias.scope !12 -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope !12 -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope !12 -; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4, !alias.scope !12 +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[TMP30:%.*]] = 
insertelement <4 x i32> poison, i32 [[TMP26]], i32 0 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2 |