diff options
author | Hans Wennborg <hans@hanshq.net> | 2018-02-02 13:50:25 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2018-02-02 13:50:25 +0000 |
commit | 13240bd5e5ef6c29e521c875ee2b68e08f5aa363 (patch) | |
tree | b41403e40b92fad54d09f70292bc4821dab96e3d | |
parent | 52d11d163ef739537988545ba54a8b6fc1e18bcb (diff) |
Merging r323907 and r323913:
------------------------------------------------------------------------
r323907 | mareko | 2018-01-31 21:17:52 +0100 (Wed, 31 Jan 2018) | 11 lines
[SeparateConstOffsetFromGEP] Preserve metadata when splitting GEPs
Summary:
!amdgpu.uniform needs to be preserved for AMDGPU, otherwise bad things
happen.
Reviewers: arsenm, nhaehnle, jingyue, broune, majnemer, bjarke.roune, dblaikie
Subscribers: wdng, tpr, llvm-commits
Differential Revision: https://reviews.llvm.org/D42744
------------------------------------------------------------------------
------------------------------------------------------------------------
r323913 | mareko | 2018-01-31 21:49:19 +0100 (Wed, 31 Jan 2018) | 1 line
[SeparateConstOffsetFromGEP] Fix up addrspace in the AMDGPU test
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@324088 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 2 | ||||
-rw-r--r-- | test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll | 45 |
2 files changed, 47 insertions, 0 deletions
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 8fa9ffb6d014..4a96e0ddca16 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1071,6 +1071,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + NewGEP->copyMetadata(*GEP); // Inherit the inbounds attribute of the original GEP. cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); } else { @@ -1095,6 +1096,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { Type::getInt8Ty(GEP->getContext()), NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep", GEP); + NewGEP->copyMetadata(*GEP); // Inherit the inbounds attribute of the original GEP. cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds); if (GEP->getType() != I8PtrTy) diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll index 23ec0ca25544..43fe18f1aa25 100644 --- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll +++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll @@ -92,3 +92,48 @@ define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y store float %tmp21, float addrspace(1)* %output, align 4 ret void } + +; IR-LABEL: @keep_metadata( +; IR: getelementptr {{.*}} !amdgpu.uniform +; IR: getelementptr {{.*}} !amdgpu.uniform +; IR: getelementptr {{.*}} !amdgpu.uniform +define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 { +main_body: + %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8 + %23 = bitcast float %22 to i32 + %24 = shl i32 %23, 1 + %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0 + %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0 + %27 = shl i32 %23, 2 + %28 = or i32 %27, 3 + %29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)* + %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0 + %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0 + %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8 + %33 = extractelement <4 x float> %32, i32 0 + %34 = extractelement <4 x float> %32, i32 1 + %35 = extractelement <4 x float> %32, i32 2 + %36 = extractelement <4 x float> %32, i32 3 + %37 = bitcast float %4 to i32 + %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4 + %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5 + %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6 + %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7 + %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8 + %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19 + ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43 +} + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6 + +; Function Attrs: nounwind readonly +declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7 + + +!0 = !{} + +attributes #5 = { "InitialPSInputAddr"="45175" } +attributes #6 = { nounwind readnone speculatable } +attributes #7 = { nounwind readonly } +attributes #8 = { nounwind readnone } |