summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Savonichev <andrew.savonichev@intel.com>2018-12-10 12:03:00 +0000
committerAndrew Savonichev <andrew.savonichev@intel.com>2018-12-10 12:03:00 +0000
commit453d65eef03dda87e18c04f33b79f8d4e47b2e57 (patch)
tree3bb73a1627a0826ca14a05d8b5618891b82bfece
parent6a72a16cd26cbbba30598119eb033e6535d1a9ca (diff)
[OpenCL][CodeGen] Fix replacing memcpy with addrspacecast
Summary: If a function argument is byval and RV is located in default or alloca address space an optimization of creating addrspacecast instead of memcpy is performed. That is not correct for OpenCL, where that can lead to a situation of address space casting from __private * to __global *. See an example below: ``` typedef struct { int x; } MyStruct; void foo(MyStruct val) {} kernel void KernelOneMember(__global MyStruct* x) { foo (*x); } ``` for this code clang generated following IR: ... %0 = load %struct.MyStruct addrspace(1)*, %struct.MyStruct addrspace(1)** %x.addr, align 4 %1 = addrspacecast %struct.MyStruct addrspace(1)* %0 to %struct.MyStruct* ... So the optimization was disallowed for OpenCL if RV is located in an address space different than that of the argument (0). Reviewers: yaxunl, Anastasia Reviewed By: Anastasia Subscribers: cfe-commits, asavonic Differential Revision: https://reviews.llvm.org/D54947 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@348752 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/CGCall.cpp21
-rw-r--r--test/CodeGenOpenCL/addr-space-struct-arg.cl15
2 files changed, 29 insertions, 7 deletions
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 6d89f3919f..2fddc4bd99 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -3958,15 +3958,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else if (I->hasLValue()) {
auto LV = I->getKnownLValue();
auto AS = LV.getAddressSpace();
+
if ((!ArgInfo.getIndirectByVal() &&
(LV.getAlignment() >=
- getContext().getTypeAlignInChars(I->Ty))) ||
- (ArgInfo.getIndirectByVal() &&
- ((AS != LangAS::Default && AS != LangAS::opencl_private &&
- AS != CGM.getASTAllocaAddressSpace())))) {
+ getContext().getTypeAlignInChars(I->Ty)))) {
+ NeedCopy = true;
+ }
+ if (!getLangOpts().OpenCL) {
+ if ((ArgInfo.getIndirectByVal() &&
+ (AS != LangAS::Default &&
+ AS != CGM.getASTAllocaAddressSpace()))) {
+ NeedCopy = true;
+ }
+ }
+ // For OpenCL even if RV is located in default or alloca address space
+ // we don't want to perform address space cast for it.
+ else if ((ArgInfo.getIndirectByVal() &&
+ Addr.getType()->getAddressSpace() != IRFuncTy->
+ getParamType(FirstIRArg)->getPointerAddressSpace())) {
NeedCopy = true;
}
}
+
if (NeedCopy) {
// Create an aligned temporary, and copy to it.
Address AI = CreateMemTempWithoutCast(
diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl
index f8d7073f92..6f923b7fd4 100644
--- a/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,6 +1,9 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
+
+typedef int int2 __attribute__((ext_vector_type(2)));
typedef struct {
int cells[9];
@@ -130,6 +133,12 @@ kernel void KernelOneMember(struct StructOneMember u) {
FuncOneMember(u);
}
+// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
+// SPIR-NOT: addrspacecast
+kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
+ FuncOneMember(*u);
+}
+
// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
// AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8