diff options
author | Alexey Sotkin <alexey.sotkin@intel.com> | 2018-02-22 11:54:14 +0000 |
---|---|---|
committer | Alexey Sotkin <alexey.sotkin@intel.com> | 2018-02-22 11:54:14 +0000 |
commit | 9acbe6ca409da5a824a2ee6e967dedddc7cf4b69 (patch) | |
tree | 0a3243db274d85243f6b462ea47365ded5a61d88 /test/CodeGenOpenCL | |
parent | 0972dc790621cb6c6718ef9a5090a993cc0724c2 (diff) |
[OpenCL] Add '-cl-uniform-work-group-size' compile option
Summary:
OpenCL 2.0 specification defines '-cl-uniform-work-group-size' option,
which requires that the global work-size be a multiple of the work-group
size specified to clEnqueueNDRangeKernel and allows optimizations that
are made possible by this restriction.
This patch adds support for this option.
To record whether an OpenCL kernel has a uniform work-group size,
clang generates a 'uniform-work-group-size' function attribute for
every kernel:
- "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
- "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
'-cl-uniform-work-group-size' option was specified,
- "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
'-cl-uniform-work-group-size' option was specified.
If the function is not an OpenCL kernel, the 'uniform-work-group-size'
attribute isn't generated.
Patch by: krisb
Reviewers: yaxunl, Anastasia, b-sumner
Reviewed By: yaxunl, Anastasia
Subscribers: nhaehnle, yaxunl, Anastasia, cfe-commits
Differential Revision: https://reviews.llvm.org/D43570
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@325771 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGenOpenCL')
-rw-r--r-- | test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 | ||||
-rw-r--r-- | test/CodeGenOpenCL/cl-uniform-wg-size.cl | 16 | ||||
-rw-r--r-- | test/CodeGenOpenCL/convergent.cl | 5 |
3 files changed, 20 insertions, 3 deletions
diff --git a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 1027fd740c..aec00e7601 100644 --- a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -425,7 +425,7 @@ struct_char_arr32 func_ret_struct_char_arr32() return s; } -// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 { +// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 { // CHECK: ret i32 0 transparent_u func_transparent_union_ret() { diff --git a/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/test/CodeGenOpenCL/cl-uniform-wg-size.cl new file mode 100644 index 0000000000..76ace5dca2 --- /dev/null +++ b/test/CodeGenOpenCL/cl-uniform-wg-size.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM +// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM + +kernel void ker() {}; +// CHECK: define{{.*}}@ker() #0 + +void foo() {}; +// CHECK: define{{.*}}@foo() #1 + +// CHECK-LABEL: attributes #0 +// CHECK-UNIFORM: "uniform-work-group-size"="true" +// CHECK-NONUNIFORM: "uniform-work-group-size"="false" + +// CHECK-LABEL: attributes #1 +// CHECK-NOT: uniform-work-group-size diff --git a/test/CodeGenOpenCL/convergent.cl b/test/CodeGenOpenCL/convergent.cl index 285b637ca6..a011920761 100644 --- a/test/CodeGenOpenCL/convergent.cl +++ b/test/CodeGenOpenCL/convergent.cl @@ -127,7 +127,7 @@ void test_not_unroll() { // CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]] // CHECK-LABEL: @assume_convergent_asm -// CHECK: tail call void asm sideeffect "s_barrier", ""() #4 +// CHECK: tail call void asm sideeffect "s_barrier", ""() #5 kernel void assume_convergent_asm() { __asm__ 
volatile("s_barrier"); @@ -138,4 +138,5 @@ kernel void assume_convergent_asm() // CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} } // CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } // CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} } -// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } +// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} } +// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} } |