summaryrefslogtreecommitdiffstats
path: root/test/CodeGenCUDA
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2016-03-02 18:28:50 +0000
committerArtem Belevich <tra@google.com>2016-03-02 18:28:50 +0000
commit3ee234f49d5671180916e75e77c7829fd29cec51 (patch)
tree8c0cb1980648e541c0caeb3f18bbfdde917badd7 /test/CodeGenCUDA
parent5a333e954bbf581c26df310194df2001d2aa6b3f (diff)
[CUDA] Emit host-side 'shadows' for device-side global variables
... and register them with CUDA runtime. This is needed for commonly used cudaMemcpy*() APIs that use address of host-side shadow to access their counterparts on device side. Fixes PR26340 Differential Revision: http://reviews.llvm.org/D17779 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@262498 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGenCUDA')
-rw-r--r--test/CodeGenCUDA/device-stub.cu47
-rw-r--r--test/CodeGenCUDA/filter-decl.cu6
2 files changed, 46 insertions, 7 deletions
diff --git a/test/CodeGenCUDA/device-stub.cu b/test/CodeGenCUDA/device-stub.cu
index 7f5e159151..81d23a2990 100644
--- a/test/CodeGenCUDA/device-stub.cu
+++ b/test/CodeGenCUDA/device-stub.cu
@@ -2,6 +2,40 @@
#include "Inputs/cuda.h"
+// CHECK-DAG: @device_var = internal global i32
+__device__ int device_var;
+
+// CHECK-DAG: @constant_var = internal global i32
+__constant__ int constant_var;
+
+// CHECK-DAG: @shared_var = internal global i32
+__shared__ int shared_var;
+
+// Make sure host globals don't get internalized...
+// CHECK-DAG: @host_var = global i32
+int host_var;
+// ... and that extern vars remain external.
+// CHECK-DAG: @ext_host_var = external global i32
+extern int ext_host_var;
+
+// Shadows for external device-side variables are *definitions* of
+// those variables.
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __device__ int ext_device_var;
+// CHECK-DAG: @ext_device_var = internal global i32
+extern __constant__ int ext_constant_var;
+
+void use_pointers() {
+ int *p;
+ p = &device_var;
+ p = &constant_var;
+ p = &shared_var;
+ p = &host_var;
+ p = &ext_device_var;
+ p = &ext_constant_var;
+ p = &ext_host_var;
+}
+
// Make sure that all parts of GPU code init/cleanup are there:
// * constant unnamed string with the kernel name
// CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
@@ -32,9 +66,14 @@ __global__ void kernelfunc(int i, int j, int k) {}
// CHECK: call{{.*}}kernelfunc
void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
-// Test that we've built a function to register kernels
-// CHECK: define internal void @__cuda_register_kernels
+// Test that we've built a function to register kernels and global vars.
+// CHECK: define internal void @__cuda_register_globals
// CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
+// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// CHECK: ret void
// Test that we've built contructor..
// CHECK: define internal void @__cuda_module_ctor
@@ -42,8 +81,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper
// .. stores return value in __cuda_gpubin_handle
// CHECK-NEXT: store{{.*}}__cuda_gpubin_handle
-// .. and then calls __cuda_register_kernels
-// CHECK-NEXT: call void @__cuda_register_kernels
+// .. and then calls __cuda_register_globals
+// CHECK-NEXT: call void @__cuda_register_globals
// Test that we've created destructor.
// CHECK: define internal void @__cuda_module_dtor
diff --git a/test/CodeGenCUDA/filter-decl.cu b/test/CodeGenCUDA/filter-decl.cu
index 023ae61f3a..bc744a07a3 100644
--- a/test/CodeGenCUDA/filter-decl.cu
+++ b/test/CodeGenCUDA/filter-decl.cu
@@ -9,15 +9,15 @@
// CHECK-DEVICE-NOT: module asm "file scope asm is host only"
__asm__("file scope asm is host only");
-// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-HOST: constantdata = internal global
// CHECK-DEVICE: constantdata = externally_initialized global
__constant__ char constantdata[256];
-// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-HOST: devicedata = internal global
// CHECK-DEVICE: devicedata = externally_initialized global
__device__ char devicedata[256];
-// CHECK-HOST-NOT: shareddata = global
+// CHECK-HOST: shareddata = internal global
// CHECK-DEVICE: shareddata = global
__shared__ char shareddata[256];