diff options
author | Artem Belevich <tra@google.com> | 2016-03-02 18:28:50 +0000 |
---|---|---|
committer | Artem Belevich <tra@google.com> | 2016-03-02 18:28:50 +0000 |
commit | 3ee234f49d5671180916e75e77c7829fd29cec51 (patch) | |
tree | 8c0cb1980648e541c0caeb3f18bbfdde917badd7 /test/CodeGenCUDA | |
parent | 5a333e954bbf581c26df310194df2001d2aa6b3f (diff) |
[CUDA] Emit host-side 'shadows' for device-side global variables
... and register them with CUDA runtime.
This is needed for commonly used cudaMemcpy*() APIs that use address of
host-side shadow to access their counterparts on device side.
Fixes PR26340
Differential Revision: http://reviews.llvm.org/D17779
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@262498 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGenCUDA')
-rw-r--r-- | test/CodeGenCUDA/device-stub.cu | 47 | ||||
-rw-r--r-- | test/CodeGenCUDA/filter-decl.cu | 6 |
2 files changed, 46 insertions, 7 deletions
diff --git a/test/CodeGenCUDA/device-stub.cu b/test/CodeGenCUDA/device-stub.cu index 7f5e159151..81d23a2990 100644 --- a/test/CodeGenCUDA/device-stub.cu +++ b/test/CodeGenCUDA/device-stub.cu @@ -2,6 +2,40 @@ #include "Inputs/cuda.h" +// CHECK-DAG: @device_var = internal global i32 +__device__ int device_var; + +// CHECK-DAG: @constant_var = internal global i32 +__constant__ int constant_var; + +// CHECK-DAG: @shared_var = internal global i32 +__shared__ int shared_var; + +// Make sure host globals don't get internalized... +// CHECK-DAG: @host_var = global i32 +int host_var; +// ... and that extern vars remain external. +// CHECK-DAG: @ext_host_var = external global i32 +extern int ext_host_var; + +// Shadows for external device-side variables are *definitions* of +// those variables. +// CHECK-DAG: @ext_device_var = internal global i32 +extern __device__ int ext_device_var; +// CHECK-DAG: @ext_device_var = internal global i32 +extern __constant__ int ext_constant_var; + +void use_pointers() { + int *p; + p = &device_var; + p = &constant_var; + p = &shared_var; + p = &host_var; + p = &ext_device_var; + p = &ext_constant_var; + p = &ext_host_var; +} + // Make sure that all parts of GPU code init/cleanup are there: // * constant unnamed string with the kernel name // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00" @@ -32,9 +66,14 @@ __global__ void kernelfunc(int i, int j, int k) {} // CHECK: call{{.*}}kernelfunc void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } -// Test that we've built a function to register kernels -// CHECK: define internal void @__cuda_register_kernels +// Test that we've built a function to register kernels and global vars. +// CHECK: define internal void @__cuda_register_globals // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 +// CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 +// CHECK: ret void // Test that we've built contructor.. // CHECK: define internal void @__cuda_module_ctor @@ -42,8 +81,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper // .. stores return value in __cuda_gpubin_handle // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle -// .. and then calls __cuda_register_kernels -// CHECK-NEXT: call void @__cuda_register_kernels +// .. and then calls __cuda_register_globals +// CHECK-NEXT: call void @__cuda_register_globals // Test that we've created destructor. // CHECK: define internal void @__cuda_module_dtor diff --git a/test/CodeGenCUDA/filter-decl.cu b/test/CodeGenCUDA/filter-decl.cu index 023ae61f3a..bc744a07a3 100644 --- a/test/CodeGenCUDA/filter-decl.cu +++ b/test/CodeGenCUDA/filter-decl.cu @@ -9,15 +9,15 @@ // CHECK-DEVICE-NOT: module asm "file scope asm is host only" __asm__("file scope asm is host only"); -// CHECK-HOST-NOT: constantdata = externally_initialized global +// CHECK-HOST: constantdata = internal global // CHECK-DEVICE: constantdata = externally_initialized global __constant__ char constantdata[256]; -// CHECK-HOST-NOT: devicedata = externally_initialized global +// CHECK-HOST: devicedata = internal global // CHECK-DEVICE: devicedata = externally_initialized global __device__ char devicedata[256]; -// CHECK-HOST-NOT: shareddata = global +// CHECK-HOST: shareddata = internal global // CHECK-DEVICE: shareddata = global __shared__ char shareddata[256]; |