Creating branches/google/stable and tags/google/stable/2019-05-14 from r360103upstream/google/stable

git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/google/stable@360714 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jordan Rupprecht <rupprecht@google.com> 2019-05-14 21:58:59 +0000
committer: Jordan Rupprecht <rupprecht@google.com> 2019-05-14 21:58:59 +0000
commit: b35a2aa71f76a334a9c98c0a3c3995b5d902d2b9 (patch)
tree: cdff4a5d1a715d4ad622fd8f190128b54bebe440 /test/SemaCUDA
parent: 3748d41833787fcbf59cc5624e8d2b042a8991bc (diff)
parent: 741e05796da92b46d4f7bcbee00702ff37df6489 (diff)
11 files changed, 288 insertions, 12 deletions
diff --git a/test/SemaCUDA/Inputs/cuda.h b/test/SemaCUDA/Inputs/cuda.h
index 4544369411..2600bfa9c4 100644
--- a/test/SemaCUDA/Inputs/cuda.h
+++ b/test/SemaCUDA/Inputs/cuda.h
@@ -18,9 +18,17 @@ struct dim3 {
 };
 
 typedef struct cudaStream *cudaStream_t;
-
-int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
-                      cudaStream_t stream = 0);
+typedef enum cudaError {} cudaError_t;
+
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+                                 size_t sharedSize = 0,
+                                 cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+                                           size_t sharedSize = 0,
+                                           cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+                                        dim3 blockDim, void **args,
+                                        size_t sharedMem, cudaStream_t stream);
 
 // Host- and device-side placement new overloads.
 void *operator new(__SIZE_TYPE__, void *p) { return p; }
diff --git a/test/SemaCUDA/amdgpu-attrs.cu b/test/SemaCUDA/amdgpu-attrs.cu
index 63abda9eea..4811ef796c 100644
--- a/test/SemaCUDA/amdgpu-attrs.cu
+++ b/test/SemaCUDA/amdgpu-attrs.cu
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
 #include "Inputs/cuda.h"
 
 
@@ -78,3 +78,119 @@ __global__ void vec_type_hint_int() {}
 // expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}}
 __attribute__((intel_reqd_sub_group_size(64)))
 __global__ void intel_reqd_sub_group_size_64() {}
+
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("32", 64)))
+__global__ void non_int_min_flat_work_group_size_32_64() {}
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "64")))
+__global__ void non_int_max_flat_work_group_size_32_64() {}
+
+int nc_min = 32, nc_max = 64;
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(nc_min, 64)))
+__global__ void non_cint_min_flat_work_group_size_32_64() {}
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, nc_max)))
+__global__ void non_cint_max_flat_work_group_size_32_64() {}
+
+const int c_min = 16, c_max = 32;
+__attribute__((amdgpu_flat_work_group_size(c_min * 2, 64)))
+__global__ void cint_min_flat_work_group_size_32_64() {}
+__attribute__((amdgpu_flat_work_group_size(32, c_max * 2)))
+__global__ void cint_max_flat_work_group_size_32_64() {}
+
+// expected-error@+3{{'T' does not refer to a value}}
+// expected-note@+1{{declared here}}
+template<typename T>
+__attribute__((amdgpu_flat_work_group_size(T, 64)))
+__global__ void template_class_min_flat_work_group_size_32_64() {}
+// expected-error@+3{{'T' does not refer to a value}}
+// expected-note@+1{{declared here}}
+template<typename T>
+__attribute__((amdgpu_flat_work_group_size(32, T)))
+__global__ void template_class_max_flat_work_group_size_32_64() {}
+
+template<unsigned a, unsigned b>
+__attribute__((amdgpu_flat_work_group_size(a, b)))
+__global__ void template_flat_work_group_size_32_64() {}
+template __global__ void template_flat_work_group_size_32_64<32, 64>();
+
+template<unsigned a, unsigned b, unsigned c>
+__attribute__((amdgpu_flat_work_group_size(a + b, b + c)))
+__global__ void template_complex_flat_work_group_size_32_64() {}
+template __global__ void template_complex_flat_work_group_size_32_64<16, 16, 48>();
+
+unsigned ipow2(unsigned n) { return n == 0 ? 1 : 2 * ipow2(n - 1); }
+constexpr unsigned ce_ipow2(unsigned n) { return n == 0 ? 1 : 2 * ce_ipow2(n - 1); }
+
+__attribute__((amdgpu_flat_work_group_size(ce_ipow2(5), ce_ipow2(6))))
+__global__ void cexpr_flat_work_group_size_32_64() {}
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(ipow2(5), 64)))
+__global__ void non_cexpr_min_flat_work_group_size_32_64() {}
+// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, ipow2(6))))
+__global__ void non_cexpr_max_flat_work_group_size_32_64() {}
+
+// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu("2")))
+__global__ void non_int_min_waves_per_eu_2() {}
+// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, "4")))
+__global__ void non_int_max_waves_per_eu_2_4() {}
+
+// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(nc_min)))
+__global__ void non_cint_min_waves_per_eu_2() {}
+// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, nc_max)))
+__global__ void non_cint_min_waves_per_eu_2_4() {}
+
+__attribute__((amdgpu_waves_per_eu(c_min / 8)))
+__global__ void cint_min_waves_per_eu_2() {}
+__attribute__((amdgpu_waves_per_eu(c_min / 8, c_max / 8)))
+__global__ void cint_min_waves_per_eu_2_4() {}
+
+// expected-error@+3{{'T' does not refer to a value}}
+// expected-note@+1{{declared here}}
+template<typename T>
+__attribute__((amdgpu_waves_per_eu(T)))
+__global__ void cint_min_waves_per_eu_2() {}
+// expected-error@+3{{'T' does not refer to a value}}
+// expected-note@+1{{declared here}}
+template<typename T>
+__attribute__((amdgpu_waves_per_eu(2, T)))
+__global__ void cint_min_waves_per_eu_2_4() {}
+
+template<unsigned a>
+__attribute__((amdgpu_waves_per_eu(a)))
+__global__ void template_waves_per_eu_2() {}
+template __global__ void template_waves_per_eu_2<2>();
+
+template<unsigned a, unsigned b>
+__attribute__((amdgpu_waves_per_eu(a, b)))
+__global__ void template_waves_per_eu_2_4() {}
+template __global__ void template_waves_per_eu_2_4<2, 4>();
+
+template<unsigned a, unsigned b, unsigned c>
+__attribute__((amdgpu_waves_per_eu(a + b, c - b)))
+__global__ void template_complex_waves_per_eu_2_4() {}
+template __global__ void template_complex_waves_per_eu_2_4<1, 1, 5>();
+
+// expected-error@+2{{expression contains unexpanded parameter pack 'Args'}}
+template<unsigned... Args>
+__attribute__((amdgpu_waves_per_eu(Args)))
+__global__ void template_waves_per_eu_2() {}
+template __global__ void template_waves_per_eu_2<2, 4>();
+
+__attribute__((amdgpu_waves_per_eu(ce_ipow2(1))))
+__global__ void cexpr_waves_per_eu_2() {}
+__attribute__((amdgpu_waves_per_eu(ce_ipow2(1), ce_ipow2(2))))
+__global__ void cexpr_waves_per_eu_2_4() {}
+// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(ipow2(1))))
+__global__ void non_cexpr_waves_per_eu_2() {}
+// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, ipow2(2))))
+__global__ void non_cexpr_waves_per_eu_2_4() {}
diff --git a/test/SemaCUDA/amdgpu-size_t.cu b/test/SemaCUDA/amdgpu-size_t.cu
new file mode 100644
index 0000000000..66999782aa
--- /dev/null
+++ b/test/SemaCUDA/amdgpu-size_t.cu
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s
+
+// expected-no-diagnostics
+typedef unsigned __int64 size_t;
+typedef __int64 intptr_t;
+typedef unsigned __int64 uintptr_t;
+
diff --git a/test/SemaCUDA/amdgpu-windows-vectorcall.cu b/test/SemaCUDA/amdgpu-windows-vectorcall.cu
new file mode 100644
index 0000000000..7636572f69
--- /dev/null
+++ b/test/SemaCUDA/amdgpu-windows-vectorcall.cu
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s
+
+__cdecl void hostf1();
+__vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of type 'void () __attribute__((cdecl))'}}
diff --git a/test/SemaCUDA/asm_delayed_diags.cu b/test/SemaCUDA/asm_delayed_diags.cu
new file mode 100644
index 0000000000..457054f608
--- /dev/null
+++ b/test/SemaCUDA/asm_delayed_diags.cu
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -DHOST -triple x86_64-unknown-linux-gnu -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -verify %s -DHOST -DHOST_USED -triple x86_64-unknown-linux-gnu -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s -DDEVICE_NOT_USED -triple nvptx-unknown-cuda -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s -DDEVICE -triple nvptx-unknown-cuda -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s -DDEVICE -DDEVICE_USED -triple nvptx-unknown-cuda -Wuninitialized
+
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+#if (defined(HOST) && !defined(HOST_USED)) || defined(DEVICE_NOT_USED)
+// expected-no-diagnostics
+#endif
+
+#include "Inputs/cuda.h"
+
+static __device__ __host__ void t1(int r) {
+  __asm__("PR3908 %[lf] %[xx] %[li] %[r]"
+          : [ r ] "+r"(r)
+          : [ lf ] "mx"(0), [ li ] "mr"(0), [ xx ] "x"((double)(0)));
+}
+
+static __device__ __host__ unsigned t2(signed char input) {
+  unsigned output;
+  __asm__("xyz"
+          : "=a"(output)
+          : "0"(input));
+  return output;
+}
+
+static __device__ __host__ double t3(double x) {
+  register long double result;
+  __asm __volatile("frndint"
+                   : "=t"(result)
+                   : "0"(x));
+  return result;
+}
+
+static __device__ __host__ unsigned char t4(unsigned char a, unsigned char b) {
+  unsigned int la = a;
+  unsigned int lb = b;
+  unsigned int bigres;
+  unsigned char res;
+  __asm__("0:\n1:\n"
+          : [ bigres ] "=la"(bigres)
+          : [ la ] "0"(la), [ lb ] "c"(lb)
+          : "edx", "cc");
+  res = bigres;
+  return res;
+}
+
+static __device__ __host__ void t5(void) {
+  __asm__ __volatile__(
+      "finit"
+      :
+      :
+      : "st", "st(1)", "st(2)", "st(3)",
+        "st(4)", "st(5)", "st(6)", "st(7)",
+        "fpsr", "fpcr");
+}
+
+typedef long long __m256i __attribute__((__vector_size__(32)));
+static __device__ __host__ void t6(__m256i *p) {
+  __asm__ volatile("vmovaps  %0, %%ymm0" ::"m"(*(__m256i *)p)
+                   : "ymm0");
+}
+
+static __device__ __host__ void t7(__m256i *p) {
+  __asm__ volatile("vmovaps  %0, %%ymm0" ::"m"(*(__m256i *)p)
+                   : "r0");
+}
+
+#ifdef DEVICE
+__device__ int m() {
+  t1(0);
+  t2(0);
+  t3(0);
+  t4(0, 0);
+  t5();
+  t6(0);
+#ifdef DEVICE_USED
+  t7(0);
+#endif // DEVICE_USED
+  return 0;
+}
+#endif // DEVICE
+
+#ifdef HOST
+__host__ int main() {
+  t1(0);
+  t2(0);
+  t3(0);
+  t4(0, 0);
+  t5();
+  t6(0);
+#ifdef HOST_USED
+  t7(0);
+#endif // HOST_USED
+  return 0;
+}
+#endif // HOST
+
+#if defined(HOST_USED)
+// expected-error@69 {{unknown register name 'r0' in asm}}
+// expected-note@96 {{called by 'main'}}
+#elif defined(DEVICE)
+// expected-error@19 {{invalid input constraint 'mx' in asm}}
+// expected-error@25 {{invalid output constraint '=a' in asm}}
+// expected-error@33 {{invalid output constraint '=t' in asm}}
+// expected-error@44 {{invalid output constraint '=la' in asm}}
+// expected-error@56 {{unknown register name 'st' in asm}}
+// expected-error@64 {{unknown register name 'ymm0' in asm}}
+// expected-note@74 {{called by 'm'}}
+// expected-note@75 {{called by 'm'}}
+// expected-note@76 {{called by 'm'}}
+// expected-note@77 {{called by 'm'}}
+// expected-note@78 {{called by 'm'}}
+// expected-note@79 {{called by 'm'}}
+#endif
diff --git a/test/SemaCUDA/call-device-fn-from-host.cu b/test/SemaCUDA/call-device-fn-from-host.cu
index 26215d581d..ba1ce86020 100644
--- a/test/SemaCUDA/call-device-fn-from-host.cu
+++ b/test/SemaCUDA/call-device-fn-from-host.cu
@@ -37,7 +37,7 @@ __host__ __device__ void T::hd3() {
 }
 
 template <typename T> __host__ __device__ void hd2() { device_fn(); }
-// expected-error@-1 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __device__ function 'device_fn' in __host__ __device__ function}}
 void host_fn() { hd2<int>(); }
 
 __host__ __device__ void hd() { device_fn(); }
@@ -90,3 +90,8 @@ __host__ __device__ void fn_ptr_template() {
 static __host__ __device__ void hd_func() { device_fn(); }
 __global__ void kernel() { hd_func(); }
 void host_func(void) { kernel<<<1, 1>>>(); }
+
+// Should allow host function call kernel template with device function argument.
+__device__ void f();
+template<void(*F)()> __global__ void t() { F(); }
+__host__ void g() { t<f><<<1,1>>>(); }
diff --git a/test/SemaCUDA/call-host-fn-from-device.cu b/test/SemaCUDA/call-host-fn-from-device.cu
index acdd291b66..c5bbd63d8e 100644
--- a/test/SemaCUDA/call-host-fn-from-device.cu
+++ b/test/SemaCUDA/call-host-fn-from-device.cu
@@ -56,14 +56,14 @@ __host__ __device__ void T::hd3() {
 }
 
 template <typename T> __host__ __device__ void hd2() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __global__ void kernel() { hd2<int>(); }
 
 __host__ __device__ void hd() { host_fn(); }
 // expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 
 template <typename T> __host__ __device__ void hd3() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
+// expected-error@-1 2 {{reference to __host__ function 'host_fn' in __host__ __device__ function}}
 __device__ void device_fn() { hd3<int>(); }
 
 // No error because this is never instantiated.
diff --git a/test/SemaCUDA/config-type.cu b/test/SemaCUDA/config-type.cu
index a469d38d3e..a122c4539a 100644
--- a/test/SemaCUDA/config-type.cu
+++ b/test/SemaCUDA/config-type.cu
@@ -1,3 +1,7 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -target-sdk-version=8.0 -fsyntax-only -verify=legacy-launch %s
+// RUN: %clang_cc1 -target-sdk-version=9.2 -fsyntax-only -verify=new-launch %s
 
-void cudaConfigureCall(unsigned gridSize, unsigned blockSize); // expected-error {{must have scalar return type}}
+// legacy-launch-error@+1 {{must have scalar return type}}
+void cudaConfigureCall(unsigned gridSize, unsigned blockSize);
+// new-launch-error@+1 {{must have scalar return type}}
+void __cudaPushCallConfiguration(unsigned gridSize, unsigned blockSize);
diff --git a/test/SemaCUDA/cuda-inherits-calling-conv.cu b/test/SemaCUDA/cuda-inherits-calling-conv.cu
index 67c438fa62..881f2945b1 100644
--- a/test/SemaCUDA/cuda-inherits-calling-conv.cu
+++ b/test/SemaCUDA/cuda-inherits-calling-conv.cu
@@ -24,7 +24,7 @@ struct Foo<T()> {};
 // expected-no-diagnostics
 #else
 // expected-error@+4 {{redefinition of 'Foo}}
-// expected-warning@+3 {{calling convention '__fastcall' ignored}}
+// expected-warning@+3 {{'__fastcall' calling convention ignored}}
 #endif
 template <class T>
 struct Foo<T __fastcall()> {};
diff --git a/test/SemaCUDA/float16.cu b/test/SemaCUDA/float16.cu
new file mode 100644
index 0000000000..a9cbe87f32
--- /dev/null
+++ b/test/SemaCUDA/float16.cu
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+// expected-no-diagnostics
+#include "Inputs/cuda.h"
+
+__device__ void f(_Float16 x);
+
+__device__ _Float16 x = 1.0f16;
diff --git a/test/SemaCUDA/vla.cu b/test/SemaCUDA/vla.cu
index b65ae5e5fe..cf3054cd8e 100644
--- a/test/SemaCUDA/vla.cu
+++ b/test/SemaCUDA/vla.cu
@@ -1,5 +1,9 @@
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -verify %s
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -verify -DHOST %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -verify -DHOST %s
+
+#ifndef __CUDA_ARCH__
+// expected-no-diagnostics
+#endif
 
 #include "Inputs/cuda.h"
 
@@ -8,7 +12,10 @@ void host(int n) {
 }
 
 __device__ void device(int n) {
-  int x[n];  // expected-error {{cannot use variable-length arrays in __device__ functions}}
+  int x[n];
+#ifdef __CUDA_ARCH__
+  // expected-error@-2 {{cannot use variable-length arrays in __device__ functions}}
+#endif
 }
 
 __host__ __device__ void hd(int n) {
author	Jordan Rupprecht <rupprecht@google.com>	2019-05-14 21:58:59 +0000
committer	Jordan Rupprecht <rupprecht@google.com>	2019-05-14 21:58:59 +0000
commit	b35a2aa71f76a334a9c98c0a3c3995b5d902d2b9 (patch)
tree	cdff4a5d1a715d4ad622fd8f190128b54bebe440 /test/SemaCUDA
parent	3748d41833787fcbf59cc5624e8d2b042a8991bc (diff)
parent	741e05796da92b46d4f7bcbee00702ff37df6489 (diff)