diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
commit | d83cd0531d86365ecc13222cb3fe04af23d19448 (patch) | |
tree | 28ce6aae04ae4daab227f17205cd276aeb517ff2 /test/OpenMP/parallel_private_codegen.cpp | |
parent | bde095a7a76a2e76167f10a1f754257baa27a038 (diff) |
[OPENMP] Outlined function for parallel and other regions with list of captured variables.
Currently all variables used in OpenMP regions are captured into a record and passed to outlined functions in this record. It may result in some poor performance because of too complex analysis later in optimization passes. Patch makes to emit outlined functions for parallel-based regions with a list of captured variables. It reduces code for 2*n GEPs, stores and loads at least.
Codegen for task-based regions remains unchanged because runtime requires that all captured variables are passed in captured record.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@247251 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/OpenMP/parallel_private_codegen.cpp')
-rw-r--r-- | test/OpenMP/parallel_private_codegen.cpp | 23 |
1 files changed, 8 insertions, 15 deletions
diff --git a/test/OpenMP/parallel_private_codegen.cpp b/test/OpenMP/parallel_private_codegen.cpp index d2d02a7c3d..616d50fae9 100644 --- a/test/OpenMP/parallel_private_codegen.cpp +++ b/test/OpenMP/parallel_private_codegen.cpp @@ -19,9 +19,7 @@ struct S { volatile int g = 1212; // CHECK: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK: [[CAP_MAIN_TY:%.+]] = type { i8 } // CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} } -// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i8 } template <typename T> T tmain() { S<T> test; @@ -45,13 +43,11 @@ int main() { [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( // LAMBDA-NOT: = getelementptr inbounds %{{.+}}, - // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* %{{.+}} to i8* - // LAMBDA: call{{.*}} void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) + // LAMBDA: call{{.*}} void {{.+}} @__kmpc_fork_call({{.+}}, i32 0, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}) #pragma omp parallel private(g) { - // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], g = 1; // LAMBDA: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 @@ -76,13 +72,11 @@ int main() { ^{ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* // BLOCKS-NOT: = getelementptr inbounds %{{.+}}, - // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* %{{.+}} to i8* - // BLOCKS: call{{.*}} void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]]) + // BLOCKS: call{{.*}} void {{.+}} @__kmpc_fork_call({{.+}}, i32 0, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}) #pragma omp parallel private(g) { - // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, - // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], g = 1; // BLOCKS: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]], // BLOCKS-NOT: [[G]]{{[[^:word:]]}} @@ -118,13 +112,12 @@ int main() { // CHECK: define i{{[0-9]+}} @main() // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: %{{.+}} = bitcast [[CAP_MAIN_TY]]* -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_MAIN_TY]]*)* [[MAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void // CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() // CHECK: call void [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // CHECK: ret // -// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_MAIN_TY]]* %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]], @@ -145,11 +138,11 @@ int main() { // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_TMAIN_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void // CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // -// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_TMAIN_TY]]* %{{.+}}) +// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}) // CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}, // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], |