-rw-r--r--  lib/CodeGen/TargetInfo.cpp         25
-rw-r--r--  test/CodeGen/ppc64-align-struct.c  32
-rw-r--r--  test/CodeGen/ppc64-vector.c         2
-rw-r--r--  test/CodeGen/ppc64le-aggregates.c  11
4 files changed, 57 insertions, 13 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 2ed33b00bc..5da22c3e6c 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3178,6 +3178,31 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
     return ABIArgInfo::getDirect(CoerceTy);
   }
 
+  // If an aggregate may end up fully in registers, we do not
+  // use the ByVal method, but pass the aggregate as an array.
+  // This is usually beneficial since we avoid forcing the
+  // back-end to store the argument to memory.
+  uint64_t Bits = getContext().getTypeSize(Ty);
+  if (Bits > 0 && Bits <= 8 * GPRBits) {
+    llvm::Type *CoerceTy;
+
+    // Types up to 8 bytes are passed as an integer type (which will be
+    // properly aligned in the argument save area doubleword).
+    if (Bits <= GPRBits)
+      CoerceTy = llvm::IntegerType::get(getVMContext(),
+                                        llvm::RoundUpToAlignment(Bits, 8));
+    // Larger types are passed as arrays, with the base type selected
+    // according to the required alignment in the save area.
+    else {
+      uint64_t RegBits = ABIAlign * 8;
+      uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits;
+      llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
+      CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
+    }
+
+    return ABIArgInfo::getDirect(CoerceTy);
+  }
+
   // All other aggregates are passed ByVal.
   return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
                                  /*Realign=*/TyAlign > ABIAlign);
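
On PPC64, GPRBits is 64, so the new fast path covers aggregates of up to 8 * 64 = 512 bits, i.e. the 64 bytes that fit in eight general-purpose registers. The following is a sketch of the selection arithmetic only, with plain C standing in for the Clang/LLVM API, and assuming ABIAlign is capped at 16 bytes (as the test3/test7 checks below suggest):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical mirror of the classifyArgumentType change above.
     * "bits" is the padded aggregate size (what getTypeSize returns),
     * "abi_align" the parameter save-area alignment in bytes (8 or 16). */
    static void classify(const char *name, uint64_t bits, uint64_t abi_align) {
      if (bits == 0 || bits > 8 * 64) {
        printf("%-6s -> ByVal\n", name);                 /* needs > 8 GPRs */
      } else if (bits <= 64) {
        /* One doubleword: iN, rounded up to a whole number of bytes. */
        printf("%-6s -> i%u\n", name, (unsigned)((bits + 7) / 8 * 8));
      } else {
        /* Array of register-sized integers (RoundUpToAlignment by hand). */
        uint64_t reg_bits = abi_align * 8;
        uint64_t nregs = (bits + reg_bits - 1) / reg_bits;
        printf("%-6s -> [%u x i%u]\n", name,
               (unsigned)nregs, (unsigned)reg_bits);
      }
    }

    int main(void) {
      classify("test1", 64, 8);   /* { int, int }            -> i64        */
      classify("test2", 128, 16); /* aligned(16) pads to 16B -> [1 x i128] */
      classify("test3", 256, 16); /* aligned(32) pads to 32B -> [2 x i128] */
      classify("test4", 96, 8);   /* { int, int, int }       -> [2 x i64]  */
      classify("test5", 544, 8);  /* int[17] = 68B > 64B     -> ByVal      */
      return 0;
    }

These expected results line up with the CHECK lines in the tests that follow.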
diff --git a/test/CodeGen/ppc64-align-struct.c b/test/CodeGen/ppc64-align-struct.c
index f820d2fbba..a50c849b45 100644
--- a/test/CodeGen/ppc64-align-struct.c
+++ b/test/CodeGen/ppc64-align-struct.c
@@ -6,30 +6,48 @@ struct test1 { int x; int y; };
 struct test2 { int x; int y; } __attribute__((aligned (16)));
 struct test3 { int x; int y; } __attribute__((aligned (32)));
 struct test4 { int x; int y; int z; };
+struct test5 { int x[17]; };
+struct test6 { int x[17]; } __attribute__((aligned (16)));
+struct test7 { int x[17]; } __attribute__((aligned (32)));
 
-// CHECK: define void @test1(i32 signext %x, %struct.test1* byval align 8 %y)
+// CHECK: define void @test1(i32 signext %x, i64 %y.coerce)
 void test1 (int x, struct test1 y)
 {
 }
 
-// CHECK: define void @test2(i32 signext %x, %struct.test2* byval align 16 %y)
+// CHECK: define void @test2(i32 signext %x, [1 x i128] %y.coerce)
 void test2 (int x, struct test2 y)
 {
 }
 
-// This case requires run-time realignment of the incoming struct
-// CHECK: define void @test3(i32 signext %x, %struct.test3* byval align 16)
-// CHECK: %y = alloca %struct.test3, align 32
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK: define void @test3(i32 signext %x, [2 x i128] %y.coerce)
 void test3 (int x, struct test3 y)
 {
 }
 
-// CHECK: define void @test4(i32 signext %x, %struct.test4* byval align 8 %y)
+// CHECK: define void @test4(i32 signext %x, [2 x i64] %y.coerce)
 void test4 (int x, struct test4 y)
 {
 }
 
+// CHECK: define void @test5(i32 signext %x, %struct.test5* byval align 8 %y)
+void test5 (int x, struct test5 y)
+{
+}
+
+// CHECK: define void @test6(i32 signext %x, %struct.test6* byval align 16 %y)
+void test6 (int x, struct test6 y)
+{
+}
+
+// This case requires run-time realignment of the incoming struct
+// CHECK: define void @test7(i32 signext %x, %struct.test7* byval align 16)
+// CHECK: %y = alloca %struct.test7, align 32
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+void test7 (int x, struct test7 y)
+{
+}
+
 // CHECK: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8** %ap
 // CHECK: %[[NEXT:[^ ]+]] = getelementptr i8* %[[CUR]], i64 8
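
The three new structs are sized to miss the register window on purpose: with 32-bit int, int x[17] occupies 68 bytes, just past the 64 bytes eight GPRs can hold, so test5 through test7 keep the ByVal path and test7 still exercises run-time realignment. A compile-time check of that size assumption (32-bit int, as on ppc64 Linux):

    /* Mirrors the test's definition: 17 * 4 = 68 > 64 bytes. */
    struct test5 { int x[17]; };
    _Static_assert(sizeof(struct test5) > 64,
                   "must exceed 8 GPRs' worth of bytes to stay ByVal");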
diff --git a/test/CodeGen/ppc64-vector.c b/test/CodeGen/ppc64-vector.c
index 3ff07a4d41..f0211f0ec1 100644
--- a/test/CodeGen/ppc64-vector.c
+++ b/test/CodeGen/ppc64-vector.c
@@ -45,7 +45,7 @@ v16i16 test_v16i16(v16i16 x)
   return x;
 }
 
-// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, %struct.v16i16* byval align 16)
+// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, [2 x i128] %x.coerce)
 struct v16i16 test_struct_v16i16(struct v16i16 x)
 {
   return x;
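
Here struct v16i16 wraps a single 32-byte vector member, so the whole aggregate is 256 bits with 16-byte alignment and now coerces to [2 x i128] rather than going ByVal. The definitions below are an assumption inferred from the test's naming convention:

    /* Assumed shape: 16 x 16-bit shorts = 32 bytes = 256 bits;
     * 256 bits at 16-byte alignment -> two i128 registers. */
    typedef short v16i16 __attribute__((vector_size(32)));
    struct v16i16 { v16i16 x; };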
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c
index cb19dd31f2..acf34a8a80 100644
--- a/test/CodeGen/ppc64le-aggregates.c
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -40,7 +40,7 @@ struct f7 func_f7(struct f7 x) { return x; }
 
 // CHECK: define [8 x float] @func_f8([8 x float] %x.coerce)
 struct f8 func_f8(struct f8 x) { return x; }
 
-// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, %struct.f9* byval align 8 %x)
+// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, [5 x i64] %x.coerce)
 struct f9 func_f9(struct f9 x) { return x; }
 
 // CHECK: define [2 x float] @func_fab([2 x float] %x.coerce)
@@ -98,10 +98,11 @@ struct f8 global_f8;
 void call_f8(void) { global_f8 = func_f8(global_f8); }
 
 // CHECK-LABEL: @call_f9
-// CHECK: %[[TMP1:[^ ]+]] = alloca %struct.f9, align 8
-// CHECK: %[[TMP2:[^ ]+]] = bitcast %struct.f9* %[[TMP1]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
-// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, %struct.f9* byval align 8 %[[TMP1]])
+// CHECK: %[[TMP1:[^ ]+]] = alloca [5 x i64]
+// CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 1, i1 false)
+// CHECK: %[[TMP3:[^ ]+]] = load [5 x i64]* %[[TMP1]]
+// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
 void call_f9(void) { global_f9 = func_f9(global_f9); }
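
The new CHECK lines follow the same arithmetic: struct f9 holds one float too many for the [8 x float] homogeneous-aggregate path used by func_f8, so its 36 bytes round up to five doublewords and it is passed as [5 x i64]. The memcpy still moves only the 36 meaningful bytes into the 40-byte temporary before the whole array is loaded for the call. A sketch of the sizes, assuming the definition below (inferred from the "i64 36" memcpy above):

    struct f9 { float f[9]; };   /* 9 * 4 = 36 bytes                       */
    /* (36 * 8 + 63) / 64 = 5 registers -> passed as [5 x i64] (40 bytes)  */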