 lib/CodeGen/TargetInfo.cpp        | 178
 test/CodeGen/ppc64le-aggregates.c | 422
 2 files changed, 591 insertions(+), 9 deletions(-)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 4ccdedbd7f..2ed33b00bc 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -2898,12 +2898,24 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
namespace {
/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
class PPC64_SVR4_ABIInfo : public DefaultABIInfo {
+public:
+ enum ABIKind {
+ ELFv1 = 0,
+ ELFv2
+ };
+
+private:
+ static const unsigned GPRBits = 64;
+ ABIKind Kind;
public:
- PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+ PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
+ : DefaultABIInfo(CGT), Kind(Kind) {}
bool isPromotableTypeForABI(QualType Ty) const;
bool isAlignedParamType(QualType Ty) const;
+ bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+ uint64_t &Members) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -2941,8 +2953,9 @@ public:
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
public:
- PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT)
- : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT)) {}
+ PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
+ PPC64_SVR4_ABIInfo::ABIKind Kind)
+ : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
@@ -3019,6 +3032,13 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
AlignAsType = EltType;
}
+ // Likewise for ELFv2 homogeneous aggregates.
+ const Type *Base = nullptr;
+ uint64_t Members = 0;
+ if (!AlignAsType && Kind == ELFv2 &&
+ isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
+ AlignAsType = Base;
+
// With special case aggregates, only vector base types need alignment.
if (AlignAsType)
return AlignAsType->isVectorType();
@@ -3031,6 +3051,99 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
return false;
}
+/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
+/// aggregate. Base is set to the base element type, and Members is set
+/// to the number of base elements.
+bool
+PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
+ uint64_t &Members) const {
+ if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+ uint64_t NElements = AT->getSize().getZExtValue();
+ if (NElements == 0)
+ return false;
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
+ return false;
+ Members *= NElements;
+ } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return false;
+
+ Members = 0;
+ for (const auto *FD : RD->fields()) {
+ // Ignore (non-zero arrays of) empty records.
+ QualType FT = FD->getType();
+ while (const ConstantArrayType *AT =
+ getContext().getAsConstantArrayType(FT)) {
+ if (AT->getSize().getZExtValue() == 0)
+ return false;
+ FT = AT->getElementType();
+ }
+ if (isEmptyRecord(getContext(), FT, true))
+ continue;
+
+ // For compatibility with GCC, ignore empty bitfields in C++ mode.
+ if (getContext().getLangOpts().CPlusPlus &&
+ FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
+ continue;
+
+ uint64_t FldMembers;
+ if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
+ return false;
+
+ Members = (RD->isUnion() ?
+ std::max(Members, FldMembers) : Members + FldMembers);
+ }
+
+ if (!Base)
+ return false;
+
+ // Ensure there is no padding.
+ if (getContext().getTypeSize(Base) * Members !=
+ getContext().getTypeSize(Ty))
+ return false;
+ } else {
+ Members = 1;
+ if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
+ Members = 2;
+ Ty = CT->getElementType();
+ }
+
+ // Homogeneous aggregates for ELFv2 must have base types of float,
+ // double, long double, or 128-bit vectors.
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ if (BT->getKind() != BuiltinType::Float &&
+ BT->getKind() != BuiltinType::Double &&
+ BT->getKind() != BuiltinType::LongDouble)
+ return false;
+ } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+ if (getContext().getTypeSize(VT) != 128)
+ return false;
+ } else {
+ return false;
+ }
+
+ // The base type must be the same for all members. Types that
+ // agree in both total size and mode (float vs. vector) are
+ // treated as being equivalent here.
+ const Type *TyPtr = Ty.getTypePtr();
+ if (!Base)
+ Base = TyPtr;
+
+ if (Base->isVectorType() != TyPtr->isVectorType() ||
+ getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
+ return false;
+ }
+
+ // Vector types require one register, floating point types require one
+ // or two registers depending on their size.
+ uint32_t NumRegs = Base->isVectorType() ? 1 :
+ (getContext().getTypeSize(Base) + 63) / 64;
+
+ // Homogeneous Aggregates may occupy at most 8 registers.
+ return (Members > 0 && Members * NumRegs <= 8);
+}
+
ABIArgInfo
PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
if (Ty->isAnyComplexType())
@@ -3054,6 +3167,18 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8;
uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
+
+ // ELFv2 homogeneous aggregates are passed as array types.
+ const Type *Base = nullptr;
+ uint64_t Members = 0;
+ if (Kind == ELFv2 &&
+ isHomogeneousAggregate(Ty, Base, Members)) {
+ llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
+ llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
+ return ABIArgInfo::getDirect(CoerceTy);
+ }
+
+ // All other aggregates are passed ByVal.
return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
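
Not part of the patch: a minimal C sketch of how the new isHomogeneousAggregate() rules above classify a few source-level aggregates, assuming the base-type and register limits exactly as written in the hunks above. The struct names are illustrative only.

// Accepted: a single base type (float), 8 members, one register each,
// so the struct is coerced to the array type [8 x float].
struct hfa_floats { float f[8]; };

// Rejected: 9 members exceed the 8-register budget, so the struct
// falls back to the ByVal / sret path.
struct hfa_toobig { float f[9]; };

// Rejected: float and double differ in size, so the "same base type
// for all members" check fails.
struct hfa_mixed { float a; double b; };

// Accepted per the register count above: a 128-bit long double needs
// two registers per member, so at most four members fit (4 * 2 == 8).
struct hfa_ldbl { long double ld[4]; };
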
@@ -3082,8 +3207,36 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
}
}
- if (isAggregateTypeForABI(RetTy))
+ if (isAggregateTypeForABI(RetTy)) {
+ // ELFv2 homogeneous aggregates are returned as array types.
+ const Type *Base = nullptr;
+ uint64_t Members = 0;
+ if (Kind == ELFv2 &&
+ isHomogeneousAggregate(RetTy, Base, Members)) {
+ llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
+ llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
+ return ABIArgInfo::getDirect(CoerceTy);
+ }
+
+ // ELFv2 small aggregates are returned in up to two registers.
+ uint64_t Bits = getContext().getTypeSize(RetTy);
+ if (Kind == ELFv2 && Bits <= 2 * GPRBits) {
+ if (Bits == 0)
+ return ABIArgInfo::getIgnore();
+
+ llvm::Type *CoerceTy;
+ if (Bits > GPRBits) {
+ CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
+ CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, NULL);
+ } else
+ CoerceTy = llvm::IntegerType::get(getVMContext(),
+ llvm::RoundUpToAlignment(Bits, 8));
+ return ABIArgInfo::getDirect(CoerceTy);
+ }
+
+ // All other aggregates are returned indirectly.
return ABIArgInfo::getIndirect(0);
+ }
return (isPromotableTypeForABI(RetTy) ?
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
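
Not part of the patch: two worked cases for the ELFv2 small-aggregate return path above, assuming typical ppc64le layouts; these structs are illustrative and do not appear in the test file below.

// 8 bytes total (int + char + tail padding), so Bits == 64 and the
// struct should be returned directly as a single i64.
struct ret_one_reg { int i; char c; };

// 16 bytes total (double + int + tail padding): not a homogeneous
// aggregate (mixed base types), but 64 < Bits <= 128, so it should be
// returned as the two-register struct { i64, i64 }.
struct ret_two_regs { double d; int i; };
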
@@ -6609,13 +6762,20 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::ppc:
return *(TheTargetCodeGenInfo = new PPC32TargetCodeGenInfo(Types));
case llvm::Triple::ppc64:
- if (Triple.isOSBinFormatELF())
- return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types));
- else
+ if (Triple.isOSBinFormatELF()) {
+ // FIXME: Should be switchable via command-line option.
+ PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
+ return *(TheTargetCodeGenInfo =
+ new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
+ } else
return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types));
- case llvm::Triple::ppc64le:
+ case llvm::Triple::ppc64le: {
assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
- return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types));
+ // FIXME: Should be switchable via command-line option.
+ PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
+ return *(TheTargetCodeGenInfo =
+ new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
+ }
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
diff --git a/test/CodeGen/ppc64le-aggregates.c b/test/CodeGen/ppc64le-aggregates.c
new file mode 100644
index 0000000000..cb19dd31f2
--- /dev/null
+++ b/test/CodeGen/ppc64le-aggregates.c
@@ -0,0 +1,422 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -faltivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// Test homogeneous float aggregate passing and returning.
+
+struct f1 { float f[1]; };
+struct f2 { float f[2]; };
+struct f3 { float f[3]; };
+struct f4 { float f[4]; };
+struct f5 { float f[5]; };
+struct f6 { float f[6]; };
+struct f7 { float f[7]; };
+struct f8 { float f[8]; };
+struct f9 { float f[9]; };
+
+struct fab { float a; float b; };
+struct fabc { float a; float b; float c; };
+
+// CHECK: define [1 x float] @func_f1(float inreg %x.coerce)
+struct f1 func_f1(struct f1 x) { return x; }
+
+// CHECK: define [2 x float] @func_f2([2 x float] %x.coerce)
+struct f2 func_f2(struct f2 x) { return x; }
+
+// CHECK: define [3 x float] @func_f3([3 x float] %x.coerce)
+struct f3 func_f3(struct f3 x) { return x; }
+
+// CHECK: define [4 x float] @func_f4([4 x float] %x.coerce)
+struct f4 func_f4(struct f4 x) { return x; }
+
+// CHECK: define [5 x float] @func_f5([5 x float] %x.coerce)
+struct f5 func_f5(struct f5 x) { return x; }
+
+// CHECK: define [6 x float] @func_f6([6 x float] %x.coerce)
+struct f6 func_f6(struct f6 x) { return x; }
+
+// CHECK: define [7 x float] @func_f7([7 x float] %x.coerce)
+struct f7 func_f7(struct f7 x) { return x; }
+
+// CHECK: define [8 x float] @func_f8([8 x float] %x.coerce)
+struct f8 func_f8(struct f8 x) { return x; }
+
+// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, %struct.f9* byval align 8 %x)
+struct f9 func_f9(struct f9 x) { return x; }
+
+// CHECK: define [2 x float] @func_fab([2 x float] %x.coerce)
+struct fab func_fab(struct fab x) { return x; }
+
+// CHECK: define [3 x float] @func_fabc([3 x float] %x.coerce)
+struct fabc func_fabc(struct fabc x) { return x; }
+
+// CHECK-LABEL: @call_f1
+// CHECK: %[[TMP:[^ ]+]] = load float* getelementptr inbounds (%struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
+// CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
+struct f1 global_f1;
+void call_f1(void) { global_f1 = func_f1(global_f1); }
+
+// CHECK-LABEL: @call_f2
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float]* getelementptr inbounds (%struct.f2* @global_f2, i32 0, i32 0), align 1
+// CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]])
+struct f2 global_f2;
+void call_f2(void) { global_f2 = func_f2(global_f2); }
+
+// CHECK-LABEL: @call_f3
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float]* getelementptr inbounds (%struct.f3* @global_f3, i32 0, i32 0), align 1
+// CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]])
+struct f3 global_f3;
+void call_f3(void) { global_f3 = func_f3(global_f3); }
+
+// CHECK-LABEL: @call_f4
+// CHECK: %[[TMP:[^ ]+]] = load [4 x float]* getelementptr inbounds (%struct.f4* @global_f4, i32 0, i32 0), align 1
+// CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]])
+struct f4 global_f4;
+void call_f4(void) { global_f4 = func_f4(global_f4); }
+
+// CHECK-LABEL: @call_f5
+// CHECK: %[[TMP:[^ ]+]] = load [5 x float]* getelementptr inbounds (%struct.f5* @global_f5, i32 0, i32 0), align 1
+// CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]])
+struct f5 global_f5;
+void call_f5(void) { global_f5 = func_f5(global_f5); }
+
+// CHECK-LABEL: @call_f6
+// CHECK: %[[TMP:[^ ]+]] = load [6 x float]* getelementptr inbounds (%struct.f6* @global_f6, i32 0, i32 0), align 1
+// CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]])
+struct f6 global_f6;
+void call_f6(void) { global_f6 = func_f6(global_f6); }
+
+// CHECK-LABEL: @call_f7
+// CHECK: %[[TMP:[^ ]+]] = load [7 x float]* getelementptr inbounds (%struct.f7* @global_f7, i32 0, i32 0), align 1
+// CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]])
+struct f7 global_f7;
+void call_f7(void) { global_f7 = func_f7(global_f7); }
+
+// CHECK-LABEL: @call_f8
+// CHECK: %[[TMP:[^ ]+]] = load [8 x float]* getelementptr inbounds (%struct.f8* @global_f8, i32 0, i32 0), align 1
+// CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]])
+struct f8 global_f8;
+void call_f8(void) { global_f8 = func_f8(global_f8); }
+
+// CHECK-LABEL: @call_f9
+// CHECK: %[[TMP1:[^ ]+]] = alloca %struct.f9, align 8
+// CHECK: %[[TMP2:[^ ]+]] = bitcast %struct.f9* %[[TMP1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
+// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, %struct.f9* byval align 8 %[[TMP1]])
+struct f9 global_f9;
+void call_f9(void) { global_f9 = func_f9(global_f9); }
+
+// CHECK-LABEL: @call_fab
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float]* bitcast (%struct.fab* @global_fab to [2 x float]*)
+// CHECK: call [2 x float] @func_fab([2 x float] %[[TMP]])
+struct fab global_fab;
+void call_fab(void) { global_fab = func_fab(global_fab); }
+
+// CHECK-LABEL: @call_fabc
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float]* bitcast (%struct.fabc* @global_fabc to [3 x float]*)
+// CHECK: call [3 x float] @func_fabc([3 x float] %[[TMP]])
+struct fabc global_fabc;
+void call_fabc(void) { global_fabc = func_fabc(global_fabc); }
+
+
+// Test homogeneous vector aggregate passing and returning.
+
+struct v1 { vector int v[1]; };
+struct v2 { vector int v[2]; };
+struct v3 { vector int v[3]; };
+struct v4 { vector int v[4]; };
+struct v5 { vector int v[5]; };
+struct v6 { vector int v[6]; };
+struct v7 { vector int v[7]; };
+struct v8 { vector int v[8]; };
+struct v9 { vector int v[9]; };
+
+struct vab { vector int a; vector int b; };
+struct vabc { vector int a; vector int b; vector int c; };
+
+// CHECK: define [1 x <4 x i32>] @func_v1(<4 x i32> inreg %x.coerce)
+struct v1 func_v1(struct v1 x) { return x; }
+
+// CHECK: define [2 x <4 x i32>] @func_v2([2 x <4 x i32>] %x.coerce)
+struct v2 func_v2(struct v2 x) { return x; }
+
+// CHECK: define [3 x <4 x i32>] @func_v3([3 x <4 x i32>] %x.coerce)
+struct v3 func_v3(struct v3 x) { return x; }
+
+// CHECK: define [4 x <4 x i32>] @func_v4([4 x <4 x i32>] %x.coerce)
+struct v4 func_v4(struct v4 x) { return x; }
+
+// CHECK: define [5 x <4 x i32>] @func_v5([5 x <4 x i32>] %x.coerce)
+struct v5 func_v5(struct v5 x) { return x; }
+
+// CHECK: define [6 x <4 x i32>] @func_v6([6 x <4 x i32>] %x.coerce)
+struct v6 func_v6(struct v6 x) { return x; }
+
+// CHECK: define [7 x <4 x i32>] @func_v7([7 x <4 x i32>] %x.coerce)
+struct v7 func_v7(struct v7 x) { return x; }
+
+// CHECK: define [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %x.coerce)
+struct v8 func_v8(struct v8 x) { return x; }
+
+// CHECK: define void @func_v9(%struct.v9* noalias sret %agg.result, %struct.v9* byval align 16 %x)
+struct v9 func_v9(struct v9 x) { return x; }
+
+// CHECK: define [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %x.coerce)
+struct vab func_vab(struct vab x) { return x; }
+
+// CHECK: define [3 x <4 x i32>] @func_vabc([3 x <4 x i32>] %x.coerce)
+struct vabc func_vabc(struct vabc x) { return x; }
+
+// CHECK-LABEL: @call_v1
+// CHECK: %[[TMP:[^ ]+]] = load <4 x i32>* getelementptr inbounds (%struct.v1* @global_v1, i32 0, i32 0, i32 0), align 1
+// CHECK: call [1 x <4 x i32>] @func_v1(<4 x i32> inreg %[[TMP]])
+struct v1 global_v1;
+void call_v1(void) { global_v1 = func_v1(global_v1); }
+
+// CHECK-LABEL: @call_v2
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>]* getelementptr inbounds (%struct.v2* @global_v2, i32 0, i32 0), align 1
+// CHECK: call [2 x <4 x i32>] @func_v2([2 x <4 x i32>] %[[TMP]])
+struct v2 global_v2;
+void call_v2(void) { global_v2 = func_v2(global_v2); }
+
+// CHECK-LABEL: @call_v3
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>]* getelementptr inbounds (%struct.v3* @global_v3, i32 0, i32 0), align 1
+// CHECK: call [3 x <4 x i32>] @func_v3([3 x <4 x i32>] %[[TMP]])
+struct v3 global_v3;
+void call_v3(void) { global_v3 = func_v3(global_v3); }
+
+// CHECK-LABEL: @call_v4
+// CHECK: %[[TMP:[^ ]+]] = load [4 x <4 x i32>]* getelementptr inbounds (%struct.v4* @global_v4, i32 0, i32 0), align 1
+// CHECK: call [4 x <4 x i32>] @func_v4([4 x <4 x i32>] %[[TMP]])
+struct v4 global_v4;
+void call_v4(void) { global_v4 = func_v4(global_v4); }
+
+// CHECK-LABEL: @call_v5
+// CHECK: %[[TMP:[^ ]+]] = load [5 x <4 x i32>]* getelementptr inbounds (%struct.v5* @global_v5, i32 0, i32 0), align 1
+// CHECK: call [5 x <4 x i32>] @func_v5([5 x <4 x i32>] %[[TMP]])
+struct v5 global_v5;
+void call_v5(void) { global_v5 = func_v5(global_v5); }
+
+// CHECK-LABEL: @call_v6
+// CHECK: %[[TMP:[^ ]+]] = load [6 x <4 x i32>]* getelementptr inbounds (%struct.v6* @global_v6, i32 0, i32 0), align 1
+// CHECK: call [6 x <4 x i32>] @func_v6([6 x <4 x i32>] %[[TMP]])
+struct v6 global_v6;
+void call_v6(void) { global_v6 = func_v6(global_v6); }
+
+// CHECK-LABEL: @call_v7
+// CHECK: %[[TMP:[^ ]+]] = load [7 x <4 x i32>]* getelementptr inbounds (%struct.v7* @global_v7, i32 0, i32 0), align 1
+// CHECK: call [7 x <4 x i32>] @func_v7([7 x <4 x i32>] %[[TMP]])
+struct v7 global_v7;
+void call_v7(void) { global_v7 = func_v7(global_v7); }
+
+// CHECK-LABEL: @call_v8
+// CHECK: %[[TMP:[^ ]+]] = load [8 x <4 x i32>]* getelementptr inbounds (%struct.v8* @global_v8, i32 0, i32 0), align 1
+// CHECK: call [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %[[TMP]])
+struct v8 global_v8;
+void call_v8(void) { global_v8 = func_v8(global_v8); }
+
+// CHECK-LABEL: @call_v9
+// CHECK: call void @func_v9(%struct.v9* sret %{{[^ ]+}}, %struct.v9* byval align 16 @global_v9)
+struct v9 global_v9;
+void call_v9(void) { global_v9 = func_v9(global_v9); }
+
+// CHECK-LABEL: @call_vab
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>]* bitcast (%struct.vab* @global_vab to [2 x <4 x i32>]*)
+// CHECK: call [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %[[TMP]])
+struct vab global_vab;
+void call_vab(void) { global_vab = func_vab(global_vab); }
+
+// CHECK-LABEL: @call_vabc
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>]* bitcast (%struct.vabc* @global_vabc to [3 x <4 x i32>]*)
+// CHECK: call [3 x <4 x i32>] @func_vabc([3 x <4 x i32>] %[[TMP]])
+struct vabc global_vabc;
+void call_vabc(void) { global_vabc = func_vabc(global_vabc); }
+
+
+// As a clang extension, non-power-of-two vectors may also be part of
+// homogeneous aggregates.
+
+typedef float float3 __attribute__((vector_size (12)));
+
+struct v3f1 { float3 v[1]; };
+struct v3f2 { float3 v[2]; };
+struct v3f3 { float3 v[3]; };
+struct v3f4 { float3 v[4]; };
+struct v3f5 { float3 v[5]; };
+struct v3f6 { float3 v[6]; };
+struct v3f7 { float3 v[7]; };
+struct v3f8 { float3 v[8]; };
+struct v3f9 { float3 v[9]; };
+
+struct v3fab { float3 a; float3 b; };
+struct v3fabc { float3 a; float3 b; float3 c; };
+
+// CHECK: define [1 x <3 x float>] @func_v3f1(<3 x float> inreg %x.coerce)
+struct v3f1 func_v3f1(struct v3f1 x) { return x; }
+
+// CHECK: define [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %x.coerce)
+struct v3f2 func_v3f2(struct v3f2 x) { return x; }
+
+// CHECK: define [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %x.coerce)
+struct v3f3 func_v3f3(struct v3f3 x) { return x; }
+
+// CHECK: define [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %x.coerce)
+struct v3f4 func_v3f4(struct v3f4 x) { return x; }
+
+// CHECK: define [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %x.coerce)
+struct v3f5 func_v3f5(struct v3f5 x) { return x; }
+
+// CHECK: define [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %x.coerce)
+struct v3f6 func_v3f6(struct v3f6 x) { return x; }
+
+// CHECK: define [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %x.coerce)
+struct v3f7 func_v3f7(struct v3f7 x) { return x; }
+
+// CHECK: define [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %x.coerce)
+struct v3f8 func_v3f8(struct v3f8 x) { return x; }
+
+// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval align 16 %x)
+struct v3f9 func_v3f9(struct v3f9 x) { return x; }
+
+// CHECK: define [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %x.coerce)
+struct v3fab func_v3fab(struct v3fab x) { return x; }
+
+// CHECK: define [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %x.coerce)
+struct v3fabc func_v3fabc(struct v3fabc x) { return x; }
+
+// CHECK-LABEL: @call_v3f1
+// CHECK: %[[TMP:[^ ]+]] = load <3 x float>* getelementptr inbounds (%struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1
+// CHECK: call [1 x <3 x float>] @func_v3f1(<3 x float> inreg %[[TMP]])
+struct v3f1 global_v3f1;
+void call_v3f1(void) { global_v3f1 = func_v3f1(global_v3f1); }
+
+// CHECK-LABEL: @call_v3f2
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>]* getelementptr inbounds (%struct.v3f2* @global_v3f2, i32 0, i32 0), align 1
+// CHECK: call [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %[[TMP]])
+struct v3f2 global_v3f2;
+void call_v3f2(void) { global_v3f2 = func_v3f2(global_v3f2); }
+
+// CHECK-LABEL: @call_v3f3
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>]* getelementptr inbounds (%struct.v3f3* @global_v3f3, i32 0, i32 0), align 1
+// CHECK: call [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %[[TMP]])
+struct v3f3 global_v3f3;
+void call_v3f3(void) { global_v3f3 = func_v3f3(global_v3f3); }
+
+// CHECK-LABEL: @call_v3f4
+// CHECK: %[[TMP:[^ ]+]] = load [4 x <3 x float>]* getelementptr inbounds (%struct.v3f4* @global_v3f4, i32 0, i32 0), align 1
+// CHECK: call [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %[[TMP]])
+struct v3f4 global_v3f4;
+void call_v3f4(void) { global_v3f4 = func_v3f4(global_v3f4); }
+
+// CHECK-LABEL: @call_v3f5
+// CHECK: %[[TMP:[^ ]+]] = load [5 x <3 x float>]* getelementptr inbounds (%struct.v3f5* @global_v3f5, i32 0, i32 0), align 1
+// CHECK: call [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %[[TMP]])
+struct v3f5 global_v3f5;
+void call_v3f5(void) { global_v3f5 = func_v3f5(global_v3f5); }
+
+// CHECK-LABEL: @call_v3f6
+// CHECK: %[[TMP:[^ ]+]] = load [6 x <3 x float>]* getelementptr inbounds (%struct.v3f6* @global_v3f6, i32 0, i32 0), align 1
+// CHECK: call [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %[[TMP]])
+struct v3f6 global_v3f6;
+void call_v3f6(void) { global_v3f6 = func_v3f6(global_v3f6); }
+
+// CHECK-LABEL: @call_v3f7
+// CHECK: %[[TMP:[^ ]+]] = load [7 x <3 x float>]* getelementptr inbounds (%struct.v3f7* @global_v3f7, i32 0, i32 0), align 1
+// CHECK: call [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %[[TMP]])
+struct v3f7 global_v3f7;
+void call_v3f7(void) { global_v3f7 = func_v3f7(global_v3f7); }
+
+// CHECK-LABEL: @call_v3f8
+// CHECK: %[[TMP:[^ ]+]] = load [8 x <3 x float>]* getelementptr inbounds (%struct.v3f8* @global_v3f8, i32 0, i32 0), align 1
+// CHECK: call [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %[[TMP]])
+struct v3f8 global_v3f8;
+void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }
+
+// CHECK-LABEL: @call_v3f9
+// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval align 16 @global_v3f9)
+struct v3f9 global_v3f9;
+void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }
+
+// CHECK-LABEL: @call_v3fab
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <3 x float>]*)
+// CHECK: call [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %[[TMP]])
+struct v3fab global_v3fab;
+void call_v3fab(void) { global_v3fab = func_v3fab(global_v3fab); }
+
+// CHECK-LABEL: @call_v3fabc
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <3 x float>]*)
+// CHECK: call [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %[[TMP]])
+struct v3fabc global_v3fabc;
+void call_v3fabc(void) { global_v3fabc = func_v3fabc(global_v3fabc); }
+
+
+// Test returning small aggregates.
+
+struct s1 { char c[1]; };
+struct s2 { char c[2]; };
+struct s3 { char c[3]; };
+struct s4 { char c[4]; };
+struct s5 { char c[5]; };
+struct s6 { char c[6]; };
+struct s7 { char c[7]; };
+struct s8 { char c[8]; };
+struct s9 { char c[9]; };
+struct s16 { char c[16]; };
+struct s17 { char c[17]; };
+
+// CHECK: define i8 @ret_s1()
+struct s1 ret_s1() {
+ return (struct s1) { 17 };
+}
+
+// CHECK: define i16 @ret_s2()
+struct s2 ret_s2() {
+ return (struct s2) { 17, 18 };
+}
+
+// CHECK: define i24 @ret_s3()
+struct s3 ret_s3() {
+ return (struct s3) { 17, 18, 19 };
+}
+
+// CHECK: define i32 @ret_s4()
+struct s4 ret_s4() {
+ return (struct s4) { 17, 18, 19, 20 };
+}
+
+// CHECK: define i40 @ret_s5()
+struct s5 ret_s5() {
+ return (struct s5) { 17, 18, 19, 20, 21 };
+}
+
+// CHECK: define i48 @ret_s6()
+struct s6 ret_s6() {
+ return (struct s6) { 17, 18, 19, 20, 21, 22 };
+}
+
+// CHECK: define i56 @ret_s7()
+struct s7 ret_s7() {
+ return (struct s7) { 17, 18, 19, 20, 21, 22, 23 };
+}
+
+// CHECK: define i64 @ret_s8()
+struct s8 ret_s8() {
+ return (struct s8) { 17, 18, 19, 20, 21, 22, 23, 24 };
+}
+
+// CHECK: define { i64, i64 } @ret_s9()
+struct s9 ret_s9() {
+ return (struct s9) { 17, 18, 19, 20, 21, 22, 23, 24, 25 };
+}
+
+// CHECK: define { i64, i64 } @ret_s16()
+struct s16 ret_s16() {
+ return (struct s16) { 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32 };
+}
+
+// CHECK: define void @ret_s17(%struct.s17*
+struct s17 ret_s17() {
+ return (struct s17) { 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33 };
+}
+
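
Not part of the patch: one further illustrative case that the tests above do not cover. The padding check in isHomogeneousAggregate() compares the aggregate's total size against Members times the size of the base type, so an over-aligned struct whose size includes tail padding should fail the homogeneity test even though every field is a float; this is a sketch of the expected behaviour, not a verified test.

// Three floats occupy 12 bytes, but aligned(16) pads the struct to
// 16 bytes, so size != Members * sizeof(float) and the struct should
// not be treated as a homogeneous aggregate.
struct overaligned { float f[3]; } __attribute__((aligned(16)));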