diff options
author | David Green <david.green@arm.com> | 2024-04-03 19:10:14 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-03 19:10:14 +0100 |
commit | 42c7bc04c30b427414a2d957776b1655abb27b6e (patch) | |
tree | 2e769ef7b119979a9963afed74c4c1b9f253cc2c | |
parent | 8a5a1b770413bb62ff27cd8c2aea3d04b3a95bbe (diff) |
[AArch64][ARM] Make neon fp16 generic intrinsics always available. (#87467)
By generic intrinsics this mean things like dup, ext, zip and bsl that
can always be executed with integer s16 operations and do not require
fullfp16. This makes them always available, and brings them inline with
GCC.
https://godbolt.org/z/azs8eMv54
The relevant test cases have been moved into their own files, to allow
them to be tested with armv8-a and armv8.2-a+fp16.
-rw-r--r-- | clang/include/clang/Basic/arm_neon.td | 31 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 10 | ||||
-rw-r--r-- | clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c | 485 | ||||
-rw-r--r-- | clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 472 | ||||
-rw-r--r-- | clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c | 600 | ||||
-rw-r--r-- | clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c | 178 |
6 files changed, 1100 insertions, 676 deletions
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index f16de97f4e6b..7edac5afafaa 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1758,24 +1758,21 @@ let TargetGuard = "fullfp16" in { // Mul lane def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>; def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>; +} - // Data processing intrinsics - section 5 - - // Logical operations - let isHiddenLInst = 1 in - def VBSLH : SInst<"vbsl", ".U..", "hQh">; - - // Transposition operations - def VZIPH : WInst<"vzip", "2..", "hQh">; - def VUZPH : WInst<"vuzp", "2..", "hQh">; - def VTRNH : WInst<"vtrn", "2..", "hQh">; - - // Vector Extract - def VEXTH : WInst<"vext", "...I", "hQh">; +// Data processing intrinsics - section 5. Do not require fullfp16. - // Reverse vector elements - def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; -} +// Logical operations +let isHiddenLInst = 1 in +def VBSLH : SInst<"vbsl", ".U..", "hQh">; +// Transposition operations +def VZIPH : WInst<"vzip", "2..", "hQh">; +def VUZPH : WInst<"vuzp", "2..", "hQh">; +def VTRNH : WInst<"vtrn", "2..", "hQh">; +// Vector Extract +def VEXTH : WInst<"vext", "...I", "hQh">; +// Reverse vector elements +def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; // ARMv8.2-A FP16 vector intrinsics for A64 only. let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { @@ -1857,7 +1854,9 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { def VMINVH : SInst<"vminv", "1.", "hQh">; def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">; def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; +} +let ArchGuard = "defined(__aarch64__)" in { // Permutation def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 483f9c268599..2537e715b63e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7281,8 +7281,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, - { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, }, - { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, }, { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, @@ -7301,8 +7299,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, - { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, }, - { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, }, { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, @@ -7405,12 +7401,6 @@ static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, - { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, }, - { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, }, - { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, }, - { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, }, - { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, }, - { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, }, // The mangling rules cause us to have one ID for each type for vldap1(q)_lane // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an // arbitrary one to be handled as tha canonical variation. diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c new file mode 100644 index 000000000000..78391808fafa --- /dev/null +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c @@ -0,0 +1,485 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature -fullfp16 -target-feature +v8a\ +// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=mem2reg \ +// RUN: | FileCheck %s +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ +// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=mem2reg \ +// RUN: | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#include <arm_neon.h> + +// CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16 +// CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> +// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]] +// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] +// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half> +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { + return vbsl_f16(a, b, c); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16 +// CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> +// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]] +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] +// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP4]] +// +float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { + return vbslq_f16(a, b, c); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> +// CHECK-NEXT: store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> +// CHECK-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] +// +float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { + return vzip_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> +// CHECK-NEXT: store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> +// CHECK-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] +// +float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { + return vzipq_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +// CHECK-NEXT: store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +// CHECK-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] +// +float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { + return vuzp_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +// CHECK-NEXT: store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +// CHECK-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] +// +float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { + return vuzpq_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// CHECK-NEXT: store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// CHECK-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] +// +float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { + return vtrn_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// CHECK-NEXT: store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// CHECK-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] +// +float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { + return vtrnq_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vmov_n_f16(float16_t a) { + return vmov_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vmovq_n_f16(float16_t a) { + return vmovq_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vdup_n_f16(float16_t a) { + return vdup_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vdupq_n_f16(float16_t a) { + return vdupq_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <4 x half> [[LANE]] +// +float16x4_t test_vdup_lane_f16(float16x4_t a) { + return vdup_lane_f16(a, 3); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NEXT: ret <8 x half> [[LANE]] +// +float16x8_t test_vdupq_lane_f16(float16x4_t a) { + return vdupq_lane_f16(a, 3); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> +// CHECK-NEXT: ret <4 x half> [[LANE]] +// +float16x4_t test_vdup_laneq_f16(float16x8_t a) { + return vdup_laneq_f16(a, 1); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> +// CHECK-NEXT: ret <8 x half> [[LANE]] +// +float16x8_t test_vdupq_laneq_f16(float16x8_t a) { + return vdupq_laneq_f16(a, 7); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vext_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> +// CHECK-NEXT: ret <4 x half> [[VEXT]] +// +float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { + return vext_f16(a, b, 2); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vextq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> +// CHECK-NEXT: ret <8 x half> [[VEXT]] +// +float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { + return vextq_f16(a, b, 5); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vrev64_f16(float16x4_t a) { + return vrev64_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vrev64q_f16(float16x8_t a) { + return vrev64q_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) { + return vzip1_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) { + return vzip1q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) { + return vzip2_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) { + return vzip2q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) { + return vuzp1_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) { + return vuzp1q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) { + return vuzp2_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) { + return vuzp2q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) { + return vtrn1_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) { + return vtrn1q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) { + return vtrn2_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) { + return vtrn2q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16 +// CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7 +// CHECK-NEXT: ret half [[VGETQ_LANE]] +// +float16_t test_vduph_laneq_f16(float16x8_t vec) { + return vduph_laneq_f16(vec, 7); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16 +// CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3 +// CHECK-NEXT: ret half [[VGET_LANE]] +// +float16_t test_vduph_lane_f16(float16x4_t vec) { + return vduph_lane_f16(vec, 3); +} diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c index 4163e6e0918f..617d515504fe 100644 --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -2004,475 +2004,3 @@ float16_t test_vminnmv_f16(float16x4_t a) { float16_t test_vminnmvq_f16(float16x8_t a) { return vminnmvq_f16(a); } - -// CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16 -// CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1> -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half> -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { - return vbsl_f16(a, b, c); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16 -// CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half> -// CHECK-NEXT: ret <8 x half> [[TMP4]] -// -float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { - return vbslq_f16(a, b, c); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> -// CHECK-NEXT: store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> -// CHECK-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] -// -float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { - return vzip_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> -// CHECK-NEXT: store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> -// CHECK-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] -// -float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { - return vzipq_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> -// CHECK-NEXT: store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> -// CHECK-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] -// -float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { - return vuzp_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> -// CHECK-NEXT: store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> -// CHECK-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] -// -float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { - return vuzpq_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] -// -float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { - return vtrn_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] -// -float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { - return vtrnq_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] -// -float16x4_t test_vmov_n_f16(float16_t a) { - return vmov_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] -// -float16x8_t test_vmovq_n_f16(float16_t a) { - return vmovq_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] -// -float16x4_t test_vdup_n_f16(float16_t a) { - return vdup_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] -// -float16x8_t test_vdupq_n_f16(float16_t a) { - return vdupq_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK-NEXT: ret <4 x half> [[LANE]] -// -float16x4_t test_vdup_lane_f16(float16x4_t a) { - return vdup_lane_f16(a, 3); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK-NEXT: ret <8 x half> [[LANE]] -// -float16x8_t test_vdupq_lane_f16(float16x4_t a) { - return vdupq_lane_f16(a, 3); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> -// CHECK-NEXT: ret <4 x half> [[LANE]] -// -float16x4_t test_vdup_laneq_f16(float16x8_t a) { - return vdup_laneq_f16(a, 1); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> -// CHECK-NEXT: ret <8 x half> [[LANE]] -// -float16x8_t test_vdupq_laneq_f16(float16x8_t a) { - return vdupq_laneq_f16(a, 7); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vext_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> -// CHECK-NEXT: ret <4 x half> [[VEXT]] -// -float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { - return vext_f16(a, b, 2); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vextq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> -// CHECK-NEXT: ret <8 x half> [[VEXT]] -// -float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { - return vextq_f16(a, b, 5); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vrev64_f16(float16x4_t a) { - return vrev64_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vrev64q_f16(float16x8_t a) { - return vrev64q_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) { - return vzip1_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) { - return vzip1q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) { - return vzip2_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) { - return vzip2q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) { - return vuzp1_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) { - return vuzp1q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) { - return vuzp2_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) { - return vuzp2q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) { - return vtrn1_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) { - return vtrn1q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) { - return vtrn2_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) { - return vtrn2q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7 -// CHECK-NEXT: ret half [[VGETQ_LANE]] -// -float16_t test_vduph_laneq_f16(float16x8_t vec) { - return vduph_laneq_f16(vec, 7); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3 -// CHECK-NEXT: ret half [[VGET_LANE]] -// -float16_t test_vduph_lane_f16(float16x4_t vec) { - return vduph_lane_f16(vec, 3); -} diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c new file mode 100644 index 000000000000..f8d83332ab01 --- /dev/null +++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c @@ -0,0 +1,600 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature -fullfp16 \ +// RUN: -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=sroa \ +// RUN: | FileCheck %s --check-prefixes=CHECK-NOFP16 +// RUN: %clang_cc1 -triple armv8a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \ +// RUN: -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=sroa \ +// RUN: | FileCheck %s --check-prefixes=CHECK-FP16 + +// REQUIRES: arm-registered-target + +#include <arm_neon.h> + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vbsl_f16( +// CHECK-NOFP16-SAME: <4 x i16> noundef [[A:%.*]], <2 x i32> noundef [[B_COERCE:%.*]], <2 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> [[TMP8]]) +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP12]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vbsl_f16( +// CHECK-FP16-SAME: <4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> +// CHECK-FP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half> +// CHECK-FP16-NEXT: ret <4 x half> [[TMP3]] +// +float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { + return vbsl_f16(a, b, c); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vbslq_f16( +// CHECK-NOFP16-SAME: <8 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B_COERCE:%.*]], <4 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[C_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP12]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vbslq_f16( +// CHECK-FP16-SAME: <8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> +// CHECK-FP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half> +// CHECK-FP16-NEXT: ret <8 x half> [[TMP3]] +// +float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { + return vbslq_f16(a, b, c); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vzip_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META3]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vzip_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5> +// CHECK-FP16-NEXT: store <4 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7> +// CHECK-FP16-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META3]] +// CHECK-FP16-NEXT: ret void +// +float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { + return vzip_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vzipq_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META6]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vzipq_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> +// CHECK-FP16-NEXT: store <8 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> +// CHECK-FP16-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META6]] +// CHECK-FP16-NEXT: ret void +// +float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { + return vzipq_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vuzp_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META9]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vuzp_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +// CHECK-FP16-NEXT: store <4 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +// CHECK-FP16-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META9]] +// CHECK-FP16-NEXT: ret void +// +float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { + return vuzp_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vuzpq_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META12]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vuzpq_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> +// CHECK-FP16-NEXT: store <8 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> +// CHECK-FP16-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META12]] +// CHECK-FP16-NEXT: ret void +// +float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { + return vuzpq_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vtrn_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META15]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vtrn_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// CHECK-FP16-NEXT: store <4 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// CHECK-FP16-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META15]] +// CHECK-FP16-NEXT: ret void +// +float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { + return vtrn_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vtrnq_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META18]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vtrnq_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// CHECK-FP16-NEXT: store <8 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// CHECK-FP16-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META18]] +// CHECK-FP16-NEXT: ret void +// +float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { + return vtrnq_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vmov_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vmov_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vmov_n_f16(float16_t a) { + return vmov_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vmovq_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vmovq_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vmovq_n_f16(float16_t a) { + return vmovq_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vdup_n_f16(float16_t a) { + return vdup_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vdupq_n_f16(float16_t a) { + return vdupq_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_lane_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP4]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_lane_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> +// CHECK-FP16-NEXT: ret <4 x half> [[LANE]] +// +float16x4_t test_vdup_lane_f16(float16x4_t a) { + return vdup_lane_f16(a, 3); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_lane_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP4]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_lane_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> +// CHECK-FP16-NEXT: ret <8 x half> [[LANE]] +// +float16x8_t test_vdupq_lane_f16(float16x4_t a) { + return vdupq_lane_f16(a, 3); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vext_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[VEXT]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP7]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vext_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> +// CHECK-FP16-NEXT: ret <4 x half> [[VEXT]] +// +float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { + return vext_f16(a, b, 2); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vextq_f16( +// CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[VEXT]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP7]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vextq_f16( +// CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> +// CHECK-FP16-NEXT: ret <8 x half> [[VEXT]] +// +float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { + return vextq_f16(a, b, 5); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vrev64_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half> +// CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP5]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vrev64_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> +// CHECK-FP16-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vrev64_f16(float16x4_t a) { + return vrev64_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vrev64q_f16( +// CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x half> +// CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[SHUFFLE_I]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP5]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vrev64q_f16( +// CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> +// CHECK-FP16-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vrev64q_f16(float16x8_t a) { + return vrev64q_f16(a); +} +//. +// CHECK-NOFP16: [[META3]] = !{[[META4:![0-9]+]]} +// CHECK-NOFP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"} +// CHECK-NOFP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"} +// CHECK-NOFP16: [[META6]] = !{[[META7:![0-9]+]]} +// CHECK-NOFP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"} +// CHECK-NOFP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"} +// CHECK-NOFP16: [[META9]] = !{[[META10:![0-9]+]]} +// CHECK-NOFP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"} +// CHECK-NOFP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"} +// CHECK-NOFP16: [[META12]] = !{[[META13:![0-9]+]]} +// CHECK-NOFP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"} +// CHECK-NOFP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"} +// CHECK-NOFP16: [[META15]] = !{[[META16:![0-9]+]]} +// CHECK-NOFP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"} +// CHECK-NOFP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"} +// CHECK-NOFP16: [[META18]] = !{[[META19:![0-9]+]]} +// CHECK-NOFP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"} +// CHECK-NOFP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"} +//. +// CHECK-FP16: [[META3]] = !{[[META4:![0-9]+]]} +// CHECK-FP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"} +// CHECK-FP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"} +// CHECK-FP16: [[META6]] = !{[[META7:![0-9]+]]} +// CHECK-FP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"} +// CHECK-FP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"} +// CHECK-FP16: [[META9]] = !{[[META10:![0-9]+]]} +// CHECK-FP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"} +// CHECK-FP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"} +// CHECK-FP16: [[META12]] = !{[[META13:![0-9]+]]} +// CHECK-FP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"} +// CHECK-FP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"} +// CHECK-FP16: [[META15]] = !{[[META16:![0-9]+]]} +// CHECK-FP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"} +// CHECK-FP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"} +// CHECK-FP16: [[META18]] = !{[[META19:![0-9]+]]} +// CHECK-FP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"} +// CHECK-FP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"} +//. diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c index 477da3a9e205..c62d1c9de0cb 100644 --- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c @@ -817,181 +817,3 @@ float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) { float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) { return vmulq_n_f16(a, b); } - -// CHECK-LABEL: test_vbsl_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> -// CHECK: [[VBSL:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL]] to <4 x half> -// CHECK: ret <4 x half> [[TMP3]] -float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { - return vbsl_f16(a, b, c); -} - -// CHECK-LABEL: test_vbslq_f16 -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> -// CHECK: [[VBSL:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL]] to <8 x half> -// CHECK: ret <8 x half> [[TMP3]] -float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { - return vbslq_f16(a, b, c); -} - -// CHECK-LABEL: test_vzip_f16 -// CHECK: [[VZIP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> -// CHECK: store <4 x half> [[VZIP0]], ptr [[addr1:%.*]] -// CHECK: [[VZIP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> -// CHECK: store <4 x half> [[VZIP1]], ptr [[addr2:%.*]] -float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { - return vzip_f16(a, b); -} - -// CHECK-LABEL: test_vzipq_f16 -// CHECK: [[VZIP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> -// CHECK: store <8 x half> [[VZIP0]], ptr [[addr1:%.*]] -// CHECK: [[VZIP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> -// CHECK: store <8 x half> [[VZIP1]], ptr [[addr2:%.*]] -float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { - return vzipq_f16(a, b); -} - -// CHECK-LABEL: test_vuzp_f16 -// CHECK: [[VUZP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -// CHECK: store <4 x half> [[VUZP0]], ptr [[addr1:%.*]] -// CHECK: [[VUZP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -// CHECK: store <4 x half> [[VUZP1]], ptr [[addr1:%.*]] -float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { - return vuzp_f16(a, b); -} - -// CHECK-LABEL: test_vuzpq_f16 -// CHECK: [[VUZP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> -// CHECK: store <8 x half> [[VUZP0]], ptr [[addr1:%.*]] -// CHECK: [[VUZP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> -// CHECK: store <8 x half> [[VUZP1]], ptr [[addr2:%.*]] -float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { - return vuzpq_f16(a, b); -} - -// CHECK-LABEL: test_vtrn_f16 -// CHECK: [[VTRN0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK: store <4 x half> [[VTRN0]], ptr [[addr1:%.*]] -// CHECK: [[VTRN1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK: store <4 x half> [[VTRN1]], ptr [[addr2:%.*]] -float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { - return vtrn_f16(a, b); -} - -// CHECK-LABEL: test_vtrnq_f16 -// CHECK: [[VTRN0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK: store <8 x half> [[VTRN0]], ptr [[addr1:%.*]] -// CHECK: [[VTRN1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK: store <8 x half> [[VTRN1]], ptr [[addr2:%.*]] -float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { - return vtrnq_f16(a, b); -} - -// CHECK-LABEL: test_vmov_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: ret <4 x half> [[TMP3]] -float16x4_t test_vmov_n_f16(float16_t a) { - return vmov_n_f16(a); -} - -// CHECK-LABEL: test_vmovq_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <8 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4 -// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5 -// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6 -// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7 -// CHECK: ret <8 x half> [[TMP7]] -float16x8_t test_vmovq_n_f16(float16_t a) { - return vmovq_n_f16(a); -} - -// CHECK-LABEL: test_vdup_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: ret <4 x half> [[TMP3]] -float16x4_t test_vdup_n_f16(float16_t a) { - return vdup_n_f16(a); -} - -// CHECK-LABEL: test_vdupq_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <8 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4 -// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5 -// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6 -// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7 -// CHECK: ret <8 x half> [[TMP7]] -float16x8_t test_vdupq_n_f16(float16_t a) { - return vdupq_n_f16(a); -} - -// CHECK-LABEL: test_vdup_lane_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> -// CHECK: ret <4 x half> [[LANE]] -float16x4_t test_vdup_lane_f16(float16x4_t a) { - return vdup_lane_f16(a, 3); -} - -// CHECK-LABEL: test_vdupq_lane_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> -// CHECK: ret <8 x half> [[LANE]] -float16x8_t test_vdupq_lane_f16(float16x4_t a) { - return vdupq_lane_f16(a, 3); -} - -// CHECK-LABEL: @test_vext_f16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5> -// CHECK: ret <4 x half> [[VEXT]] -float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { - return vext_f16(a, b, 2); -} - -// CHECK-LABEL: @test_vextq_f16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> -// CHECK: ret <8 x half> [[VEXT]] -float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { - return vextq_f16(a, b, 5); -} - -// CHECK-LABEL: @test_vrev64_f16( -// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -// CHECK: ret <4 x half> [[SHFL]] -float16x4_t test_vrev64_f16(float16x4_t a) { - return vrev64_f16(a); -} - -// CHECK-LABEL: @test_vrev64q_f16( -// CHECK: [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> -// CHECK: ret <8 x half> [[SHFL]] -float16x8_t test_vrev64q_f16(float16x8_t a) { - return vrev64q_f16(a); -} |