From 35691f2512500bb48016a094cb2d3466e2bbc8a9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 May 2018 18:02:11 +0000 Subject: [X86] Reduce the number of setzero intrinsics to just the set defined by the Intel Intrinsics Guide. We had quite a few for different element sizes of integers, sometimes with strange target features attached to them. We only need a single version for each of __m128i, __m256i, and __m512i with the target feature that first introduced those types. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@333568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/avx512bitalgintrin.h | 4 +- lib/Headers/avx512bwintrin.h | 168 +++++++++++++++++---------------------- lib/Headers/avx512dqintrin.h | 2 +- lib/Headers/avx512fintrin.h | 16 ++-- lib/Headers/avx512vbmi2intrin.h | 24 +++--- lib/Headers/avx512vlbwintrin.h | 33 ++++---- lib/Headers/avx512vlcdintrin.h | 4 +- lib/Headers/avx512vldqintrin.h | 2 +- lib/Headers/avx512vlintrin.h | 66 +++++++-------- lib/Headers/gfniintrin.h | 6 +- lib/Headers/mmintrin.h | 2 +- test/CodeGen/avx512bw-builtins.c | 4 +- test/Headers/x86intrin-2.c | 4 - 13 files changed, 150 insertions(+), 185 deletions(-) diff --git a/lib/Headers/avx512bitalgintrin.h b/lib/Headers/avx512bitalgintrin.h index 2dd1471d2f..20c0645dd5 100644 --- a/lib/Headers/avx512bitalgintrin.h +++ b/lib/Headers/avx512bitalgintrin.h @@ -48,7 +48,7 @@ _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { - return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(), + return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_si512(), __U, __B); } @@ -70,7 +70,7 @@ _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { - return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(), + return _mm512_mask_popcnt_epi8((__m512i) 
_mm512_setzero_si512(), __U, __B); } diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index 0bf98e947f..8b7952d9f2 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -34,26 +34,6 @@ typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_setzero_qi(void) { - return (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; -} - -static __inline __m512i __DEFAULT_FN_ATTRS -_mm512_setzero_hi(void) { - return (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }; -} - /* Integer compare */ #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ @@ -212,7 +192,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_add_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -231,7 +211,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_sub_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -250,7 +230,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_add_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + 
(__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -269,7 +249,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sub_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -288,7 +268,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mullo_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -326,7 +306,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_abs_epi8(__A), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -348,7 +328,7 @@ _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_abs_epi16(__A), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -362,7 +342,7 @@ _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packs_epi32(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -392,7 +372,7 @@ _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packs_epi16(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -406,7 +386,7 @@ _mm512_maskz_packus_epi32(__mmask32 __M, 
__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_packus_epi32(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -436,7 +416,7 @@ _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_packus_epi16(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -444,7 +424,7 @@ _mm512_adds_epi8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), (__mmask64) -1); } @@ -463,7 +443,7 @@ _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), (__mmask64) __U); } @@ -472,7 +452,7 @@ _mm512_adds_epi16 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) -1); } @@ -491,7 +471,7 @@ _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) __U); } @@ -500,7 +480,7 @@ _mm512_adds_epu8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), (__mmask64) -1); } @@ -519,7 +499,7 @@ _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), 
(__mmask64) __U); } @@ -528,7 +508,7 @@ _mm512_adds_epu16 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) -1); } @@ -547,7 +527,7 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) __U); } @@ -575,7 +555,7 @@ _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -602,7 +582,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), - (__v32hi) _mm512_setzero_hi()); + (__v32hi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -616,7 +596,7 @@ _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -638,7 +618,7 @@ _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -661,7 +641,7 @@ _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_max_epu8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i 
__DEFAULT_FN_ATTRS @@ -683,7 +663,7 @@ _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_max_epu16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -705,7 +685,7 @@ _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -727,7 +707,7 @@ _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -749,7 +729,7 @@ _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, (__v64qi)_mm512_min_epu8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -771,7 +751,7 @@ _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, (__v32hi)_mm512_min_epu16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -801,7 +781,7 @@ _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -809,7 +789,7 @@ _mm512_subs_epi8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + 
(__v64qi) _mm512_setzero_si512(), (__mmask64) -1); } @@ -828,7 +808,7 @@ _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), (__mmask64) __U); } @@ -837,7 +817,7 @@ _mm512_subs_epi16 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) -1); } @@ -856,7 +836,7 @@ _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) __U); } @@ -865,7 +845,7 @@ _mm512_subs_epu8 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), (__mmask64) -1); } @@ -884,7 +864,7 @@ _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, (__v64qi) __B, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), (__mmask64) __U); } @@ -893,7 +873,7 @@ _mm512_subs_epu16 (__m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) -1); } @@ -912,7 +892,7 @@ _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, (__v32hi) __B, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), (__mmask32) __U); } @@ -947,7 +927,7 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, { return (__m512i)__builtin_ia32_selectw_512(__U, (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), - (__v32hi)_mm512_setzero_hi()); 
+ (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -969,7 +949,7 @@ _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -992,7 +972,7 @@ _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1014,7 +994,7 @@ _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1034,7 +1014,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, (__v32hi)_mm512_maddubs_epi16(__X, __Y), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1167,7 +1147,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpackhi_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1194,7 +1174,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpackhi_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + 
(__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1229,7 +1209,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_unpacklo_epi8(__A, __B), - (__v64qi)_mm512_setzero_qi()); + (__v64qi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1256,7 +1236,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_unpacklo_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1280,7 +1260,7 @@ _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepi8_epi16(__A), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1302,7 +1282,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_cvtepu8_epi16(__A), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } @@ -1340,7 +1320,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflehi_epi16((A), \ (imm)), \ - (__v32hi)_mm512_setzero_hi()); }) + (__v32hi)_mm512_setzero_si512()); }) #define _mm512_shufflelo_epi16(A, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \ @@ -1378,7 +1358,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ (__v32hi)_mm512_shufflelo_epi16((A), \ (imm)), \ - (__v32hi)_mm512_setzero_hi()); }) + (__v32hi)_mm512_setzero_si512()); }) static __inline__ __m512i __DEFAULT_FN_ATTRS 
_mm512_sllv_epi16(__m512i __A, __m512i __B) @@ -1399,7 +1379,7 @@ _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sllv_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1421,7 +1401,7 @@ _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sll_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1443,7 +1423,7 @@ _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_slli_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } #define _mm512_bslli_epi128(a, imm) __extension__ ({ \ @@ -1534,7 +1514,7 @@ _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srlv_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1556,7 +1536,7 @@ _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srav_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1578,7 +1558,7 @@ _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_sra_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1600,7 +1580,7 @@ _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) { return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srai_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1622,7 +1602,7 @@ _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srl_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1644,7 +1624,7 @@ _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_srli_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } #define _mm512_bsrli_epi128(a, imm) __extension__ ({ \ @@ -1729,7 +1709,7 @@ _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, (__v32hi) __A, - (__v32hi) _mm512_setzero_hi ()); + (__v32hi) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1745,7 +1725,7 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, (__v64qi) __A, - (__v64qi) _mm512_setzero_hi ()); + (__v64qi) _mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -1791,7 +1771,7 @@ _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, (__v32hi) - _mm512_setzero_hi (), + _mm512_setzero_si512 (), (__mmask32) __U); } @@ -1808,7 +1788,7 @@ _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, (__v64qi) - _mm512_setzero_hi (), + _mm512_setzero_si512 (), (__mmask64) __U); } static __inline__ void __DEFAULT_FN_ATTRS @@ -1831,55 +1811,55 @@ static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_test_epi8_mask (__m512i __A, __m512i __B) { return 
_mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_test_epi16_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_testn_epi8_mask (__m512i __A, __m512i __B) { - return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_qi()); + return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_testn_epi16_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_qi()); + _mm512_setzero_si512()); } static __inline__ __mmask64 __DEFAULT_FN_ATTRS @@ -1986,7 +1966,7 @@ _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 
(__v32hi)_mm512_permutexvar_epi16(__A, __B), - (__v32hi)_mm512_setzero_hi()); + (__v32hi)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -2027,7 +2007,7 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, #define _mm512_maskz_dbsad_epu8(U, A, B, imm) ({\ (__m512i)__builtin_ia32_dbpsadbw512_mask((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), (int)(imm), \ - (__v32hi)_mm512_setzero_hi(), \ + (__v32hi)_mm512_setzero_si512(), \ (__mmask32)(U)); }) static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h index e75c958434..d873b8c651 100644 --- a/lib/Headers/avx512dqintrin.h +++ b/lib/Headers/avx512dqintrin.h @@ -1178,7 +1178,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ - (__v2di)_mm_setzero_di()); }) + (__v2di)_mm_setzero_si128()); }) #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 5bfe39ec79..3f60ee814d 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -8917,56 +8917,56 @@ static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_test_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline __mmask8 __DEFAULT_FN_ATTRS _mm512_test_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + 
_mm512_setzero_si512()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_testn_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_testn_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), - _mm512_setzero_epi32()); + _mm512_setzero_si512()); } static __inline__ __m512 __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512vbmi2intrin.h b/lib/Headers/avx512vbmi2intrin.h index 43e97b40a0..585b4566f0 100644 --- a/lib/Headers/avx512vbmi2intrin.h +++ b/lib/Headers/avx512vbmi2intrin.h @@ -44,7 +44,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), __U); } @@ -60,7 +60,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), __U); } @@ -90,7 +90,7 @@ static __inline__ 
__m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), __U); } @@ -106,7 +106,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) { return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), __U); } @@ -122,7 +122,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, - (__v32hi) _mm512_setzero_hi(), + (__v32hi) _mm512_setzero_si512(), __U); } @@ -138,7 +138,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, - (__v64qi) _mm512_setzero_qi(), + (__v64qi) _mm512_setzero_si512(), __U); } @@ -150,7 +150,7 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__mmask8)(U)); }) #define _mm512_maskz_shldi_epi64(U, A, B, I) \ - _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + _mm512_mask_shldi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I)) #define _mm512_shldi_epi64(A, B, I) \ _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) @@ -163,7 +163,7 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__mmask16)(U)); }) #define _mm512_maskz_shldi_epi32(U, A, B, I) \ - _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + _mm512_mask_shldi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I)) #define _mm512_shldi_epi32(A, B, I) \ _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) @@ -176,7 +176,7 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__mmask32)(U)); }) #define _mm512_maskz_shldi_epi16(U, A, B, I) \ 
- _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + _mm512_mask_shldi_epi16(_mm512_setzero_si512(), (U), (A), (B), (I)) #define _mm512_shldi_epi16(A, B, I) \ _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) @@ -189,7 +189,7 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__mmask8)(U)); }) #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ - _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) + _mm512_mask_shrdi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I)) #define _mm512_shrdi_epi64(A, B, I) \ _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) @@ -202,7 +202,7 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__mmask16)(U)); }) #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ - _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) + _mm512_mask_shrdi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I)) #define _mm512_shrdi_epi32(A, B, I) \ _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) @@ -215,7 +215,7 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) (__mmask32)(U)); }) #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ - _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) + _mm512_mask_shrdi_epi16(_mm512_setzero_si512(), (U), (A), (B), (I)) #define _mm512_shrdi_epi16(A, B, I) \ _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h index 30a12b6072..a18e2c98a4 100644 --- a/lib/Headers/avx512vlbwintrin.h +++ b/lib/Headers/avx512vlbwintrin.h @@ -31,11 +31,6 @@ /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) -static __inline __m128i __DEFAULT_FN_ATTRS -_mm_setzero_hi(void){ - return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }; -} - /* Integer compare */ #define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \ @@ -1846,7 +1841,7 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) #define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_hi()); }) + (__v8hi)_mm_setzero_si128()); }) #define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ @@ -1866,7 +1861,7 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) #define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \ (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ - (__v8hi)_mm_setzero_hi()); }) + (__v8hi)_mm_setzero_si128()); }) #define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \ (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ @@ -2217,7 +2212,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, (__v8hi) __A, - (__v8hi) _mm_setzero_hi ()); + (__v8hi) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -2249,7 +2244,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, (__v16qi) __A, - (__v16qi) _mm_setzero_hi ()); + (__v16qi) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -2314,7 +2309,7 @@ _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, (__v8hi) - _mm_setzero_hi (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -2403,14 +2398,14 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) 
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_test_epi8_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_hi()); + return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_hi()); + _mm_setzero_si128()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS @@ -2430,14 +2425,14 @@ _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi16_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi()); + return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_hi()); + _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS @@ -2457,14 +2452,14 @@ _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_testn_epi8_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_hi()); + return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_hi()); + _mm_setzero_si128()); } static __inline__ __mmask32 __DEFAULT_FN_ATTRS @@ -2484,13 +2479,13 @@ _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi16_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi16_mask (_mm_and_si128 
(__A, __B), _mm_setzero_hi()); + return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { - return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_hi()); + return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); } static __inline__ __mmask16 __DEFAULT_FN_ATTRS @@ -2721,7 +2716,7 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, #define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \ (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), (int)(imm), \ - (__v8hi)_mm_setzero_hi(), \ + (__v8hi)_mm_setzero_si128(), \ (__mmask8)-1); }) #define _mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \ diff --git a/lib/Headers/avx512vlcdintrin.h b/lib/Headers/avx512vlcdintrin.h index 555fe1e6fe..d3b6da256d 100644 --- a/lib/Headers/avx512vlcdintrin.h +++ b/lib/Headers/avx512vlcdintrin.h @@ -77,7 +77,7 @@ _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -217,7 +217,7 @@ _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h index e1b9abb25c..162867de54 100644 --- a/lib/Headers/avx512vldqintrin.h +++ b/lib/Headers/avx512vldqintrin.h @@ -1115,7 +1115,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ - (__v2di)_mm_setzero_di()); }) + 
(__v2di)_mm_setzero_si128()); }) #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ (__m256d)__builtin_shufflevector((__v4df)(A), \ diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index ced0d276c2..23bc89a65d 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -30,12 +30,6 @@ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) -/* Doesn't require avx512vl, used in avx512dqintrin.h */ -static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) -_mm_setzero_di(void) { - return (__m128i)(__v2di){ 0LL, 0LL}; -} - /* Integer compare */ #define _mm_cmpeq_epi32_mask(A, B) \ @@ -4247,7 +4241,7 @@ _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) #define _mm_rol_epi64(a, b) __extension__ ({\ (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)_mm_setzero_di(), \ + (__v2di)_mm_setzero_si128(), \ (__mmask8)-1); }) #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ @@ -4256,7 +4250,7 @@ _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ - (__v2di)_mm_setzero_di(), \ + (__v2di)_mm_setzero_si128(), \ (__mmask8)(u)); }) #define _mm256_rol_epi64(a, b) __extension__ ({\ @@ -4339,7 +4333,7 @@ _mm_rolv_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -4359,7 +4353,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -4423,7 +4417,7 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) #define _mm_ror_epi64(A, B) __extension__ ({ \ 
(__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)_mm_setzero_di(), \ + (__v2di)_mm_setzero_si128(), \ (__mmask8)-1); }) #define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \ @@ -4432,7 +4426,7 @@ _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \ (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ - (__v2di)_mm_setzero_di(), \ + (__v2di)_mm_setzero_si128(), \ (__mmask8)(U)); }) #define _mm256_ror_epi64(A, B) __extension__ ({ \ @@ -4526,7 +4520,7 @@ _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -4558,7 +4552,7 @@ _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_slli_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -4643,7 +4637,7 @@ _mm_rorv_epi64 (__m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) -1); } @@ -4663,7 +4657,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, (__v2di) __B, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -4710,7 +4704,7 @@ _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sllv_epi64(__X, __Y), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -4774,7 +4768,7 @@ _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 
(__v2di)_mm_srlv_epi64(__X, __Y), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -4902,7 +4896,7 @@ _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -4934,7 +4928,7 @@ _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srli_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -5004,7 +4998,7 @@ _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srav_epi64(__X, __Y), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -5129,7 +5123,7 @@ _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __A, - (__v2di) _mm_setzero_di ()); + (__v2di) _mm_setzero_si128 ()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -5162,7 +5156,7 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, (__v2di) - _mm_setzero_di (), + _mm_setzero_si128 (), (__mmask8) __U); } @@ -6011,14 +6005,14 @@ _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi32_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), - 
_mm_setzero_di()); + _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS @@ -6038,14 +6032,14 @@ _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi64_mask (__m128i __A, __m128i __B) { - return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS @@ -6065,14 +6059,14 @@ _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi32_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS @@ -6092,14 +6086,14 @@ _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi64_mask (__m128i __A, __m128i __B) { - return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di()); + return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), - _mm_setzero_di()); + _mm_setzero_si128()); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS @@ -6161,7 +6155,7 @@ 
_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -6225,7 +6219,7 @@ _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -6327,7 +6321,7 @@ _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_sra_epi64(__A, __B), \ - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -6371,7 +6365,7 @@ _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ (__v2di)_mm_srai_epi64(__A, __imm), \ - (__v2di)_mm_setzero_di()); + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS @@ -8141,7 +8135,7 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) #define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ - (__v2di)_mm_setzero_di()); }) + (__v2di)_mm_setzero_si128()); }) #define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \ diff --git a/lib/Headers/gfniintrin.h b/lib/Headers/gfniintrin.h index e828c335ef..a5895b3fe2 100644 --- a/lib/Headers/gfniintrin.h +++ b/lib/Headers/gfniintrin.h @@ -71,7 +71,7 @@ (__v64qi)(__m512i)(S)); }) #define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ - (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + 
(__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I); }) #define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ @@ -116,7 +116,7 @@ (__v64qi)(__m512i)(S)); }) #define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ - (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I); }) /* Default attributes for simple form (no masking). */ @@ -193,7 +193,7 @@ _mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) { - return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(), + return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(), __U, __A, __B); } diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h index ef2a97effd..56b44a23db 100644 --- a/lib/Headers/mmintrin.h +++ b/lib/Headers/mmintrin.h @@ -1295,7 +1295,7 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void) { - return (__m64){ 0LL }; + return __extension__ (__m64){ 0LL }; } /// Constructs a 64-bit integer vector initialized with the specified diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index 19eb571305..4deb64f0eb 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -684,7 +684,7 @@ __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) { // CHECK: add <64 x i16> %{{.*}}, // CHECK: lshr <64 x i16> %{{.*}}, // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8> - // CHECK: store <64 x i8> zeroinitializer + // CHECK: store <8 x i64> zeroinitializer // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} return _mm512_maskz_avg_epu8(__U,__A,__B); } @@ -720,7 +720,7 @@ __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, 
__m512i __B) { // CHECK: add <32 x i32> %{{.*}}, // CHECK: lshr <32 x i32> %{{.*}}, // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16> - // CHECK: store <32 x i16> zeroinitializer + // CHECK: store <8 x i64> zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_avg_epu16(__U,__A,__B); } diff --git a/test/Headers/x86intrin-2.c b/test/Headers/x86intrin-2.c index 9be8545a2c..e6fd7c8044 100644 --- a/test/Headers/x86intrin-2.c +++ b/test/Headers/x86intrin-2.c @@ -72,10 +72,6 @@ __mmask8 __attribute__((__target__("avx512vl"))) mm_cmpeq_epi32_mask_wrap(__m128 return _mm_cmpeq_epi32_mask(a, b); } -__m512i __attribute__((__target__("avx512bw"))) mm512_setzero_qi_wrap(void) { - return _mm512_setzero_qi(); -} - __m512i __attribute__((__target__("avx512dq"))) mm512_mullo_epi64_wrap(__m512i a, __m512i b) { return _mm512_mullo_epi64(a, b); } -- cgit v1.2.3