diff options
Diffstat (limited to 'src/gui/painting/qdrawhelper_p.h')
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 131 |
1 file changed, 93 insertions, 38 deletions
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 1c6cd5db8a..f13cdbaec0 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -1,31 +1,37 @@
 /****************************************************************************
 **
-** Copyright (C) 2015 The Qt Company Ltd.
-** Contact: http://www.qt.io/licensing/
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
 **
 ** This file is part of the QtGui module of the Qt Toolkit.
 **
-** $QT_BEGIN_LICENSE:LGPL21$
+** $QT_BEGIN_LICENSE:LGPL$
 ** Commercial License Usage
 ** Licensees holding valid commercial Qt licenses may use this file in
 ** accordance with the commercial license agreement provided with the
 ** Software or, alternatively, in accordance with the terms contained in
 ** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see http://www.qt.io/terms-conditions. For further
-** information use the contact form at http://www.qt.io/contact-us.
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
 **
 ** GNU Lesser General Public License Usage
 ** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 2.1 or version 3 as published by the Free
-** Software Foundation and appearing in the file LICENSE.LGPLv21 and
-** LICENSE.LGPLv3 included in the packaging of this file. Please review the
-** following information to ensure the GNU Lesser General Public License
-** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
-** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
 **
-** As a special exception, The Qt Company gives you certain additional
-** rights. These rights are described in The Qt Company LGPL Exception
-** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
 **
 ** $QT_END_LICENSE$
 **
@@ -632,31 +638,75 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
 }
 #endif
 
-#ifdef __SSE2__
+#if defined(__SSE2__)
+static Q_ALWAYS_INLINE uint interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
+{
+    // First interpolate top and bottom pixels in parallel.
+    vt = _mm_unpacklo_epi8(vt, _mm_setzero_si128());
+    vb = _mm_unpacklo_epi8(vb, _mm_setzero_si128());
+    vt = _mm_mullo_epi16(vt, _mm_set1_epi16(256 - disty));
+    vb = _mm_mullo_epi16(vb, _mm_set1_epi16(disty));
+    __m128i vlr = _mm_add_epi16(vt, vb);
+    vlr = _mm_srli_epi16(vlr, 8);
+    // vlr now contains the result of the first two interpolate calls vlr = unpacked((xright << 64) | xleft)
+
+    // Now the last interpolate between left and right..
+    const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128i vmulx = _mm_unpacklo_epi16(vidistx, vdistx);
+    vlr = _mm_unpacklo_epi16(vlr, _mm_srli_si128(vlr, 8));
+    // vlr now contains the colors of left and right interleaved { la, ra, lr, rr, lg, rg, lb, rb }
+    vlr = _mm_madd_epi16(vlr, vmulx); // Multiply and horizontal add.
+    vlr = _mm_srli_epi32(vlr, 8);
+    vlr = _mm_packs_epi32(vlr, vlr);
+    vlr = _mm_packus_epi16(vlr, vlr);
+    return _mm_cvtsi128_si32(vlr);
+}
+
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    __m128i vt = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tl), _mm_cvtsi32_si128(tr));
+    __m128i vb = _mm_unpacklo_epi32(_mm_cvtsi32_si128(bl), _mm_cvtsi32_si128(br));
+    return interpolate_4_pixels_sse2(vt, vb, distx, disty);
+}
+
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    __m128i vt = _mm_loadl_epi64((const __m128i*)t);
+    __m128i vb = _mm_loadl_epi64((const __m128i*)b);
+    return interpolate_4_pixels_sse2(vt, vb, distx, disty);
+}
+#elif defined(__ARM_NEON__)
+static Q_ALWAYS_INLINE uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
+{
+    uint16x8_t vt16 = vmovl_u8(vreinterpret_u8_u32(vt32));
+    uint16x8_t vb16 = vmovl_u8(vreinterpret_u8_u32(vb32));
+    vt16 = vmulq_n_u16(vt16, 256 - disty);
+    vt16 = vmlaq_n_u16(vt16, vb16, disty);
+    vt16 = vshrq_n_u16(vt16, 8);
+    uint16x4_t vl16 = vget_low_u16(vt16);
+    uint16x4_t vr16 = vget_high_u16(vt16);
+    vl16 = vmul_n_u16(vl16, 256 - distx);
+    vl16 = vmla_n_u16(vl16, vr16, distx);
+    vl16 = vshr_n_u16(vl16, 8);
+    uint8x8_t vr = vmovn_u16(vcombine_u16(vl16, vl16));
+    return vget_lane_u32(vreinterpret_u32_u8(vr), 0);
+}
+
 static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
 {
-    // First interpolate right and left pixels in parallel.
-    __m128i vl = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tl), _mm_cvtsi32_si128(bl));
-    __m128i vr = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tr), _mm_cvtsi32_si128(br));
-    vl = _mm_unpacklo_epi8(vl, _mm_setzero_si128());
-    vr = _mm_unpacklo_epi8(vr, _mm_setzero_si128());
-    vl = _mm_mullo_epi16(vl, _mm_set1_epi16(256 - distx));
-    vr = _mm_mullo_epi16(vr, _mm_set1_epi16(distx));
-    __m128i vtb = _mm_add_epi16(vl, vr);
-    vtb = _mm_srli_epi16(vtb, 8);
-    // vtb now contains the result of the first two interpolate calls vtb = unpacked((xbot << 64) | xtop)
-
-    // Now the last interpolate between top and bottom interpolations.
-    const __m128i vidisty = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - disty), _MM_SHUFFLE(0, 0, 0, 0));
-    const __m128i vdisty = _mm_shufflelo_epi16(_mm_cvtsi32_si128(disty), _MM_SHUFFLE(0, 0, 0, 0));
-    const __m128i vmuly = _mm_unpacklo_epi16(vidisty, vdisty);
-    vtb = _mm_unpacklo_epi16(vtb, _mm_srli_si128(vtb, 8));
-    // vtb now contains the colors of top and bottom interleaved { ta, ba, tr, br, tg, bg, tb, bb }
-    vtb = _mm_madd_epi16(vtb, vmuly); // Multiply and horizontal add.
-    vtb = _mm_srli_epi32(vtb, 8);
-    vtb = _mm_packs_epi32(vtb, _mm_setzero_si128());
-    vtb = _mm_packus_epi16(vtb, _mm_setzero_si128());
-    return _mm_cvtsi128_si32(vtb);
+    uint32x2_t vt32 = vmov_n_u32(tl);
+    uint32x2_t vb32 = vmov_n_u32(bl);
+    vt32 = vset_lane_u32(tr, vt32, 1);
+    vb32 = vset_lane_u32(br, vb32, 1);
+    return interpolate_4_pixels_neon(vt32, vb32, distx, disty);
+}
+
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    uint32x2_t vt32 = vld1_u32(t);
+    uint32x2_t vb32 = vld1_u32(b);
+    return interpolate_4_pixels_neon(vt32, vb32, distx, disty);
 }
 #else
 static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
@@ -667,6 +717,11 @@ static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint
     uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx);
     return INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty);
 }
+
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    return interpolate_4_pixels(t[0], t[1], b[0], b[1], distx, disty);
+}
 #endif
 
 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
@@ -1161,7 +1216,7 @@ typedef void (QT_FASTCALL *StorePixelsFunc)(uchar *dest, const uint *src, int in
 
 extern QPixelLayout qPixelLayouts[QImage::NImageFormats];
 extern const FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount];
-extern const StorePixelsFunc qStorePixels[QPixelLayout::BPPCount];
+extern StorePixelsFunc qStorePixels[QPixelLayout::BPPCount];