From 8ea61d6d2a63e49a7734fe70a2f2ddd2e14f7ca4 Mon Sep 17 00:00:00 2001 From: Frederik Gladhorn Date: Thu, 22 Oct 2015 14:38:44 +0200 Subject: Fix alignment issues on 32 bit in qConvertA2RGB30PMToARGB64PM_sse2 and qConvertARGB32PMToARGB64PM_sse2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32 bit platforms the pointers may end up being 4 byte aligned. Happens with MSVC on 32 bit Windows. _mm_store_si128 is documented to require 16 byte alignment. Change-Id: I80737fedf9e7f436a51a83924117cc0bc63017cc Reviewed-by: Jędrzej Nowacki --- src/gui/painting/qdrawhelper.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src/gui/painting') diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 64a363868a..6cfc4b9307 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -503,14 +503,16 @@ static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const template static inline void qConvertARGB32PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count) { + if (count <= 0) + return; + const __m128i amask = _mm_set1_epi32(0xff000000); int i = 0; - if (((uintptr_t)buffer & 0xf) && count > 0) { + for (; ((uintptr_t)buffer & 0xf) && i < count; ++i) { uint s = *src++; if (RGBA) s = RGBA2ARGB(s); *buffer++ = QRgba64::fromArgb32(s); - i++; } for (; i < count-3; i += 4) { __m128i vs = _mm_loadu_si128((const __m128i*)src); @@ -641,15 +643,18 @@ static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const ui template static inline void qConvertA2RGB30PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count) { + if (count <= 0) + return; + const __m128i rmask = _mm_set1_epi32(0x3ff00000); const __m128i gmask = _mm_set1_epi32(0x000ffc00); const __m128i bmask = _mm_set1_epi32(0x000003ff); const __m128i afactor = _mm_set1_epi16(0x5555); int i = 0; - if (((uintptr_t)buffer & 0xf) && count > 0) { + + for (; 
((uintptr_t)buffer & 0xf) && i < count; ++i) *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++); - i++; - } + for (; i < count-3; i += 4) { __m128i vs = _mm_loadu_si128((const __m128i*)src); src += 4; -- cgit v1.2.3