summaryrefslogtreecommitdiffstats
path: root/src/gui/image/qimage_ssse3.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/image/qimage_ssse3.cpp')
-rw-r--r--src/gui/image/qimage_ssse3.cpp22
1 files changed, 8 insertions, 14 deletions
diff --git a/src/gui/image/qimage_ssse3.cpp b/src/gui/image/qimage_ssse3.cpp
index 0fa0eecd80..9cdfba20e3 100644
--- a/src/gui/image/qimage_ssse3.cpp
+++ b/src/gui/image/qimage_ssse3.cpp
@@ -50,15 +50,11 @@ QT_BEGIN_NAMESPACE
// dst must be at least len * 4 bytes
Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, const uchar *src, int len)
{
- quint32 *const end = dst + len;
+ int i = 0;
- // Prologue, align dst to 16 bytes. The alignment is done on dst because it has 4 store()
- // for each 3 load() of src.
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
- const int prologLength = qMin(len, offsetToAlignOn16Bytes);
-
- for (int i = 0; i < prologLength; ++i) {
- *dst++ = qRgb(src[0], src[1], src[2]);
+ // Prologue, align dst to 16 bytes.
+ ALIGNMENT_PROLOGUE_16BYTES(dst, i, len) {
+ dst[i] = qRgb(src[0], src[1], src[2]);
src += 3;
}
@@ -72,10 +68,9 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, con
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
const __m128i *inVectorPtr = (const __m128i *)src;
- __m128i *dstVectorPtr = (__m128i *)dst;
+ __m128i *dstVectorPtr = (__m128i *)(dst + i);
- const int simdRoundCount = (len - prologLength) / 16; // one iteration in the loop converts 16 pixels
- for (int i = 0; i < simdRoundCount; ++i) {
+ for (; i < (len - 15); i += 16) { // one iteration in the loop converts 16 pixels
/*
RGB888 has 5 pixels per vector, + 1 byte from the next pixel. The idea here is
to load vectors of RGB888 and use palignr to select a vector out of two vectors.
@@ -117,10 +112,9 @@ Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32_ssse3(quint32 *dst, con
++dstVectorPtr;
}
src = (const uchar *)inVectorPtr;
- dst = (quint32 *)dstVectorPtr;
- while (dst != end) {
- *dst++ = qRgb(src[0], src[1], src[2]);
+ SIMD_EPILOGUE(i, len, 15) {
+ dst[i] = qRgb(src[0], src[1], src[2]);
src += 3;
}
}