summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-04-09 10:49:27 +0200
committerAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-04-14 09:37:47 +0000
commitac659cd203bc43c472c58191cba3fe4f96247dfa (patch)
tree4b486f07b5a27c5c9596a288cf8fbd8f8508ef8c /src/gui/painting
parent361051345047a76da15ddc3de71a21b672944837 (diff)
Extend high color rendering to image rendering
A previous commit added 16-bit per color-channel precision to solids and linear gradients. This patch extends that to include texture data. To support that pixellayouts now have a method to convert to RGBA64. Change-Id: I661ae91bd7038085787003608a0af4add057e478 Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/painting')
-rw-r--r--src/gui/painting/qdrawhelper.cpp1105
-rw-r--r--src/gui/painting/qdrawhelper_p.h3
-rw-r--r--src/gui/painting/qrgba64_p.h33
3 files changed, 1068 insertions, 73 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 2dab2ba859..aad904e8fc 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -185,6 +185,36 @@ static const uint *QT_FASTCALL convertToRGB32(uint *buffer, const uint *src, int
}
template<QImage::Format Format>
+static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
+ Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
+ Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
+
+ Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
+ Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
+ Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
+
+ Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
+ Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
+ Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
+
+ for (int i = 0; i < count; ++i) {
+ uint red = (src[i] >> redShift<Format>()) & redMask;
+ uint green = (src[i] >> greenShift<Format>()) & greenMask;
+ uint blue = (src[i] >> blueShift<Format>()) & blueMask;
+
+ red = ((red << redLeftShift) | (red >> redRightShift)) << 16;
+ green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8;
+ blue = (blue << blueLeftShift) | (blue >> blueRightShift);
+ buffer[i] = QRgba64::fromRgba(red, green, blue, 255);
+ }
+
+ return buffer;
+}
+
+template<QImage::Format Format>
static const uint *QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -220,6 +250,41 @@ static const uint *QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, const uint
}
template<QImage::Format Format>
+static const QRgba64 *QT_FASTCALL convertARGBPMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1);
+ Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
+ Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
+ Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
+
+ Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>();
+ Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
+ Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
+ Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
+
+ Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8;
+ Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
+ Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
+ Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
+
+ for (int i = 0; i < count; ++i) {
+ uint alpha = (src[i] >> alphaShift<Format>()) & alphaMask;
+ uint red = (src[i] >> redShift<Format>()) & redMask;
+ uint green = (src[i] >> greenShift<Format>()) & greenMask;
+ uint blue = (src[i] >> blueShift<Format>()) & blueMask;
+
+ alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift);
+ red = qMin(alpha, (red << redLeftShift) | (red >> redRightShift));
+ green = qMin(alpha, (green << greenLeftShift) | (green >> greenRightShift));
+ blue = qMin(alpha, (blue << blueLeftShift) | (blue >> blueRightShift));
+ buffer[i] = QRgba64::fromRgba(red, green, blue, alpha);
+ }
+
+ return buffer;
+}
+
+template<QImage::Format Format>
static const uint *QT_FASTCALL convertRGBFromARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -321,7 +386,8 @@ template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixe
false, bitsPerPixel<Format>(),
convertToRGB32<Format>,
convertRGBFromARGB32PM<Format>,
- convertRGBFromRGB32<Format>
+ convertRGBFromRGB32<Format>,
+ convertToRGB64<Format>
};
}
@@ -335,7 +401,8 @@ template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixe
true, bitsPerPixel<Format>(),
convertARGBPMToARGB32PM<Format>,
convertARGBPMFromARGB32PM<Format>,
- convertARGBPMFromRGB32<Format>
+ convertARGBPMFromRGB32<Format>,
+ convertARGBPMToARGB64PM<Format>
};
}
@@ -350,6 +417,14 @@ static const uint *QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, const uint
return buffer;
}
+static const QRgba64 *QT_FASTCALL convertIndexedToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *clut)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromArgb32(clut[src[i]]).premultiplied();
+ return buffer;
+}
+
static const uint *QT_FASTCALL convertPassThrough(uint *, const uint *src, int,
const QPixelLayout *, const QRgb *)
{
@@ -392,6 +467,22 @@ static const uint *QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, const uint
return buffer;
}
+static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]);
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromRgba(src[i], src[i], src[i], 255);
+ return buffer;
+}
+
static const uint *QT_FASTCALL convertARGB32FromARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -408,6 +499,111 @@ static const uint *QT_FASTCALL convertRGBA8888PMFromARGB32PM(uint *buffer, const
return buffer;
}
+#ifdef __SSE2__
+template<bool RGBA, bool maskAlpha>
+static inline void qConvertARGB32PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count)
+{
+ const __m128i amask = _mm_set1_epi32(0xff000000);
+ int i = 0;
+ if (((uintptr_t)buffer & 0xf) && count > 0) {
+ uint s = *src++;
+ if (RGBA)
+ s = RGBA2ARGB(s);
+ *buffer++ = QRgba64::fromArgb32(s);
+ i++;
+ }
+ for (; i < count-3; i += 4) {
+ __m128i vs = _mm_loadu_si128((const __m128i*)src);
+ if (maskAlpha)
+ vs = _mm_or_si128(vs, amask);
+ src += 4;
+ __m128i v1 = _mm_unpacklo_epi8(vs, vs);
+ __m128i v2 = _mm_unpackhi_epi8(vs, vs);
+ if (!RGBA) {
+ v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
+ v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
+ v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
+ v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
+ }
+ _mm_store_si128((__m128i*)(buffer), v1);
+ buffer += 2;
+ _mm_store_si128((__m128i*)(buffer), v2);
+ buffer += 2;
+ }
+
+ for (; i < count; ++i) {
+ uint s = *src++;
+ if (RGBA)
+ s = RGBA2ARGB(s);
+ *buffer++ = QRgba64::fromArgb32(s);
+ }
+}
+#endif
+
+static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+#ifdef __SSE2__
+ qConvertARGB32PMToARGB64PM_sse2<false, true>(buffer, src, count);
+#else
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromArgb32(0xff000000 | src[i]);
+#endif
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertARGB32ToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+#ifdef __SSE2__
+ qConvertARGB32PMToARGB64PM_sse2<false, false>(buffer, src, count);
+ for (int i = 0; i < count; ++i)
+ buffer[i] = buffer[i].premultiplied();
+#else
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied();
+#endif
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertARGB32PMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+#ifdef __SSE2__
+ qConvertARGB32PMToARGB64PM_sse2<false, false>(buffer, src, count);
+#else
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromArgb32(src[i]);
+#endif
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertRGBA8888ToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+#ifdef __SSE2__
+ qConvertARGB32PMToARGB64PM_sse2<true, false>(buffer, src, count);
+ for (int i = 0; i < count; ++i)
+ buffer[i] = buffer[i].premultiplied();
+#else
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])).premultiplied();
+#endif
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertRGBA8888PMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+#ifdef __SSE2__
+ qConvertARGB32PMToARGB64PM_sse2<true, false>(buffer, src, count);
+#else
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i]));
+#endif
+ return buffer;
+}
+
static const uint *QT_FASTCALL convertRGBA8888FromARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
{
@@ -441,6 +637,60 @@ static const uint *QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, const ui
return buffer;
}
+#ifdef __SSE2__
+template<QtPixelOrder PixelOrder>
+static inline void qConvertA2RGB30PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count)
+{
+ const __m128i rmask = _mm_set1_epi32(0x3ff00000);
+ const __m128i gmask = _mm_set1_epi32(0x000ffc00);
+ const __m128i bmask = _mm_set1_epi32(0x000003ff);
+ const __m128i afactor = _mm_set1_epi16(0x5555);
+ int i = 0;
+ if (((uintptr_t)buffer & 0xf) && count > 0) {
+ *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
+ i++;
+ }
+ for (; i < count-3; i += 4) {
+ __m128i vs = _mm_loadu_si128((const __m128i*)src);
+ src += 4;
+ __m128i va = _mm_srli_epi32(vs, 30);
+ __m128i vr = _mm_and_si128(vs, rmask);
+ __m128i vb = _mm_and_si128(vs, bmask);
+ __m128i vg = _mm_and_si128(vs, gmask);
+ va = _mm_mullo_epi16(va, afactor);
+ vr = _mm_or_si128(_mm_srli_epi32(vr, 14), _mm_srli_epi32(vr, 24));
+ vg = _mm_or_si128(_mm_srli_epi32(vg, 4), _mm_srli_epi32(vg, 14));
+ vb = _mm_or_si128(_mm_slli_epi32(vb, 6), _mm_srli_epi32(vb, 4));
+ __m128i vrb;
+ if (PixelOrder == PixelOrderRGB)
+ vrb = _mm_or_si128(vr, _mm_slli_si128(vb, 2));
+ else
+ vrb = _mm_or_si128(vb, _mm_slli_si128(vr, 2));
+ __m128i vga = _mm_or_si128(vg, _mm_slli_si128(va, 2));
+ _mm_store_si128((__m128i*)(buffer), _mm_unpacklo_epi16(vrb, vga));
+ buffer += 2;
+ _mm_store_si128((__m128i*)(buffer), _mm_unpackhi_epi16(vrb, vga));
+ buffer += 2;
+ }
+
+ for (; i < count; ++i)
+ *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
+}
+#endif
+
+template<QtPixelOrder PixelOrder>
+static const QRgba64 *QT_FASTCALL convertA2RGB30PMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *, const QRgb *)
+{
+#ifdef __SSE2__
+ qConvertA2RGB30PMToARGB64PM_sse2<PixelOrder>(buffer, src, count);
+#else
+ for (int i = 0; i < count; ++i)
+ buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
+#endif
+ return buffer;
+}
+
template<QtPixelOrder PixelOrder>
static const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM(uint *buffer, const uint *src, int count,
const QPixelLayout *, const QRgb *)
@@ -602,15 +852,15 @@ inline void QT_FASTCALL storePixels<QPixelLayout::BPP32>(uchar *dest, const uint
// convertFromArgb32() assumes that no color channel is more than 8 bits.
// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
- { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPPNone, 0, 0, 0 }, // Format_Invalid
- { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1MSB, convertIndexedToARGB32PM, 0, 0 }, // Format_Mono
- { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1LSB, convertIndexedToARGB32PM, 0, 0 }, // Format_MonoLSB
- { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertIndexedToARGB32PM, 0, 0 }, // Format_Indexed8
+ { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPPNone, 0, 0, 0, 0 }, // Format_Invalid
+ { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1MSB, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_Mono
+ { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP1LSB, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_MonoLSB
+ { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertIndexedToARGB32PM, 0, 0, convertIndexedToARGB64PM }, // Format_Indexed8
// Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
// but everywhere this generic conversion would be wrong is currently overloaded.
- { 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough }, // Format_RGB32
- { 8, 16, 8, 8, 8, 0, 8, 24, false, QPixelLayout::BPP32, convertARGB32ToARGB32PM, convertARGB32FromARGB32PM, convertPassThrough }, // Format_ARGB32
- { 8, 16, 8, 8, 8, 0, 8, 24, true, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough }, // Format_ARGB32_Premultiplied
+ { 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough, convertRGB32ToRGB64 }, // Format_RGB32
+ { 8, 16, 8, 8, 8, 0, 8, 24, false, QPixelLayout::BPP32, convertARGB32ToARGB32PM, convertARGB32FromARGB32PM, convertPassThrough, convertARGB32ToARGB64PM }, // Format_ARGB32
+ { 8, 16, 8, 8, 8, 0, 8, 24, true, QPixelLayout::BPP32, convertPassThrough, convertPassThrough, convertPassThrough, convertARGB32PMToARGB64PM }, // Format_ARGB32_Premultiplied
#ifdef Q_COMPILER_CONSTEXPR
pixelLayoutRGB<QImage::Format_RGB16>(),
pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
@@ -625,55 +875,73 @@ QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
{ 5, 11, 6, 5, 5, 0, 0, 0, false, QPixelLayout::BPP16,
convertToRGB32<QImage::Format_RGB16>,
convertRGBFromARGB32PM<QImage::Format_RGB16>,
- convertRGBFromRGB32<QImage::Format_RGB16>},
+ convertRGBFromRGB32<QImage::Format_RGB16>,
+ convertToRGB64<QImage::Format_RGB16>,
+ },
{ 5, 19, 6, 13, 5, 8, 8, 0, true, QPixelLayout::BPP24,
convertARGBPMToARGB32PM<QImage::Format_ARGB8565_Premultiplied>,
convertARGBPMFromARGB32PM<QImage::Format_ARGB8565_Premultiplied>,
- convertARGBPMFromRGB32<QImage::Format_ARGB8565_Premultiplied>},
+ convertARGBPMFromRGB32<QImage::Format_ARGB8565_Premultiplied>,
+ convertARGBPMToARGB64PM<QImage::Format_ARGB8565_Premultiplied>,
+ },
{ 6, 12, 6, 6, 6, 0, 0, 0, false, QPixelLayout::BPP24,
convertToRGB32<QImage::Format_RGB666>,
convertRGBFromARGB32PM<QImage::Format_RGB666>,
- convertRGBFromRGB32<QImage::Format_RGB666>},
+ convertRGBFromRGB32<QImage::Format_RGB666>,
+ convertToRGB64<QImage::Format_RGB666>,
+ },
{ 6, 12, 6, 6, 6, 0, 6, 18, true, QPixelLayout::BPP24,
convertARGBPMToARGB32PM<QImage::Format_ARGB6666_Premultiplied>,
convertARGBPMFromARGB32PM<QImage::Format_ARGB6666_Premultiplied>,
- convertARGBPMFromRGB32<QImage::Format_ARGB6666_Premultiplied>},
+ convertARGBPMFromRGB32<QImage::Format_ARGB6666_Premultiplied>,
+ convertARGBPMToARGB64PM<QImage::Format_ARGB6666_Premultiplied>,
+ },
{ 5, 10, 5, 5, 5, 0, 0, 0, false, QPixelLayout::BPP16,
convertToRGB32<QImage::Format_RGB555>,
convertRGBFromARGB32PM<QImage::Format_RGB555>,
- convertRGBFromRGB32<QImage::Format_RGB555> },
+ convertRGBFromRGB32<QImage::Format_RGB555>,
+ convertToRGB64<QImage::Format_RGB555>,
+ },
{ 5, 18, 5, 13, 5, 8, 8, 0, true, QPixelLayout::BPP24,
convertARGBPMToARGB32PM<QImage::Format_ARGB8555_Premultiplied>,
convertARGBPMFromARGB32PM<QImage::Format_ARGB8555_Premultiplied>,
- convertARGBPMFromRGB32<QImage::Format_ARGB8555_Premultiplied>},
+ convertARGBPMFromRGB32<QImage::Format_ARGB8555_Premultiplied>,
+ convertARGBPMToARGB64PM<QImage::Format_ARGB8555_Premultiplied>,
+ },
{ 8, 16, 8, 8, 8, 0, 0, 0, false, QPixelLayout::BPP24,
convertToRGB32<QImage::Format_RGB888>,
convertRGBFromARGB32PM<QImage::Format_RGB888>,
- convertRGBFromRGB32<QImage::Format_RGB888>},
+ convertRGBFromRGB32<QImage::Format_RGB888>,
+ convertToRGB64<QImage::Format_RGB888>,
+ },
{ 4, 8, 4, 4, 4, 0, 0, 0, false, QPixelLayout::BPP16,
convertToRGB32<QImage::Format_RGB444>,
convertRGBFromARGB32PM<QImage::Format_RGB444>,
- convertRGBFromRGB32<QImage::Format_RGB444>},
+ convertRGBFromRGB32<QImage::Format_RGB444>,
+ convertToRGB64<QImage::Format_RGB444>,
+ },
{ 4, 8, 4, 4, 4, 0, 4, 12, true, QPixelLayout::BPP16,
convertARGBPMToARGB32PM<QImage::Format_ARGB4444_Premultiplied>,
convertARGBPMFromARGB32PM<QImage::Format_ARGB4444_Premultiplied>,
- convertARGBPMFromRGB32<QImage::Format_ARGB4444_Premultiplied>},
+ convertARGBPMFromRGB32<QImage::Format_ARGB4444_Premultiplied>,
+ convertARGBPMToARGB64PM<QImage::Format_ARGB4444_Premultiplied>,
+ },
#endif
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
- { 8, 24, 8, 16, 8, 8, 0, 0, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32 }, // Format_RGBX8888
- { 8, 24, 8, 16, 8, 8, 8, 0, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32 }, // Format_RGBA8888
- { 8, 24, 8, 16, 8, 8, 8, 0, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32}, // Format_RGBA8888_Premultiplied
+ { 8, 24, 8, 16, 8, 8, 0, 0, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBX8888
+ { 8, 24, 8, 16, 8, 8, 8, 0, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32, convertRGBA8888ToARGB64PM }, // Format_RGBA8888
+ { 8, 24, 8, 16, 8, 8, 8, 0, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM}, // Format_RGBA8888_Premultiplied
#else
- { 8, 0, 8, 8, 8, 16, 0, 24, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32 }, // Format_RGBX8888
- { 8, 0, 8, 8, 8, 16, 8, 24, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32 }, // Format_RGBA8888 (ABGR32)
- { 8, 0, 8, 8, 8, 16, 8, 24, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
+ { 8, 0, 8, 8, 8, 16, 0, 24, false, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBXFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBX8888
+ { 8, 0, 8, 8, 8, 16, 8, 24, false, QPixelLayout::BPP32, convertRGBA8888ToARGB32PM, convertRGBA8888FromARGB32PM, convertRGBXFromRGB32, convertRGBA8888ToARGB64PM }, // Format_RGBA8888 (ABGR32)
+ { 8, 0, 8, 8, 8, 16, 8, 24, true, QPixelLayout::BPP32, convertRGBA8888PMToARGB32PM, convertRGBA8888PMFromARGB32PM, convertRGBXFromRGB32, convertRGBA8888PMToARGB64PM }, // Format_RGBA8888_Premultiplied
#endif
- { 10, 20, 10, 10, 10, 0, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertRGB30FromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR> }, // Format_BGR30
- { 10, 20, 10, 10, 10, 0, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertA2RGB30PMFromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR> }, // Format_A2BGR30_Premultiplied
- { 10, 0, 10, 10, 10, 20, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertRGB30FromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB> }, // Format_RGB30
- { 10, 0, 10, 10, 10, 20, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertA2RGB30PMFromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB> }, // Format_A2RGB30_Premultiplied
- { 0, 0, 0, 0, 0, 0, 8, 0, false, QPixelLayout::BPP8, convertAlpha8ToRGB32, convertAlpha8FromARGB32PM, 0 }, // Format_Alpha8
- { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertGrayscale8ToRGB32, convertGrayscale8FromARGB32PM, convertGrayscale8FromRGB32 } // Format_Grayscale8
+ { 10, 20, 10, 10, 10, 0, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertRGB30FromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR>, convertA2RGB30PMToARGB64PM<PixelOrderBGR> }, // Format_BGR30
+ { 10, 20, 10, 10, 10, 0, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderBGR>, convertA2RGB30PMFromARGB32PM<PixelOrderBGR>, convertRGB30FromRGB32<PixelOrderBGR>, convertA2RGB30PMToARGB64PM<PixelOrderBGR> }, // Format_A2BGR30_Premultiplied
+ { 10, 0, 10, 10, 10, 20, 0, 30, false, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertRGB30FromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB>, convertA2RGB30PMToARGB64PM<PixelOrderRGB> }, // Format_RGB30
+ { 10, 0, 10, 10, 10, 20, 2, 30, true, QPixelLayout::BPP32, convertA2RGB30PMToARGB32PM<PixelOrderRGB>, convertA2RGB30PMFromARGB32PM<PixelOrderRGB>, convertRGB30FromRGB32<PixelOrderRGB>, convertA2RGB30PMToARGB64PM<PixelOrderRGB> }, // Format_A2RGB30_Premultiplied
+ { 0, 0, 0, 0, 0, 0, 8, 0, false, QPixelLayout::BPP8, convertAlpha8ToRGB32, convertAlpha8FromARGB32PM, 0, convertAlpha8ToRGB64 }, // Format_Alpha8
+ { 0, 0, 0, 0, 0, 0, 0, 0, false, QPixelLayout::BPP8, convertGrayscale8ToRGB32, convertGrayscale8FromARGB32PM, convertGrayscale8FromRGB32, convertGrayscale8ToRGB64 } // Format_Grayscale8
};
FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount] = {
@@ -764,29 +1032,14 @@ static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBu
const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
uint buffer32[buffer_size];
const uint *ptr = qFetchPixels[layout->bpp](buffer32, rasterBuffer->scanLine(y), x, length);
- ptr = const_cast<uint *>(layout->convertToARGB32PM(buffer32, ptr, length, layout, 0));
-
- for (int i = 0; i < length; ++i) {
- buffer[i] = QRgba64::fromArgb32(ptr[i]);
- }
- return buffer;
-}
-
-static QRgba64 *QT_FASTCALL destFetch64ARGB32(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
-{
- const uint *src = ((const uint *)rasterBuffer->scanLine(y)) + x;
- for (int i = 0; i < length; ++i)
- buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied();
- return buffer;
+ return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, ptr, length, layout, 0));
}
-template<QtPixelOrder PixelOrder>
-static QRgba64 *QT_FASTCALL destFetch64RGB30(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
+static QRgba64 *QT_FASTCALL destFetch64uint32(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
{
+ const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
const uint *src = ((const uint *)rasterBuffer->scanLine(y)) + x;
- for (int i = 0; i < length; ++i)
- buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
- return buffer;
+ return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, src, length, layout, 0));
}
static DestFetchProc destFetchProc[QImage::NImageFormats] =
@@ -824,9 +1077,9 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
destFetch64, // Format_Mono,
destFetch64, // Format_MonoLSB
0, // Format_Indexed8
- destFetch64, // Format_RGB32
- destFetch64ARGB32, // Format_ARGB32,
- destFetch64, // Format_ARGB32_Premultiplied
+ destFetch64uint32, // Format_RGB32
+ destFetch64uint32, // Format_ARGB32,
+ destFetch64uint32, // Format_ARGB32_Premultiplied
destFetch64, // Format_RGB16
destFetch64, // Format_ARGB8565_Premultiplied
destFetch64, // Format_RGB666
@@ -836,13 +1089,13 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
destFetch64, // Format_RGB888
destFetch64, // Format_RGB444
destFetch64, // Format_ARGB4444_Premultiplied
- destFetch64, // Format_RGBX8888
- destFetch64, // Format_RGBA8888
- destFetch64, // Format_RGBA8888_Premultiplied
- destFetch64RGB30<PixelOrderBGR>, // Format_BGR30
- destFetch64RGB30<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
- destFetch64RGB30<PixelOrderRGB>, // Format_RGB30
- destFetch64RGB30<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
+ destFetch64uint32, // Format_RGBX8888
+ destFetch64uint32, // Format_RGBA8888
+ destFetch64uint32, // Format_RGBA8888_Premultiplied
+ destFetch64uint32, // Format_BGR30
+ destFetch64uint32, // Format_A2BGR30_Premultiplied
+ destFetch64uint32, // Format_RGB30
+ destFetch64uint32, // Format_A2RGB30_Premultiplied
destFetch64, // Format_Alpha8
destFetch64, // Format_Grayscale8
};
@@ -991,6 +1244,40 @@ static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, c
}
}
+#ifdef __SSE2__
+template<QtPixelOrder PixelOrder>
+static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
+{
+ const __m128i gmask = _mm_set1_epi32(0x000ffc00);
+ const __m128i cmask = _mm_set1_epi32(0x000003ff);
+ int i = 0;
+ __m128i vr, vg, vb, va;
+ for (; i < count-1; i += 2) {
+ __m128i vs = _mm_loadu_si128((const __m128i*)buffer);
+ buffer += 2;
+ vr = _mm_srli_epi64(vs, 6);
+ vg = _mm_srli_epi64(vs, 16 + 6 - 10);
+ vb = _mm_srli_epi64(vs, 32 + 6);
+ vr = _mm_and_si128(vr, cmask);
+ vg = _mm_and_si128(vg, gmask);
+ vb = _mm_and_si128(vb, cmask);
+ va = _mm_srli_epi64(vs, 48 + 14);
+ if (PixelOrder == PixelOrderRGB)
+ vr = _mm_slli_epi32(vr, 20);
+ else
+ vb = _mm_slli_epi32(vb, 20);
+ va = _mm_slli_epi32(va, 30);
+ __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
+ vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
+ _mm_storel_epi64((__m128i*)dest, vd);
+ dest += 2;
+ }
+
+ for (; i < count; ++i)
+ *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
+}
+#endif
+
static void QT_FASTCALL destStore64ARGB32(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
@@ -999,13 +1286,25 @@ static void QT_FASTCALL destStore64ARGB32(QRasterBuffer *rasterBuffer, int x, in
}
}
+static void QT_FASTCALL destStore64RGBA8888(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
+{
+ uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
+ for (int i = 0; i < length; ++i) {
+ dest[i] = ARGB2RGBA(buffer[i].unpremultiplied().toArgb32());
+ }
+}
+
template<QtPixelOrder PixelOrder>
static void QT_FASTCALL destStore64RGB30(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
+#ifdef __SSE2__
+ qConvertARGB64PMToA2RGB30PM_sse2<PixelOrder>(dest, buffer, length);
+#else
for (int i = 0; i < length; ++i) {
dest[i] = qConvertRgb64ToRgb30<PixelOrder>(buffer[i]);
}
+#endif
}
static DestStoreProc destStoreProc[QImage::NImageFormats] =
@@ -1056,7 +1355,7 @@ static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
destStore64, // Format_RGB444
destStore64, // Format_ARGB4444_Premultiplied
destStore64, // Format_RGBX8888
- destStore64, // Format_RGBA8888
+ destStore64RGBA8888, // Format_RGBA8888
destStore64, // Format_RGBA8888_Premultiplied
destStore64RGB30<PixelOrderBGR>, // Format_BGR30
destStore64RGB30<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
@@ -1121,6 +1420,21 @@ static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Opera
return buffer;
}
+static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
+ const QSpanData *data, int y, int x, int length)
+{
+ const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
+ const QRgb *clut = data->texture.colorTable ? data->texture.colorTable->constData() : 0;
+ if (layout->bpp != QPixelLayout::BPP32) {
+ uint buffer32[buffer_size];
+ const uint *ptr = qFetchPixels[layout->bpp](buffer32, data->texture.scanLine(y), x, length);
+ return layout->convertToARGB64PM(buffer, ptr, length, layout, clut);
+ } else {
+ const uint *src = (const uint *)data->texture.scanLine(y) + x;
+ return layout->convertToARGB64PM(buffer, src, length, layout, clut);
+ }
+}
+
// blendType is either BlendTransformed or BlendTransformedTiled
template<TextureBlendType blendType>
static const uint *QT_FASTCALL fetchTransformedARGB32PM(uint *buffer, const Operator *, const QSpanData *data,
@@ -1288,6 +1602,111 @@ static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *,
return layout->convertToARGB32PM(buffer, buffer, length, layout, clut);
}
+template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
+static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
+ int y, int x, int length)
+{
+ int image_width = data->texture.width;
+ int image_height = data->texture.height;
+
+ const qreal cx = x + qreal(0.5);
+ const qreal cy = y + qreal(0.5);
+
+ const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
+ FetchPixelFunc fetch = qFetchPixel[layout->bpp];
+ const QRgb *clut = data->texture.colorTable ? data->texture.colorTable->constData() : 0;
+
+ uint buffer32[buffer_size];
+ QRgba64 *b = buffer;
+ if (data->fast_matrix) {
+ // The increment pr x in the scanline
+ int fdx = (int)(data->m11 * fixed_scale);
+ int fdy = (int)(data->m12 * fixed_scale);
+
+ int fx = int((data->m21 * cy
+ + data->m11 * cx + data->dx) * fixed_scale);
+ int fy = int((data->m22 * cy
+ + data->m12 * cx + data->dy) * fixed_scale);
+
+ int i = 0, j = 0;
+ while (i < length) {
+ if (j == buffer_size) {
+ layout->convertToARGB64PM(b, buffer32, buffer_size, layout, clut);
+ b += buffer_size;
+ j = 0;
+ }
+ int px = fx >> 16;
+ int py = fy >> 16;
+
+ if (blendType == BlendTransformedTiled) {
+ px %= image_width;
+ py %= image_height;
+ if (px < 0) px += image_width;
+ if (py < 0) py += image_height;
+ } else {
+ px = qBound(0, px, image_width - 1);
+ py = qBound(0, py, image_height - 1);
+ }
+ buffer32[j] = fetch(data->texture.scanLine(py), px);
+
+ fx += fdx;
+ fy += fdy;
+ ++i; ++j;
+ }
+ if (j > 0) {
+ layout->convertToARGB64PM(b, buffer32, j, layout, clut);
+ b += j;
+ }
+ } else {
+ const qreal fdx = data->m11;
+ const qreal fdy = data->m12;
+ const qreal fdw = data->m13;
+
+ qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
+ qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
+ qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
+
+ int i = 0, j = 0;
+ while (i < length) {
+ if (j == buffer_size) {
+ layout->convertToARGB64PM(b, buffer32, buffer_size, layout, clut);
+ b += buffer_size;
+ j = 0;
+ }
+ const qreal iw = fw == 0 ? 1 : 1 / fw;
+ const qreal tx = fx * iw;
+ const qreal ty = fy * iw;
+ int px = int(tx) - (tx < 0);
+ int py = int(ty) - (ty < 0);
+
+ if (blendType == BlendTransformedTiled) {
+ px %= image_width;
+ py %= image_height;
+ if (px < 0) px += image_width;
+ if (py < 0) py += image_height;
+ } else {
+ px = qBound(0, px, image_width - 1);
+ py = qBound(0, py, image_height - 1);
+ }
+ buffer32[j] = fetch(data->texture.scanLine(py), px);
+
+ fx += fdx;
+ fy += fdy;
+ fw += fdw;
+ //force increment to avoid /0
+ if (!fw) {
+ fw += fdw;
+ }
+ ++i; ++j;
+ }
+ if (j > 0) {
+ layout->convertToARGB64PM(b, buffer32, j, layout, clut);
+ b += j;
+ }
+ }
+ return buffer;
+}
+
/** \internal
interpolate 4 argb pixels with the distx and disty factor.
distx and disty bust be between 0 and 16
@@ -1415,6 +1834,41 @@ static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint
}
#endif
+#if defined(__SSE2__)
+static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty)
+{
+ const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+
+ __m128i vt = _mm_loadu_si128((const __m128i*)t);
+ if (disty) {
+ __m128i vb = _mm_loadu_si128((const __m128i*)b);
+ vt = _mm_mulhi_epu16(vt, _mm_set1_epi16(0x10000 - disty));
+ vb = _mm_mulhi_epu16(vb, _mm_set1_epi16(disty));
+ vt = _mm_add_epi16(vt, vb);
+ }
+ vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
+ vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+#ifdef Q_PROCESSOR_X86_64
+ return QRgba64::fromRgba64(_mm_cvtsi128_si64(vt));
+#else
+ QRgba64 out;
+ _mm_storel_epi64((__m128i*)&out, vt);
+ return out;
+#endif
+}
+#else
+static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty)
+{
+ const uint dx = distx>>8;
+ const uint dy = disty>>8;
+ const uint idx = 256 - dx;
+ const uint idy = 256 - dy;
+ QRgba64 xtop = interpolate256(t[0], idx, t[1], dx);
+ QRgba64 xbot = interpolate256(b[0], idx, b[1], dx);
+ return interpolate256(xtop, idy, xbot, dy);
+}
+#endif
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
@@ -2280,6 +2734,349 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
return buffer;
}
+template<TextureBlendType blendType>
+static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
+ const QSpanData *data, int y, int x, int length)
+{
+ const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
+ const QRgb *clut = data->texture.colorTable ? data->texture.colorTable->constData() : 0;
+
+ int image_width = data->texture.width;
+ int image_height = data->texture.height;
+
+ int image_x1 = data->texture.x1;
+ int image_y1 = data->texture.y1;
+ int image_x2 = data->texture.x2 - 1;
+ int image_y2 = data->texture.y2 - 1;
+
+ const qreal cx = x + qreal(0.5);
+ const qreal cy = y + qreal(0.5);
+
+ const qreal fdx = data->m11;
+ const qreal fdy = data->m12;
+ const qreal fdw = data->m13;
+
+ if (data->fast_matrix) {
+ // The increment pr x in the scanline
+ int fdx = (int)(data->m11 * fixed_scale);
+ int fdy = (int)(data->m12 * fixed_scale);
+
+ int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
+ int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
+
+ fx -= half_point;
+ fy -= half_point;
+
+ if (fdy == 0) { //simple scale, no rotation
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+ const uchar *s1 = data->texture.scanLine(y1);
+ const uchar *s2 = data->texture.scanLine(y2);
+
+ FetchPixelFunc fetch = qFetchPixel[layout->bpp];
+ uint sbuf1[buffer_size];
+ uint sbuf2[buffer_size];
+ QRgba64 buf1[buffer_size];
+ QRgba64 buf2[buffer_size];
+ QRgba64 *b = buffer;
+ while (length) {
+ int len = qMin(length, buffer_size / 2);
+ int fracX = fx;
+ int i = 0;
+ int disty = (fy & 0x0000ffff);
+#if defined(__SSE2__)
+ const __m128i vdy = _mm_set1_epi16(disty);
+ const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
+ if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) {
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
+ if (x1 != x2)
+ break;
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
+ fx += fdx;
+ }
+
+ const __m128i v_fdx = _mm_set1_epi32(fdx*4);
+ __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
+ for (; i < len-3; i+=4) {
+ int offset = _mm_extract_epi16(v_fx, 1);
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1];
+ offset = _mm_extract_epi16(v_fx, 3);
+ sbuf1[i * 2 + 2] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 2] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1];
+ offset = _mm_extract_epi16(v_fx, 5);
+ sbuf1[i * 2 + 4] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 4] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1];
+ offset = _mm_extract_epi16(v_fx, 7);
+ sbuf1[i * 2 + 6] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 6] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1];
+ v_fx = _mm_add_epi32(v_fx, v_fdx);
+ }
+ fx = _mm_cvtsi128_si32(v_fx);
+ }
+#endif
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
+
+ if (layout->bpp == QPixelLayout::BPP32) {
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
+
+ } else {
+ sbuf1[i * 2 + 0] = fetch(s1, x1);
+ sbuf1[i * 2 + 1] = fetch(s1, x2);
+ sbuf2[i * 2 + 0] = fetch(s2, x1);
+ sbuf2[i * 2 + 1] = fetch(s2, x2);
+ }
+
+ fx += fdx;
+ }
+ layout->convertToARGB64PM(buf1, sbuf1, len * 2, layout, clut);
+ if (disty)
+ layout->convertToARGB64PM(buf2, sbuf2, len * 2, layout, clut);
+
+ for (int i = 0; i < len; ++i) {
+ int distx = (fracX & 0x0000ffff);
+#if defined(__SSE2__)
+ const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+ __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
+ if (disty) {
+ __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
+ vt = _mm_mulhi_epu16(vt, vidy);
+ vb = _mm_mulhi_epu16(vb, vdy);
+ vt = _mm_add_epi16(vt, vb);
+ }
+ vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
+ vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+ _mm_storel_epi64((__m128i*)(b+i), vt);
+#else
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
+#endif
+ fracX += fdx;
+ }
+ length -= len;
+ b += len;
+ }
+ } else { //rotation
+ FetchPixelFunc fetch = qFetchPixel[layout->bpp];
+ uint sbuf1[buffer_size];
+ uint sbuf2[buffer_size];
+ QRgba64 buf1[buffer_size];
+ QRgba64 buf2[buffer_size];
+ QRgba64 *end = buffer + length;
+ QRgba64 *b = buffer;
+
+ while (b < end) {
+ int len = qMin(length, buffer_size / 2);
+ int fracX = fx;
+ int fracY = fy;
+ int i = 0;
+#if defined(__SSE2__)
+ if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) {
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+ if (x1 != x2 && y1 != y2)
+ break;
+ const uchar *s1 = data->texture.scanLine(y1);
+ const uchar *s2 = data->texture.scanLine(y2);
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
+ fx += fdx;
+ fy += fdy;
+ }
+
+ const __m128i v_fdx = _mm_set1_epi32(fdx*4);
+ const __m128i v_fdy = _mm_set1_epi32(fdy*4);
+ __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
+ __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
+ const int bytesPerLine = data->texture.bytesPerLine;
+ const uchar *s1 = data->texture.imageData;
+ const uchar *s2 = s1 + bytesPerLine;
+ const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
+ for (; i < len-3; i+=4) {
+ if (fdx > 0 && (short)_mm_extract_epi16(v_fx, 7) >= image_x2)
+ break;
+ if (fdx < 0 && (short)_mm_extract_epi16(v_fx, 7) < image_x1)
+ break;
+ if (fdy > 0 && (short)_mm_extract_epi16(v_fy, 7) >= image_y2)
+ break;
+ if (fdy < 0 && (short)_mm_extract_epi16(v_fy, 7) < image_y1)
+ break;
+ const __m128i vy = _mm_packs_epi32(_mm_srai_epi32(v_fy, 16), _mm_setzero_si128());
+ __m128i voffset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epu16(vy, vbpl));
+ voffset = _mm_add_epi32(voffset, _mm_srli_epi32(v_fx, 16));
+
+ int offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1];
+ offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
+ sbuf1[i * 2 + 2] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 2] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1];
+ offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
+ sbuf1[i * 2 + 4] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 4] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1];
+ offset = _mm_cvtsi128_si32(voffset);
+ sbuf1[i * 2 + 6] = ((const uint*)s1)[offset];
+ sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1];
+ sbuf2[i * 2 + 6] = ((const uint*)s2)[offset];
+ sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1];
+
+ v_fx = _mm_add_epi32(v_fx, v_fdx);
+ v_fy = _mm_add_epi32(v_fy, v_fdy);
+ }
+ fx = _mm_cvtsi128_si32(v_fx);
+ fy = _mm_cvtsi128_si32(v_fy);
+ }
+#endif
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+
+ const uchar *s1 = data->texture.scanLine(y1);
+ const uchar *s2 = data->texture.scanLine(y2);
+
+ if (layout->bpp == QPixelLayout::BPP32) {
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
+
+ } else {
+ sbuf1[i * 2 + 0] = fetch(s1, x1);
+ sbuf1[i * 2 + 1] = fetch(s1, x2);
+ sbuf2[i * 2 + 0] = fetch(s2, x1);
+ sbuf2[i * 2 + 1] = fetch(s2, x2);
+ }
+
+ fx += fdx;
+ fy += fdy;
+ }
+ layout->convertToARGB64PM(buf1, sbuf1, len * 2, layout, clut);
+ layout->convertToARGB64PM(buf2, sbuf2, len * 2, layout, clut);
+
+ for (int i = 0; i < len; ++i) {
+ int distx = (fracX & 0x0000ffff);
+ int disty = (fracY & 0x0000ffff);
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
+ fracX += fdx;
+ fracY += fdy;
+ }
+
+ length -= len;
+ b += len;
+ }
+ }
+ } else {
+ qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
+ qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
+ qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
+
+ FetchPixelFunc fetch = qFetchPixel[layout->bpp];
+ uint sbuf1[buffer_size];
+ uint sbuf2[buffer_size];
+ QRgba64 buf1[buffer_size];
+ QRgba64 buf2[buffer_size];
+ QRgba64 *b = buffer;
+
+ int distxs[buffer_size / 2];
+ int distys[buffer_size / 2];
+
+ while (length) {
+ int len = qMin(length, buffer_size / 2);
+ for (int i = 0; i < len; ++i) {
+ const qreal iw = fw == 0 ? 1 : 1 / fw;
+ const qreal px = fx * iw - qreal(0.5);
+ const qreal py = fy * iw - qreal(0.5);
+
+ int x1 = int(px) - (px < 0);
+ int x2;
+ int y1 = int(py) - (py < 0);
+ int y2;
+
+ distxs[i] = int((px - x1) * (1<<16));
+ distys[i] = int((py - y1) * (1<<16));
+
+ fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+
+ const uchar *s1 = data->texture.scanLine(y1);
+ const uchar *s2 = data->texture.scanLine(y2);
+
+ if (layout->bpp == QPixelLayout::BPP32) {
+ sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
+ sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
+ sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
+ sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
+
+ } else {
+ sbuf1[i * 2 + 0] = fetch(s1, x1);
+ sbuf1[i * 2 + 1] = fetch(s1, x2);
+ sbuf2[i * 2 + 0] = fetch(s2, x1);
+ sbuf2[i * 2 + 1] = fetch(s2, x2);
+ }
+
+ fx += fdx;
+ fy += fdy;
+ fw += fdw;
+ //force increment to avoid /0
+ if (!fw)
+ fw += fdw;
+ }
+
+ layout->convertToARGB64PM(buf1, sbuf1, len * 2, layout, clut);
+ layout->convertToARGB64PM(buf2, sbuf2, len * 2, layout, clut);
+
+ for (int i = 0; i < len; ++i) {
+ int distx = distxs[i];
+ int disty = distys[i];
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
+ }
+
+ length -= len;
+ b += len;
+ }
+ }
+
+ return buffer;
+}
+
static SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
// Untransformed
{
@@ -2448,6 +3245,174 @@ static SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
},
};
+static SourceFetchProc64 sourceFetch64[NBlendTypes][QImage::NImageFormats] = {
+ // Untransformed
+ {
+ 0, // Invalid
+ fetchUntransformed64, // Mono
+ fetchUntransformed64, // MonoLsb
+ fetchUntransformed64, // Indexed8
+ fetchUntransformed64, // RGB32
+ fetchUntransformed64, // ARGB32
+ fetchUntransformed64, // ARGB32_Premultiplied
+ fetchUntransformed64, // RGB16
+ fetchUntransformed64, // ARGB8565_Premultiplied
+ fetchUntransformed64, // RGB666
+ fetchUntransformed64, // ARGB6666_Premultiplied
+ fetchUntransformed64, // RGB555
+ fetchUntransformed64, // ARGB8555_Premultiplied
+ fetchUntransformed64, // RGB888
+ fetchUntransformed64, // RGB444
+ fetchUntransformed64, // ARGB4444_Premultiplied
+ fetchUntransformed64, // RGBX8888
+ fetchUntransformed64, // RGBA8888
+ fetchUntransformed64, // RGBA8888_Premultiplied
+ fetchUntransformed64, // Format_BGR30
+ fetchUntransformed64, // Format_A2BGR30_Premultiplied
+ fetchUntransformed64, // Format_RGB30
+ fetchUntransformed64, // Format_A2RGB30_Premultiplied
+ fetchUntransformed64, // Alpha8
+ fetchUntransformed64, // Grayscale8
+ },
+ // Tiled
+ {
+ 0, // Invalid
+ fetchUntransformed64, // Mono
+ fetchUntransformed64, // MonoLsb
+ fetchUntransformed64, // Indexed8
+ fetchUntransformed64, // RGB32
+ fetchUntransformed64, // ARGB32
+ fetchUntransformed64, // ARGB32_Premultiplied
+ fetchUntransformed64, // RGB16
+ fetchUntransformed64, // ARGB8565_Premultiplied
+ fetchUntransformed64, // RGB666
+ fetchUntransformed64, // ARGB6666_Premultiplied
+ fetchUntransformed64, // RGB555
+ fetchUntransformed64, // ARGB8555_Premultiplied
+ fetchUntransformed64, // RGB888
+ fetchUntransformed64, // RGB444
+ fetchUntransformed64, // ARGB4444_Premultiplied
+ fetchUntransformed64, // RGBX8888
+ fetchUntransformed64, // RGBA8888
+ fetchUntransformed64, // RGBA8888_Premultiplied
+ fetchUntransformed64, // BGR30
+ fetchUntransformed64, // A2BGR30_Premultiplied
+ fetchUntransformed64, // RGB30
+ fetchUntransformed64, // A2RGB30_Premultiplied
+ fetchUntransformed64, // Alpha8
+ fetchUntransformed64, // Grayscale8
+ },
+ // Transformed
+ {
+ 0, // Invalid
+ fetchTransformed64<BlendTransformed>, // Mono
+ fetchTransformed64<BlendTransformed>, // MonoLsb
+ fetchTransformed64<BlendTransformed>, // Indexed8
+ fetchTransformed64<BlendTransformed>, // RGB32
+ fetchTransformed64<BlendTransformed>, // ARGB32
+ fetchTransformed64<BlendTransformed>, // ARGB32_Premultiplied
+ fetchTransformed64<BlendTransformed>, // RGB16
+ fetchTransformed64<BlendTransformed>, // ARGB8565_Premultiplied
+ fetchTransformed64<BlendTransformed>, // RGB666
+ fetchTransformed64<BlendTransformed>, // ARGB6666_Premultiplied
+ fetchTransformed64<BlendTransformed>, // RGB555
+ fetchTransformed64<BlendTransformed>, // ARGB8555_Premultiplied
+ fetchTransformed64<BlendTransformed>, // RGB888
+ fetchTransformed64<BlendTransformed>, // RGB444
+ fetchTransformed64<BlendTransformed>, // ARGB4444_Premultiplied
+ fetchTransformed64<BlendTransformed>, // RGBX8888
+ fetchTransformed64<BlendTransformed>, // RGBA8888
+ fetchTransformed64<BlendTransformed>, // RGBA8888_Premultiplied
+ fetchTransformed64<BlendTransformed>, // BGR30
+ fetchTransformed64<BlendTransformed>, // A2BGR30_Premultiplied
+ fetchTransformed64<BlendTransformed>, // RGB30
+ fetchTransformed64<BlendTransformed>, // A2RGB30_Premultiplied
+ fetchTransformed64<BlendTransformed>, // Alpah8
+ fetchTransformed64<BlendTransformed>, // Grayscale8
+ },
+ {
+ 0, // TransformedTiled
+ fetchTransformed64<BlendTransformedTiled>, // Mono
+ fetchTransformed64<BlendTransformedTiled>, // MonoLsb
+ fetchTransformed64<BlendTransformedTiled>, // Indexed8
+ fetchTransformed64<BlendTransformedTiled>, // RGB32
+ fetchTransformed64<BlendTransformedTiled>, // ARGB32
+ fetchTransformed64<BlendTransformedTiled>, // ARGB32_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // RGB16
+ fetchTransformed64<BlendTransformedTiled>, // ARGB8565_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // RGB666
+ fetchTransformed64<BlendTransformedTiled>, // ARGB6666_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // RGB555
+ fetchTransformed64<BlendTransformedTiled>, // ARGB8555_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // RGB888
+ fetchTransformed64<BlendTransformedTiled>, // RGB444
+ fetchTransformed64<BlendTransformedTiled>, // ARGB4444_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // RGBX8888
+ fetchTransformed64<BlendTransformedTiled>, // RGBA8888
+ fetchTransformed64<BlendTransformedTiled>, // RGBA8888_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // BGR30
+ fetchTransformed64<BlendTransformedTiled>, // A2BGR30_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // RGB30
+ fetchTransformed64<BlendTransformedTiled>, // A2RGB30_Premultiplied
+ fetchTransformed64<BlendTransformedTiled>, // Alpha8
+ fetchTransformed64<BlendTransformedTiled>, // Grayscale8
+ },
+ {
+ 0, // Bilinear
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // Mono
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // MonoLsb
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // Indexed8
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB32
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // ARGB32
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // ARGB32_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB16
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // ARGB8565_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB666
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // ARGB6666_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB555
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // ARGB8555_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB888
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB444
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // ARGB4444_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGBX8888
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGBA8888
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGBA8888_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // BGR30
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // A2BGR30_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // RGB30
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // A2RGB30_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // Alpha8
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // Grayscale8
+ },
+ {
+ 0, // BilinearTiled
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // Mono
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // MonoLsb
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // Indexed8
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB32
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // ARGB32
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // ARGB32_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB16
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // ARGB8565_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB666
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // ARGB6666_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB555
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // ARGB8555_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB888
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB444
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // ARGB4444_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGBX8888
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGBA8888
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGBA8888_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // BGR30
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // A2BGR30_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // RGB30
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // A2RGB30_Premultiplied
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // Alpha8
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled>, // Grayscale8
+ },
+};
+
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
@@ -4931,7 +5896,7 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in
break;
case QSpanData::Texture:
op.srcFetch = sourceFetch[getBlendType(data)][data->texture.format];
- op.srcFetch64 = 0; // sourceFetch64[getBlendType(data)][data->texture.format];
+ op.srcFetch64 = sourceFetch64[getBlendType(data)][data->texture.format];
solidSource = !data->texture.hasAlpha;
default:
break;
@@ -6826,6 +7791,16 @@ inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
map, mapWidth, mapHeight, mapStride);
}
+template<QtPixelOrder PixelOrder>
+inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
+ int x, int y, const QRgba64 &color,
+ const uchar *map,
+ int mapWidth, int mapHeight, int mapStride)
+{
+ qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
+ map, mapWidth, mapHeight, mapStride);
+}
+
inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
int x, int y, const QRgba64 &color,
const uchar *map,
@@ -7348,7 +8323,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] =
{
blend_color_generic_rgb64,
blend_src_generic_rgb64,
- 0,
+ qt_bitmapblit_rgb30<PixelOrderBGR>,
0,
0,
qt_rectfill_rgb30<PixelOrderBGR>
@@ -7357,7 +8332,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] =
{
blend_color_generic_rgb64,
blend_src_generic_rgb64,
- 0,
+ qt_bitmapblit_rgb30<PixelOrderBGR>,
0,
0,
qt_rectfill_rgb30<PixelOrderBGR>
@@ -7366,7 +8341,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] =
{
blend_color_generic_rgb64,
blend_src_generic_rgb64,
- 0,
+ qt_bitmapblit_rgb30<PixelOrderRGB>,
0,
0,
qt_rectfill_rgb30<PixelOrderRGB>
@@ -7375,7 +8350,7 @@ DrawHelper qDrawHelper[QImage::NImageFormats] =
{
blend_color_generic_rgb64,
blend_src_generic_rgb64,
- 0,
+ qt_bitmapblit_rgb30<PixelOrderRGB>,
0,
0,
qt_rectfill_rgb30<PixelOrderRGB>
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 501f1ce033..53f4db6fce 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -1139,6 +1139,8 @@ void QT_FASTCALL rasterop_solid_NotDestination(uint *dest, int length, uint colo
struct QPixelLayout;
typedef const uint *(QT_FASTCALL *ConvertFunc)(uint *buffer, const uint *src, int count,
const QPixelLayout *layout, const QRgb *clut);
+typedef const QRgba64 *(QT_FASTCALL *ConvertFunc64)(QRgba64 *buffer, const uint *src, int count,
+ const QPixelLayout *layout, const QRgb *clut);
struct QPixelLayout
{
@@ -1168,6 +1170,7 @@ struct QPixelLayout
ConvertFunc convertToARGB32PM;
ConvertFunc convertFromARGB32PM;
ConvertFunc convertFromRGB32;
+ ConvertFunc64 convertToARGB64PM;
};
typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count);
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 0301c2a7de..c6cbe666ac 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -36,6 +36,7 @@
#include <QtGui/qrgba64.h>
#include <QtGui/private/qdrawhelper_p.h>
+#include <private/qsimd_p.h>
QT_BEGIN_NAMESPACE
@@ -52,20 +53,36 @@ inline QRgba64 multiplyAlpha256(QRgba64 rgba64, uint alpha256)
(rgba64.alpha() * alpha256) >> 8);
}
-inline QRgba64 multiplyAlpha255(QRgba64 rgba64, uint alpha255)
-{
- return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
- qt_div_255(rgba64.green() * alpha255),
- qt_div_255(rgba64.blue() * alpha255),
- qt_div_255(rgba64.alpha() * alpha255));
-}
-
inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
{
+#ifdef __SSE2__
+ const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
+ __m128i vs = _mm_loadl_epi64((__m128i*)&rgba64);
+ vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
+ vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16));
+ vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000));
+ vs = _mm_srai_epi32(vs, 16);
+ vs = _mm_packs_epi32(vs, _mm_setzero_si128());
+ _mm_storel_epi64((__m128i*)&rgba64, vs);
+ return rgba64;
+#else
return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
qt_div_65535(rgba64.green() * alpha65535),
qt_div_65535(rgba64.blue() * alpha65535),
qt_div_65535(rgba64.alpha() * alpha65535));
+#endif
+}
+
+inline QRgba64 multiplyAlpha255(QRgba64 rgba64, uint alpha255)
+{
+#ifdef __SSE2__
+ return multiplyAlpha65535(rgba64, alpha255 * 257);
+#else
+ return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
+ qt_div_255(rgba64.green() * alpha255),
+ qt_div_255(rgba64.blue() * alpha255),
+ qt_div_255(rgba64.alpha() * alpha255));
+#endif
}
inline QRgba64 interpolate256(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)