summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2016-12-07 14:37:12 +0100
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2018-08-11 14:12:48 +0000
commit9c9f98f2ff255da3e9a3ccdb81a82c53a105c8f9 (patch)
tree2f352bdf94b81d24271b7ce9e2bab3174ad0dff7 /src/gui/painting
parent1c623bc6d1c0a7ca52d81ca72c64f36898b3e12c (diff)
Implement support for 16bpc image formats
Adds support for 16bit per color image formats in QImage. This makes it possible to read and write 16bpc PNGs, and take full advantage of the 16bpc paint engine. [ChangeLog][QtGui][QImage] QImage now supports 64bit image formats with 16 bits per color channel, compatible with 16bpc PNG or RGBA16 OpenGL formats. Task-number: QTBUG-45858 Change-Id: Icd28bd5868a6efcf65cb5bd56031d42941e04099 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/gui/painting')
-rw-r--r--src/gui/painting/qdrawhelper.cpp1235
-rw-r--r--src/gui/painting/qdrawhelper_p.h19
-rw-r--r--src/gui/painting/qdrawhelper_sse4.cpp14
-rw-r--r--src/gui/painting/qmemrotate.cpp33
-rw-r--r--src/gui/painting/qmemrotate_p.h1
5 files changed, 967 insertions, 335 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 1110f951a9..b1424b5b0a 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -267,18 +267,19 @@ inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int in
return reinterpret_cast<const uint *>(src)[index];
}
-template <QPixelLayout::BPP bpp>
-inline const uint *QT_FASTCALL fetchPixels(uint *buffer, const uchar *src, int index, int count)
+template <>
+inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index)
{
- for (int i = 0; i < count; ++i)
- buffer[i] = fetchPixel<bpp>(src, index + i);
- return buffer;
+ // We have to do the conversion in fetch to fit into a 32bit uint
+ QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
+ return c.toArgb32();
}
-template <>
-inline const uint *QT_FASTCALL fetchPixels<QPixelLayout::BPP32>(uint *, const uchar *src, int index, int)
+template <QPixelLayout::BPP bpp>
+static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index)
{
- return reinterpret_cast<const uint *>(src) + index;
+ Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64);
+ return fetchPixel<bpp>(src, index);
}
template <QPixelLayout::BPP width> static
@@ -296,16 +297,6 @@ inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index,
reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel);
}
-static FetchPixelsFunc qFetchPixels[QPixelLayout::BPPCount] = {
- 0, // BPPNone
- fetchPixels<QPixelLayout::BPP1MSB>, // BPP1MSB
- fetchPixels<QPixelLayout::BPP1LSB>, // BPP1LSB
- fetchPixels<QPixelLayout::BPP8>, // BPP8
- fetchPixels<QPixelLayout::BPP16>, // BPP16
- fetchPixels<QPixelLayout::BPP24>, // BPP24
- fetchPixels<QPixelLayout::BPP32> // BPP32
-};
-
typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index);
static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
@@ -315,7 +306,8 @@ static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
fetchPixel<QPixelLayout::BPP8>, // BPP8
fetchPixel<QPixelLayout::BPP16>, // BPP16
fetchPixel<QPixelLayout::BPP24>, // BPP24
- fetchPixel<QPixelLayout::BPP32> // BPP32
+ fetchPixel<QPixelLayout::BPP32>, // BPP32
+ fetchPixel<QPixelLayout::BPP64> // BPP64
};
template<QImage::Format Format>
@@ -350,16 +342,20 @@ static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QR
buffer[i] = convertPixelToRGB32<Format>(buffer[i]);
}
+#if defined(__SSE2__) && !defined(__SSSE3__)
+extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar*src, int index, int count);
+#endif
+
template<QImage::Format Format>
static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{
constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
#if defined(__SSE2__) && !defined(__SSSE3__)
- if (BPP == QPixelLayout::BPP24) {
+ if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
// With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
// to vectorize the deforested version below.
- qFetchPixels[BPP](buffer, src, index, count);
+ fetchPixelsBPP24_ssse3(buffer, src, index, count);
convertToRGB32<Format>(buffer, count, nullptr);
return buffer;
}
@@ -370,12 +366,26 @@ static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, i
}
template<QImage::Format Format>
+static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
+{
+ return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s));
+}
+
+template<QImage::Format Format>
static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
for (int i = 0; i < count; ++i)
- buffer[i] = QRgba64::fromArgb32(convertPixelToRGB32<Format>(src[i]));
+ buffer[i] = convertPixelToRGB64<Format>(src[i]);
+ return buffer;
+}
+template<QImage::Format Format>
+static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i));
return buffer;
}
@@ -433,10 +443,10 @@ static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *
{
constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
#if defined(__SSE2__) && !defined(__SSSE3__)
- if (BPP == QPixelLayout::BPP24) {
+ if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
// With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
// to vectorize the deforested version below.
- qFetchPixels[BPP](buffer, src, index, count);
+ fetchPixelsBPP24_ssse3(buffer, src, index, count);
convertARGBPMToARGB32PM<Format>(buffer, count, nullptr);
return buffer;
}
@@ -447,12 +457,27 @@ static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *
}
template<QImage::Format Format>
-static const QRgba64 *QT_FASTCALL convertARGBPMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
+{
+ return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s));
+}
+
+template<QImage::Format Format>
+static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
for (int i = 0; i < count; ++i)
- buffer[i] = QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(src[i]));
+ buffer[i] = convertPixelToRGB64<Format>(src[i]);
+ return buffer;
+}
+template<QImage::Format Format>
+static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
+ for (int i = 0; i < count; ++i)
+ buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i));
return buffer;
}
@@ -635,6 +660,7 @@ template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixe
convertToRGB32<Format>,
convertToRGB64<Format>,
fetchRGBToRGB32<Format>,
+ fetchRGBToRGB64<Format>,
storeRGBFromARGB32PM<Format, false>,
storeRGBFromARGB32PM<Format, true>
};
@@ -648,8 +674,9 @@ template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixe
bitsPerPixel<Format>(),
rbSwap<Format>,
convertARGBPMToARGB32PM<Format>,
- convertARGBPMToARGB64PM<Format>,
+ convertARGBPMToRGBA64PM<Format>,
fetchARGBPMToARGB32PM<Format>,
+ fetchARGBPMToRGBA64PM<Format>,
storeARGBPMFromARGB32PM<Format, false>,
storeARGBPMFromARGB32PM<Format, true>
};
@@ -672,7 +699,18 @@ static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar
return buffer;
}
-static const QRgba64 *QT_FASTCALL convertIndexedToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+template<QPixelLayout::BPP BPP>
+static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *clut, QDitherInfo *)
+{
+ for (int i = 0; i < count; ++i) {
+ const uint s = fetchPixel<BPP>(src, index + i);
+ buffer[i] = QRgba64::fromArgb32(clut->at(s)).premultiplied();
+ }
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *clut, QDitherInfo *)
{
for (int i = 0; i < count; ++i)
@@ -690,6 +728,12 @@ static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int in
return reinterpret_cast<const uint *>(src) + index;
}
+static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return reinterpret_cast<const QRgba64 *>(src) + index;
+}
+
static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{
@@ -755,6 +799,13 @@ static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const ui
buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]);
return buffer;
}
+static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromRgba(0, 0, 0, src[index + i]);
+ return buffer;
+}
static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
{
@@ -782,6 +833,16 @@ static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, cons
return buffer;
}
+static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ for (int i = 0; i < count; ++i) {
+ const uint s = src[index + i];
+ buffer[i] = QRgba64::fromRgba(s, s, s, 255);
+ }
+ return buffer;
+}
+
static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{
@@ -798,7 +859,7 @@ static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src
#ifdef __SSE2__
template<bool RGBA, bool maskAlpha>
-static inline void qConvertARGB32PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count)
+static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
if (count <= 0)
return;
@@ -841,6 +902,66 @@ static inline void qConvertARGB32PMToARGB64PM_sse2(QRgba64 *buffer, const uint *
*buffer++ = QRgba64::fromArgb32(s);
}
}
+
+template<QtPixelOrder PixelOrder>
+static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
+{
+ const __m128i gmask = _mm_set1_epi32(0x000ffc00);
+ const __m128i cmask = _mm_set1_epi32(0x000003ff);
+ int i = 0;
+ __m128i vr, vg, vb, va;
+ for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
+ *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
+ }
+
+ for (; i < count-15; i += 16) {
+ // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
+ // so we try to avoid it by checking if it is needed 16 samples at a time.
+ __m128i vOr = _mm_set1_epi32(0);
+ __m128i vAnd = _mm_set1_epi32(0xffffffff);
+ for (int j = 0; j < 16; j += 2) {
+ __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
+ vOr = _mm_or_si128(vOr, vs);
+ vAnd = _mm_and_si128(vAnd, vs);
+ }
+ const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
+ const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));
+
+ if (andAlpha == 0xffff) {
+ for (int j = 0; j < 16; j += 2) {
+ __m128i vs = _mm_load_si128((const __m128i*)buffer);
+ buffer += 2;
+ vr = _mm_srli_epi64(vs, 6);
+ vg = _mm_srli_epi64(vs, 16 + 6 - 10);
+ vb = _mm_srli_epi64(vs, 32 + 6);
+ vr = _mm_and_si128(vr, cmask);
+ vg = _mm_and_si128(vg, gmask);
+ vb = _mm_and_si128(vb, cmask);
+ va = _mm_srli_epi64(vs, 48 + 14);
+ if (PixelOrder == PixelOrderRGB)
+ vr = _mm_slli_epi32(vr, 20);
+ else
+ vb = _mm_slli_epi32(vb, 20);
+ va = _mm_slli_epi32(va, 30);
+ __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
+ vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
+ _mm_storel_epi64((__m128i*)dest, vd);
+ dest += 2;
+ }
+ } else if (orAlpha == 0) {
+ for (int j = 0; j < 16; ++j) {
+ *dest++ = 0;
+ buffer++;
+ }
+ } else {
+ for (int j = 0; j < 16; ++j)
+ *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
+ }
+ }
+
+ SIMD_EPILOGUE(i, count, 15)
+ *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
+}
#elif defined(__ARM_NEON__)
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count)
@@ -894,7 +1015,7 @@ static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uin
const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
- qConvertARGB32PMToARGB64PM_sse2<false, true>(buffer, src, count);
+ qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count);
#elif defined(__ARM_NEON__)
qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count);
#else
@@ -904,11 +1025,17 @@ static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uin
return buffer;
}
-static const QRgba64 *QT_FASTCALL convertARGB32ToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return convertRGB32ToRGB64(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
+}
+
+static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
- qConvertARGB32PMToARGB64PM_sse2<false, false>(buffer, src, count);
+ qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
for (int i = 0; i < count; ++i)
buffer[i] = buffer[i].premultiplied();
#elif defined(__ARM_NEON__)
@@ -922,11 +1049,17 @@ static const QRgba64 *QT_FASTCALL convertARGB32ToARGB64PM(QRgba64 *buffer, const
return buffer;
}
-static const QRgba64 *QT_FASTCALL convertARGB32PMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return convertARGB32ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
+}
+
+static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
- qConvertARGB32PMToARGB64PM_sse2<false, false>(buffer, src, count);
+ qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
#elif defined(__ARM_NEON__)
qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count);
#else
@@ -936,11 +1069,36 @@ static const QRgba64 *QT_FASTCALL convertARGB32PMToARGB64PM(QRgba64 *buffer, con
return buffer;
}
-static const QRgba64 *QT_FASTCALL convertRGBA8888ToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return convertARGB32PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
+}
+
+static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
+{
+ for (int i = 0; i < count; ++i)
+ buffer[i] = buffer[i].premultiplied();
+}
+
+static void convertRGBA64PMToRGBA64PM(QRgba64 *, int)
+{
+}
+
+static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
+ for (int i = 0; i < count; ++i)
+ buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied();
+ return buffer;
+}
+
+static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
- qConvertARGB32PMToARGB64PM_sse2<true, false>(buffer, src, count);
+ qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
for (int i = 0; i < count; ++i)
buffer[i] = buffer[i].premultiplied();
#elif defined(__ARM_NEON__)
@@ -954,11 +1112,17 @@ static const QRgba64 *QT_FASTCALL convertRGBA8888ToARGB64PM(QRgba64 *buffer, con
return buffer;
}
-static const QRgba64 *QT_FASTCALL convertRGBA8888PMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return convertRGBA8888ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
+}
+
+static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
- qConvertARGB32PMToARGB64PM_sse2<true, false>(buffer, src, count);
+ qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
#elif defined(__ARM_NEON__)
qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count);
#else
@@ -968,6 +1132,12 @@ static const QRgba64 *QT_FASTCALL convertRGBA8888PMToARGB64PM(QRgba64 *buffer, c
return buffer;
}
+static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return convertRGBA8888PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
+}
+
static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{
@@ -1025,7 +1195,7 @@ static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const ucha
#ifdef __SSE2__
template<QtPixelOrder PixelOrder>
-static inline void qConvertA2RGB30PMToARGB64PM_sse2(QRgba64 *buffer, const uint *src, int count)
+static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
if (count <= 0)
return;
@@ -1068,11 +1238,11 @@ static inline void qConvertA2RGB30PMToARGB64PM_sse2(QRgba64 *buffer, const uint
#endif
template<QtPixelOrder PixelOrder>
-static const QRgba64 *QT_FASTCALL convertA2RGB30PMToARGB64PM(QRgba64 *buffer, const uint *src, int count,
+static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
- qConvertA2RGB30PMToARGB64PM_sse2<PixelOrder>(buffer, src, count);
+ qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count);
#else
for (int i = 0; i < count; ++i)
buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
@@ -1081,6 +1251,13 @@ static const QRgba64 *QT_FASTCALL convertA2RGB30PMToARGB64PM(QRgba64 *buffer, co
}
template<QtPixelOrder PixelOrder>
+static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
+}
+
+template<QtPixelOrder PixelOrder>
static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{
@@ -1104,6 +1281,52 @@ static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int
UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
}
+template<bool RGBA>
+void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count)
+{
+ int i = 0;
+#ifdef __SSE2__
+ if (((uintptr_t)dst & 0x7) && count > 0) {
+ uint s = (*src++).toArgb32();
+ if (RGBA)
+ s = ARGB2RGBA(s);
+ *dst++ = s;
+ i++;
+ }
+ const __m128i vhalf = _mm_set1_epi32(0x80);
+ const __m128i vzero = _mm_setzero_si128();
+ for (; i < count-1; i += 2) {
+ __m128i vs = _mm_loadu_si128((const __m128i*)src);
+ src += 2;
+ if (!RGBA) {
+ vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
+ vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
+ }
+ __m128i v1 = _mm_unpacklo_epi16(vs, vzero);
+ __m128i v2 = _mm_unpackhi_epi16(vs, vzero);
+ v1 = _mm_add_epi32(v1, vhalf);
+ v2 = _mm_add_epi32(v2, vhalf);
+ v1 = _mm_sub_epi32(v1, _mm_srli_epi32(v1, 8));
+ v2 = _mm_sub_epi32(v2, _mm_srli_epi32(v2, 8));
+ v1 = _mm_srli_epi32(v1, 8);
+ v2 = _mm_srli_epi32(v2, 8);
+ v1 = _mm_packs_epi32(v1, v2);
+ v1 = _mm_packus_epi16(v1, vzero);
+ _mm_storel_epi64((__m128i*)(dst), v1);
+ dst += 2;
+ }
+#endif
+ for (; i < count; i++) {
+ uint s = (*src++).toArgb32();
+ if (RGBA)
+ s = ARGB2RGBA(s);
+ *dst++ = s;
+ }
+}
+template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count);
+template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count);
+
+
static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *)
{
@@ -1125,27 +1348,66 @@ static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src
dest[index + i] = qGray(qUnpremultiply(src[i]));
}
+static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
+ for (int i = 0; i < count; ++i)
+ buffer[i] = toArgb32(s[i]);
+ return buffer;
+}
+
+static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
+ for (int i = 0; i < count; ++i)
+ d[i] = QRgba64::fromArgb32(src[i]);
+}
+
+static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
+ for (int i = 0; i < count; ++i)
+ buffer[i] = toArgb32(s[i].premultiplied());
+ return buffer;
+}
+
+static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
+ for (int i = 0; i < count; ++i)
+ d[i] = QRgba64::fromArgb32(src[i]).unpremultiplied();
+}
// Note:
// convertToArgb32() assumes that no color channel is less than 4 bits.
// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
- { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid
- { false, false, QPixelLayout::BPP1MSB, nullptr, convertIndexedToARGB32PM,
- convertIndexedToARGB64PM, fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, nullptr, nullptr }, // Format_Mono
- { false, false, QPixelLayout::BPP1LSB, nullptr, convertIndexedToARGB32PM,
- convertIndexedToARGB64PM, fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, nullptr, nullptr }, // Format_MonoLSB
- { false, false, QPixelLayout::BPP8, nullptr, convertIndexedToARGB32PM,
- convertIndexedToARGB64PM, fetchIndexedToARGB32PM<QPixelLayout::BPP8>, nullptr, nullptr }, // Format_Indexed8
+ { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid
+ { false, false, QPixelLayout::BPP1MSB, nullptr,
+ convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
+ fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
+ nullptr, nullptr }, // Format_Mono
+ { false, false, QPixelLayout::BPP1LSB, nullptr,
+ convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
+ fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
+ nullptr, nullptr }, // Format_MonoLSB
+ { false, false, QPixelLayout::BPP8, nullptr,
+ convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
+ fetchIndexedToARGB32PM<QPixelLayout::BPP8>, fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
+ nullptr, nullptr }, // Format_Indexed8
// Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
// but everywhere this generic conversion would be wrong is currently overloaded.
{ false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
- convertRGB32ToRGB64, fetchPassThrough, storePassThrough, storePassThrough }, // Format_RGB32
+ convertRGB32ToRGB64, fetchPassThrough, fetchRGB32ToRGB64, storePassThrough, storePassThrough }, // Format_RGB32
{ true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM,
- convertARGB32ToARGB64PM, fetchARGB32ToARGB32PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32
+ convertARGB32ToRGBA64PM, fetchARGB32ToARGB32PM, fetchARGB32ToRGBA64PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32
{ true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
- convertARGB32PMToARGB64PM, fetchPassThrough, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied
+ convertARGB32PMToRGBA64PM, fetchPassThrough, fetchARGB32PMToRGBA64PM, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied
pixelLayoutRGB<QImage::Format_RGB16>(),
pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
pixelLayoutRGB<QImage::Format_RGB666>(),
@@ -1156,43 +1418,167 @@ QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
pixelLayoutRGB<QImage::Format_RGB444>(),
pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
{ false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
- convertRGBA8888PMToARGB64PM, fetchRGBA8888PMToARGB32PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888
+ convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888
{ true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM,
- convertRGBA8888ToARGB64PM, fetchRGBA8888ToARGB32PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888
+ convertRGBA8888ToRGBA64PM, fetchRGBA8888ToARGB32PM, fetchRGBA8888ToRGBA64PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888
{ true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
- convertRGBA8888PMToARGB64PM, fetchRGBA8888PMToARGB32PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
+ convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
{ false, false, QPixelLayout::BPP32, rbSwap_rgb30,
convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
- convertA2RGB30PMToARGB64PM<PixelOrderBGR>,
+ convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
+ fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
storeRGB30FromARGB32PM<PixelOrderBGR>,
storeRGB30FromRGB32<PixelOrderBGR>
}, // Format_BGR30
{ true, true, QPixelLayout::BPP32, rbSwap_rgb30,
convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
- convertA2RGB30PMToARGB64PM<PixelOrderBGR>,
+ convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
+ fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
storeRGB30FromRGB32<PixelOrderBGR>
}, // Format_A2BGR30_Premultiplied
{ false, false, QPixelLayout::BPP32, rbSwap_rgb30,
convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
- convertA2RGB30PMToARGB64PM<PixelOrderRGB>,
+ convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
+ fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
storeRGB30FromARGB32PM<PixelOrderRGB>,
storeRGB30FromRGB32<PixelOrderRGB>
}, // Format_RGB30
{ true, true, QPixelLayout::BPP32, rbSwap_rgb30,
convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
- convertA2RGB30PMToARGB64PM<PixelOrderRGB>,
+ convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
+ fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
storeRGB30FromRGB32<PixelOrderRGB>
}, // Format_A2RGB30_Premultiplied
- { true, true, QPixelLayout::BPP8, nullptr, convertAlpha8ToRGB32,
- convertAlpha8ToRGB64, fetchAlpha8ToRGB32, storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8
- { false, false, QPixelLayout::BPP8, nullptr, convertGrayscale8ToRGB32,
- convertGrayscale8ToRGB64, fetchGrayscale8ToRGB32, storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 } // Format_Grayscale8
+ { true, true, QPixelLayout::BPP8, nullptr,
+ convertAlpha8ToRGB32, convertAlpha8ToRGB64,
+ fetchAlpha8ToRGB32, fetchAlpha8ToRGB64,
+ storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8
+ { false, false, QPixelLayout::BPP8, nullptr,
+ convertGrayscale8ToRGB32, convertGrayscale8ToRGB64,
+ fetchGrayscale8ToRGB32, fetchGrayscale8ToRGB64,
+ storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 }, // Format_Grayscale8
+ { false, false, QPixelLayout::BPP64, nullptr,
+ convertPassThrough, nullptr,
+ fetchRGB64ToRGB32, fetchPassThrough64,
+ storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBX64
+ { true, false, QPixelLayout::BPP64, nullptr,
+ convertARGB32ToARGB32PM, nullptr,
+ fetchRGBA64ToARGB32PM, fetchRGBA64ToRGBA64PM,
+ storeRGBA64FromARGB32PM, storeRGB64FromRGB32 }, // Format_RGBA64
+ { true, true, QPixelLayout::BPP64, nullptr,
+ convertPassThrough, nullptr,
+ fetchRGB64ToRGB32, fetchPassThrough64,
+ storeRGB64FromRGB32, storeRGB64FromRGB32 } // Format_RGBA64_Premultiplied
+};
+
+Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
+
+static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
+{
+ for (int i = 0; i < length; ++i) {
+ dest[i] = toArgb32(src[i]);
+ }
+}
+
+template<QImage::Format format>
+static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *clut, QDitherInfo *dither)
+{
+ uint buffer[BufferSize];
+ convertFromRgb64(buffer, src, count);
+ qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither);
+}
+
+static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ uint *d = (uint*)dest + index;
+ for (int i = 0; i < count; ++i)
+ d[i] = toArgb32(src[i].unpremultiplied());
+}
+
+static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ uint *d = (uint*)dest + index;
+ for (int i = 0; i < count; ++i)
+ d[i] = toRgba8888(src[i].unpremultiplied());
+}
+
+template<QtPixelOrder PixelOrder>
+static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ uint *d = (uint*)dest + index;
+#ifdef __SSE2__
+ qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count);
+#else
+ for (int i = 0; i < count; ++i)
+ d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]);
+#endif
+}
+
+static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
+ for (int i = 0; i < count; ++i) {
+ d[i] = src[i].unpremultiplied();
+ d[i].setAlpha(65535);
+ }
+}
+
+static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
+ for (int i = 0; i < count; ++i)
+ d[i] = src[i].unpremultiplied();
+}
+
+static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
+ if (d != src)
+ memcpy(d, src, count * sizeof(QRgba64));
+}
+
+ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ storeGenericFromRGBA64PM<QImage::Format_RGB32>,
+ storeARGB32FromRGBA64PM,
+ storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>,
+ storeGenericFromRGBA64PM<QImage::Format_RGB16>,
+ storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>,
+ storeGenericFromRGBA64PM<QImage::Format_RGB666>,
+ storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>,
+ storeGenericFromRGBA64PM<QImage::Format_RGB555>,
+ storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>,
+ storeGenericFromRGBA64PM<QImage::Format_RGB888>,
+ storeGenericFromRGBA64PM<QImage::Format_RGB444>,
+ storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>,
+ storeGenericFromRGBA64PM<QImage::Format_RGBX8888>,
+ storeRGBA8888FromRGBA64PM,
+ storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>,
+ storeRGB30FromRGBA64PM<PixelOrderBGR>,
+ storeRGB30FromRGBA64PM<PixelOrderBGR>,
+ storeRGB30FromRGBA64PM<PixelOrderRGB>,
+ storeRGB30FromRGBA64PM<PixelOrderRGB>,
+ storeGenericFromRGBA64PM<QImage::Format_Alpha8>,
+ storeGenericFromRGBA64PM<QImage::Format_Grayscale8>,
+ storeRGBX64FromRGBA64PM,
+ storeRGBA64FromRGBA64PM,
+ storeRGBA64PMFromRGBA64PM
};
/*
@@ -1245,19 +1631,20 @@ static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, in
return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
}
+static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
+{
+ return buffer;
+}
+
static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
{
const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
- uint buffer32[BufferSize];
- const uint *ptr = qFetchPixels[layout->bpp](buffer32, rasterBuffer->scanLine(y), x, length);
- return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, ptr, length, 0, 0));
+ return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
}
-static QRgba64 *QT_FASTCALL destFetch64uint32(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
+static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
{
- const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
- const uint *src = ((const uint *)rasterBuffer->scanLine(y)) + x;
- return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, src, length, 0, 0));
+ return (QRgba64 *)rasterBuffer->scanLine(y) + x;
}
static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
@@ -1292,6 +1679,9 @@ static DestFetchProc destFetchProc[QImage::NImageFormats] =
destFetch, // Format_A2RGB30_Premultiplied
destFetch, // Format_Alpha8
destFetch, // Format_Grayscale8
+ destFetch, // Format_RGBX64
+ destFetch, // Format_RGBA64
+ destFetch, // Format_RGBA64_Premultiplied
};
static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
@@ -1300,9 +1690,9 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
0, // Format_Mono,
0, // Format_MonoLSB
0, // Format_Indexed8
- destFetch64uint32, // Format_RGB32
- destFetch64uint32, // Format_ARGB32,
- destFetch64uint32, // Format_ARGB32_Premultiplied
+ destFetch64, // Format_RGB32
+ destFetch64, // Format_ARGB32,
+ destFetch64, // Format_ARGB32_Premultiplied
destFetch64, // Format_RGB16
destFetch64, // Format_ARGB8565_Premultiplied
destFetch64, // Format_RGB666
@@ -1312,15 +1702,18 @@ static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
destFetch64, // Format_RGB888
destFetch64, // Format_RGB444
destFetch64, // Format_ARGB4444_Premultiplied
- destFetch64uint32, // Format_RGBX8888
- destFetch64uint32, // Format_RGBA8888
- destFetch64uint32, // Format_RGBA8888_Premultiplied
- destFetch64uint32, // Format_BGR30
- destFetch64uint32, // Format_A2BGR30_Premultiplied
- destFetch64uint32, // Format_RGB30
- destFetch64uint32, // Format_A2RGB30_Premultiplied
+ destFetch64, // Format_RGBX8888
+ destFetch64, // Format_RGBA8888
+ destFetch64, // Format_RGBA8888_Premultiplied
+ destFetch64, // Format_BGR30
+ destFetch64, // Format_A2BGR30_Premultiplied
+ destFetch64, // Format_RGB30
+ destFetch64, // Format_A2RGB30_Premultiplied
destFetch64, // Format_Alpha8
destFetch64, // Format_Grayscale8
+ destFetchRGB64, // Format_RGBX64
+ destFetch64, // Format_RGBA64
+ destFetchRGB64, // Format_RGBA64_Premultiplied
};
/*
@@ -1429,120 +1822,19 @@ static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, con
store(dest, buffer, x, length, nullptr, nullptr);
}
-static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
-{
- for (int i = 0; i < length; ++i) {
- dest[i] = toArgb32(src[i]);
- }
-}
-
static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
- uint buf[BufferSize];
- const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
- ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
- if (!layout->premultiplied && !layout->hasAlphaChannel)
- store = layout->storeFromRGB32;
+ auto store = qStoreFromRGBA64PM[rasterBuffer->format];
uchar *dest = rasterBuffer->scanLine(y);
- while (length) {
- int l = qMin(length, BufferSize);
- convertFromRgb64(buf, buffer, l);
- store(dest, buf, x, l, nullptr, nullptr);
- length -= l;
- buffer += l;
- x += l;
- }
-}
-
-#ifdef __SSE2__
-template<QtPixelOrder PixelOrder>
-static inline void qConvertARGB64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
-{
- const __m128i gmask = _mm_set1_epi32(0x000ffc00);
- const __m128i cmask = _mm_set1_epi32(0x000003ff);
- int i = 0;
- __m128i vr, vg, vb, va;
- for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
- *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
- }
-
- for (; i < count-15; i += 16) {
- // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
- // so we try to avoid it by checking if it is needed 16 samples at a time.
- __m128i vOr = _mm_set1_epi32(0);
- __m128i vAnd = _mm_set1_epi32(0xffffffff);
- for (int j = 0; j < 16; j += 2) {
- __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
- vOr = _mm_or_si128(vOr, vs);
- vAnd = _mm_and_si128(vAnd, vs);
- }
- const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
- const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));
-
- if (andAlpha == 0xffff) {
- for (int j = 0; j < 16; j += 2) {
- __m128i vs = _mm_load_si128((const __m128i*)buffer);
- buffer += 2;
- vr = _mm_srli_epi64(vs, 6);
- vg = _mm_srli_epi64(vs, 16 + 6 - 10);
- vb = _mm_srli_epi64(vs, 32 + 6);
- vr = _mm_and_si128(vr, cmask);
- vg = _mm_and_si128(vg, gmask);
- vb = _mm_and_si128(vb, cmask);
- va = _mm_srli_epi64(vs, 48 + 14);
- if (PixelOrder == PixelOrderRGB)
- vr = _mm_slli_epi32(vr, 20);
- else
- vb = _mm_slli_epi32(vb, 20);
- va = _mm_slli_epi32(va, 30);
- __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
- vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
- _mm_storel_epi64((__m128i*)dest, vd);
- dest += 2;
- }
- } else if (orAlpha == 0) {
- for (int j = 0; j < 16; ++j) {
- *dest++ = 0;
- buffer++;
- }
- } else {
- for (int j = 0; j < 16; ++j)
- *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
- }
- }
-
- SIMD_EPILOGUE(i, count, 15)
- *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
-}
-#endif
-
-static void QT_FASTCALL destStore64ARGB32(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
-{
- uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
- for (int i = 0; i < length; ++i) {
- dest[i] = toArgb32(buffer[i].unpremultiplied());
- }
-}
-
-static void QT_FASTCALL destStore64RGBA8888(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
-{
- uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
- for (int i = 0; i < length; ++i) {
- dest[i] = toRgba8888(buffer[i].unpremultiplied());
- }
+ store(dest, buffer, x, length, nullptr, nullptr);
}
-template<QtPixelOrder PixelOrder>
-static void QT_FASTCALL destStore64RGB30(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
+static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
- uint *dest = (uint*)rasterBuffer->scanLine(y) + x;
-#ifdef __SSE2__
- qConvertARGB64PMToA2RGB30PM_sse2<PixelOrder>(dest, buffer, length);
-#else
+ QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
for (int i = 0; i < length; ++i) {
- dest[i] = qConvertRgb64ToRgb30<PixelOrder>(buffer[i]);
+ dest[i] = buffer[i].unpremultiplied();
}
-#endif
}
static DestStoreProc destStoreProc[QImage::NImageFormats] =
@@ -1572,16 +1864,19 @@ static DestStoreProc destStoreProc[QImage::NImageFormats] =
destStore, // Format_A2RGB30_Premultiplied
destStore, // Format_Alpha8
destStore, // Format_Grayscale8
+ destStore, // Format_RGBX64
+ destStore, // Format_RGBA64
+ destStore, // Format_RGBA64_Premultiplied
};
static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
{
0, // Format_Invalid
- destStore64, // Format_Mono,
- destStore64, // Format_MonoLSB
+ 0, // Format_Mono,
+ 0, // Format_MonoLSB
0, // Format_Indexed8
destStore64, // Format_RGB32
- destStore64ARGB32, // Format_ARGB32,
+ destStore64, // Format_ARGB32,
destStore64, // Format_ARGB32_Premultiplied
destStore64, // Format_RGB16
destStore64, // Format_ARGB8565_Premultiplied
@@ -1593,14 +1888,17 @@ static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
destStore64, // Format_RGB444
destStore64, // Format_ARGB4444_Premultiplied
destStore64, // Format_RGBX8888
- destStore64RGBA8888, // Format_RGBA8888
+ destStore64, // Format_RGBA8888
destStore64, // Format_RGBA8888_Premultiplied
- destStore64RGB30<PixelOrderBGR>, // Format_BGR30
- destStore64RGB30<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
- destStore64RGB30<PixelOrderRGB>, // Format_RGB30
- destStore64RGB30<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
+ destStore64, // Format_BGR30
+ destStore64, // Format_A2BGR30_Premultiplied
+ destStore64, // Format_RGB30
+ destStore64, // Format_A2RGB30_Premultiplied
destStore64, // Format_Alpha8
destStore64, // Format_Grayscale8
+ 0, // Format_RGBX64
+ destStore64RGBA64, // Format_RGBA64
+ 0 // Format_RGBA64_Premultiplied
};
/*
@@ -1639,7 +1937,7 @@ static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator
const QSpanData *data, int y, int x, int)
{
const uchar *scanLine = data->texture.scanLine(y);
- return ((const uint *)scanLine) + x;
+ return reinterpret_cast<const uint *>(scanLine) + x;
}
static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
@@ -1656,14 +1954,14 @@ static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Op
const QSpanData *data, int y, int x, int length)
{
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
- if (layout->bpp != QPixelLayout::BPP32) {
- uint buffer32[BufferSize];
- const uint *ptr = qFetchPixels[layout->bpp](buffer32, data->texture.scanLine(y), x, length);
- return layout->convertToARGB64PM(buffer, ptr, length, data->texture.colorTable, 0);
- } else {
- const uint *src = (const uint *)data->texture.scanLine(y) + x;
- return layout->convertToARGB64PM(buffer, src, length, data->texture.colorTable, 0);
- }
+ return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
+}
+
+static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
+ const QSpanData *data, int y, int x, int)
+{
+ const uchar *scanLine = data->texture.scanLine(y);
+ return reinterpret_cast<const QRgba64 *>(scanLine) + x;
}
template<TextureBlendType blendType>
@@ -1680,8 +1978,8 @@ inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
}
}
-template<TextureBlendType blendType, QPixelLayout::BPP bpp>
-static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *data,
+template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
+static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
int y, int x, int length)
{
Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
@@ -1690,8 +1988,9 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
const qreal cx = x + qreal(0.5);
const qreal cy = y + qreal(0.5);
+ constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
- if (bpp != QPixelLayout::BPPNone) // Like this to not ICE on GCC 5.3.1
+ if (!useFetch)
Q_ASSERT(layout->bpp == bpp);
// When templated 'fetch' should be inlined at compile time:
const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);
@@ -1725,13 +2024,19 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
if (x1 == x2)
break;
- buffer[i] = fetch(src, x1);
+ if (useFetch)
+ buffer[i] = fetch(src, x1);
+ else
+ buffer[i] = reinterpret_cast<const T*>(src)[x1];
fx += fdx;
}
for (; i < fastLen; ++i) {
int px = (fx >> 16);
- buffer[i] = fetch(src, px);
+ if (useFetch)
+ buffer[i] = fetch(src, px);
+ else
+ buffer[i] = reinterpret_cast<const T*>(src)[px];
fx += fdx;
}
}
@@ -1739,7 +2044,10 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
for (; i < length; ++i) {
int px = (fx >> 16);
fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
- buffer[i] = fetch(src, px);
+ if (useFetch)
+ buffer[i] = fetch(src, px);
+ else
+ buffer[i] = reinterpret_cast<const T*>(src)[px];
fx += fdx;
}
} else { // rotation or shear
@@ -1764,7 +2072,10 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
if (x1 == x2 && y1 == y2)
break;
- buffer[i] = fetch(image.scanLine(y1), x1);
+ if (useFetch)
+ buffer[i] = fetch(image.scanLine(y1), x1);
+ else
+ buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1];
fx += fdx;
fy += fdy;
}
@@ -1772,7 +2083,10 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
for (; i < fastLen; ++i) {
int px = (fx >> 16);
int py = (fy >> 16);
- buffer[i] = fetch(image.scanLine(py), px);
+ if (useFetch)
+ buffer[i] = fetch(image.scanLine(py), px);
+ else
+ buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
fx += fdx;
fy += fdy;
}
@@ -1783,7 +2097,10 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
int py = (fy >> 16);
fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
- buffer[i] = fetch(image.scanLine(py), px);
+ if (useFetch)
+ buffer[i] = fetch(image.scanLine(py), px);
+ else
+ buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
fx += fdx;
fy += fdy;
}
@@ -1797,8 +2114,8 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
- uint *const end = buffer + length;
- uint *b = buffer;
+ T *const end = buffer + length;
+ T *b = buffer;
while (b < end) {
const qreal iw = fw == 0 ? 1 : 1 / fw;
const qreal tx = fx * iw;
@@ -1808,7 +2125,10 @@ static void QT_FASTCALL fetchTransformed_fetcher(uint *buffer, const QSpanData *
fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
- *b = fetch(image.scanLine(py), px);
+ if (useFetch)
+ *b = fetch(image.scanLine(py), px);
+ else
+ *b = reinterpret_cast<const T*>(image.scanLine(py))[px];
fx += fdx;
fy += fdy;
@@ -1828,23 +2148,29 @@ static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *,
{
Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
- fetchTransformed_fetcher<blendType, bpp>(buffer, data, y, x, length);
+ fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
return buffer;
}
-template<TextureBlendType blendType>
+template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
int y, int x, int length)
{
- Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
- uint buffer32[BufferSize];
- if (layout->bpp == QPixelLayout::BPP32)
- fetchTransformed_fetcher<blendType, QPixelLayout::BPP32>(buffer32, data, y, x, length);
- else
- fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone>(buffer32, data, y, x, length);
- layout->convertToARGB64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
+ if (layout->bpp != QPixelLayout::BPP64) {
+ uint buffer32[BufferSize];
+ Q_ASSERT(length <= BufferSize);
+ if (layout->bpp == QPixelLayout::BPP32)
+ fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
+ else
+ fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
+ return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
+ }
+
+ fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
+ if (data->texture.format == QImage::Format_RGBA64)
+ convertRGBA64ToRGBA64PM(buffer, length);
return buffer;
}
@@ -1939,7 +2265,7 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, u
#endif
#if defined(__SSE2__)
-static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty)
+static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
{
__m128i vt = _mm_loadu_si128((const __m128i*)t);
if (disty) {
@@ -1963,7 +2289,7 @@ static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint
#endif
}
#else
-static inline QRgba64 interpolate_4_pixels_rgb64(QRgba64 t[], QRgba64 b[], uint distx, uint disty)
+static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba64 b[], uint distx, uint disty)
{
const uint dx = distx>>8;
const uint dy = disty>>8;
@@ -2844,16 +3170,16 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, ui
}
-typedef void (QT_FASTCALL *BilinearFastTransformFetcher)(uint *buf1, uint *buf2, const int len, const QTextureData &image,
- int fx, int fy, const int fdx, const int fdy);
-
-template<TextureBlendType blendType, QPixelLayout::BPP bpp>
-static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, const int len, const QTextureData &image,
+template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
+static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image,
int fx, int fy, const int fdx, const int fdy)
{
const QPixelLayout &layout = qPixelLayouts[image.format];
- Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout.bpp);
- // When templated 'fetch1' should be inlined at compile time:
+ constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
+ if (useFetch)
+ Q_ASSERT(sizeof(T) == sizeof(uint));
+ else
+ Q_ASSERT(layout.bpp == bpp);
const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
if (fdy == 0) {
int y1 = (fy >> 16);
@@ -2870,8 +3196,13 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2,
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
if (x1 != x2)
break;
- buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
- buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
+ if (useFetch) {
+ buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
+ buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
+ } else {
+ buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1];
+ buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1];
+ }
fx += fdx;
}
int fastLen = len;
@@ -2882,10 +3213,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2,
for (; i < fastLen; ++i) {
int x = (fx >> 16);
- buf1[i * 2 + 0] = fetch1(s1, x);
- buf1[i * 2 + 1] = fetch1(s1, x + 1);
- buf2[i * 2 + 0] = fetch1(s2, x);
- buf2[i * 2 + 1] = fetch1(s2, x + 1);
+ if (useFetch) {
+ buf1[i * 2 + 0] = fetch1(s1, x);
+ buf1[i * 2 + 1] = fetch1(s1, x + 1);
+ buf2[i * 2 + 0] = fetch1(s2, x);
+ buf2[i * 2 + 1] = fetch1(s2, x + 1);
+ } else {
+ buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
+ buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
+ buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
+ buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
+ }
fx += fdx;
}
}
@@ -2894,10 +3232,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2,
int x1 = (fx >> 16);
int x2;
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
- buf1[i * 2 + 0] = fetch1(s1, x1);
- buf1[i * 2 + 1] = fetch1(s1, x2);
- buf2[i * 2 + 0] = fetch1(s2, x1);
- buf2[i * 2 + 1] = fetch1(s2, x2);
+ if (useFetch) {
+ buf1[i * 2 + 0] = fetch1(s1, x1);
+ buf1[i * 2 + 1] = fetch1(s1, x2);
+ buf2[i * 2 + 0] = fetch1(s2, x1);
+ buf2[i * 2 + 1] = fetch1(s2, x2);
+ } else {
+ buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
+ buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
+ buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
+ buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
+ }
fx += fdx;
}
} else {
@@ -2914,10 +3259,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2,
break;
const uchar *s1 = image.scanLine(y1);
const uchar *s2 = image.scanLine(y2);
- buf1[i * 2 + 0] = fetch1(s1, x1);
- buf1[i * 2 + 1] = fetch1(s1, x2);
- buf2[i * 2 + 0] = fetch1(s2, x1);
- buf2[i * 2 + 1] = fetch1(s2, x2);
+ if (useFetch) {
+ buf1[i * 2 + 0] = fetch1(s1, x1);
+ buf1[i * 2 + 1] = fetch1(s1, x2);
+ buf2[i * 2 + 0] = fetch1(s2, x1);
+ buf2[i * 2 + 1] = fetch1(s2, x2);
+ } else {
+ buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
+ buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
+ buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
+ buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
+ }
fx += fdx;
fy += fdy;
}
@@ -2936,10 +3288,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2,
int y = (fy >> 16);
const uchar *s1 = image.scanLine(y);
const uchar *s2 = s1 + image.bytesPerLine;
- buf1[i * 2 + 0] = fetch1(s1, x);
- buf1[i * 2 + 1] = fetch1(s1, x + 1);
- buf2[i * 2 + 0] = fetch1(s2, x);
- buf2[i * 2 + 1] = fetch1(s2, x + 1);
+ if (useFetch) {
+ buf1[i * 2 + 0] = fetch1(s1, x);
+ buf1[i * 2 + 1] = fetch1(s1, x + 1);
+ buf2[i * 2 + 0] = fetch1(s2, x);
+ buf2[i * 2 + 1] = fetch1(s2, x + 1);
+ } else {
+ buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
+ buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
+ buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
+ buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
+ }
fx += fdx;
fy += fdy;
}
@@ -2955,10 +3314,17 @@ static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2,
const uchar *s1 = image.scanLine(y1);
const uchar *s2 = image.scanLine(y2);
- buf1[i * 2 + 0] = fetch1(s1, x1);
- buf1[i * 2 + 1] = fetch1(s1, x2);
- buf2[i * 2 + 0] = fetch1(s2, x1);
- buf2[i * 2 + 1] = fetch1(s2, x2);
+ if (useFetch) {
+ buf1[i * 2 + 0] = fetch1(s1, x1);
+ buf1[i * 2 + 1] = fetch1(s1, x2);
+ buf2[i * 2 + 0] = fetch1(s2, x1);
+ buf2[i * 2 + 1] = fetch1(s2, x2);
+ } else {
+ buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
+ buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
+ buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
+ buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
+ }
fx += fdx;
fy += fdy;
}
@@ -2997,7 +3363,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
if (mid != length)
fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
} else {
- const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
+ const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
uint buf1[BufferSize];
uint buf2[BufferSize];
@@ -3032,7 +3398,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
}
}
} else { // rotation or shear
- const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
+ const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
uint buf1[BufferSize];
uint buf2[BufferSize];
@@ -3146,19 +3512,27 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
}
template<TextureBlendType blendType>
-static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
- const QSpanData *data, int y, int x, int length)
+static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data,
+ int y, int x, int length)
{
- const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
+ const QTextureData &texture = data->texture;
+ const QPixelLayout *layout = &qPixelLayouts[texture.format];
const QVector<QRgb> *clut = data->texture.colorTable;
const qreal cx = x + qreal(0.5);
const qreal cy = y + qreal(0.5);
+ uint sbuf1[BufferSize];
+ uint sbuf2[BufferSize];
+ QRgba64 buf1[BufferSize];
+ QRgba64 buf2[BufferSize];
+ QRgba64 *end = buffer + length;
+ QRgba64 *b = buffer;
+
if (data->fast_matrix) {
// The increment pr x in the scanline
- int fdx = (int)(data->m11 * fixed_scale);
- int fdy = (int)(data->m12 * fixed_scale);
+ const int fdx = (int)(data->m11 * fixed_scale);
+ const int fdy = (int)(data->m12 * fixed_scale);
int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
@@ -3166,18 +3540,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fx -= half_point;
fy -= half_point;
- const BilinearFastTransformFetcher fetcher =
+ const auto fetcher =
(layout->bpp == QPixelLayout::BPP32)
- ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32>
- : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone>;
+ ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
+ : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
if (fdy == 0) { //simple scale, no rotation
-
- uint sbuf1[BufferSize];
- uint sbuf2[BufferSize];
- quint64 buf1[BufferSize];
- quint64 buf2[BufferSize];
- QRgba64 *b = buffer;
while (length) {
int len = qMin(length, BufferSize / 2);
int disty = (fy & 0x0000ffff);
@@ -3187,9 +3555,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
#endif
fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
- layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
+ layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, 0);
if (disty)
- layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
+ layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
int distx = (fx & 0x0000ffff);
@@ -3209,33 +3577,26 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
}
_mm_storel_epi64((__m128i*)(b+i), vt);
#else
- b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
#endif
fx += fdx;
}
length -= len;
b += len;
}
- } else { //rotation
- uint sbuf1[BufferSize];
- uint sbuf2[BufferSize];
- quint64 buf1[BufferSize];
- quint64 buf2[BufferSize];
- QRgba64 *end = buffer + length;
- QRgba64 *b = buffer;
-
+ } else { // rotation or shear
while (b < end) {
int len = qMin(length, BufferSize / 2);
fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
- layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
- layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
+ layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, 0);
+ layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
int distx = (fx & 0x0000ffff);
int disty = (fy & 0x0000ffff);
- b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
fx += fdx;
fy += fdy;
}
@@ -3244,7 +3605,7 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
b += len;
}
}
- } else {
+ } else { // !(data->fast_matrix)
const QTextureData &image = data->texture;
const qreal fdx = data->m11;
@@ -3256,16 +3617,11 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
- uint sbuf1[BufferSize];
- uint sbuf2[BufferSize];
- quint64 buf1[BufferSize];
- quint64 buf2[BufferSize];
- QRgba64 *b = buffer;
int distxs[BufferSize / 2];
int distys[BufferSize / 2];
- while (length) {
+ while (b < end) {
int len = qMin(length, BufferSize / 2);
for (int i = 0; i < len; ++i) {
const qreal iw = fw == 0 ? 1 : 1 / fw;
@@ -3283,21 +3639,165 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
- const uchar *s1 = data->texture.scanLine(y1);
- const uchar *s2 = data->texture.scanLine(y2);
+ const uchar *s1 = texture.scanLine(y1);
+ const uchar *s2 = texture.scanLine(y2);
- if (layout->bpp == QPixelLayout::BPP32) {
- sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
- sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
- sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
- sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
+ sbuf1[i * 2 + 0] = fetch(s1, x1);
+ sbuf1[i * 2 + 1] = fetch(s1, x2);
+ sbuf2[i * 2 + 0] = fetch(s2, x1);
+ sbuf2[i * 2 + 1] = fetch(s2, x2);
- } else {
- sbuf1[i * 2 + 0] = fetch(s1, x1);
- sbuf1[i * 2 + 1] = fetch(s1, x2);
- sbuf2[i * 2 + 0] = fetch(s2, x1);
- sbuf2[i * 2 + 1] = fetch(s2, x2);
+ fx += fdx;
+ fy += fdy;
+ fw += fdw;
+ //force increment to avoid /0
+ if (!fw)
+ fw += fdw;
+ }
+
+ layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, 0);
+ layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, 0);
+
+ for (int i = 0; i < len; ++i) {
+ int distx = distxs[i];
+ int disty = distys[i];
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
+ }
+
+ length -= len;
+ b += len;
+ }
+ }
+ return buffer;
+}
+
+template<TextureBlendType blendType>
+static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data,
+ int y, int x, int length)
+{
+ const QTextureData &texture = data->texture;
+ Q_ASSERT(qPixelLayouts[texture.format].bpp == QPixelLayout::BPP64);
+ const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM;
+
+ const qreal cx = x + qreal(0.5);
+ const qreal cy = y + qreal(0.5);
+
+ QRgba64 buf1[BufferSize];
+ QRgba64 buf2[BufferSize];
+ QRgba64 *end = buffer + length;
+ QRgba64 *b = buffer;
+
+ if (data->fast_matrix) {
+ // The increment pr x in the scanline
+ const int fdx = (int)(data->m11 * fixed_scale);
+ const int fdy = (int)(data->m12 * fixed_scale);
+
+ int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
+ int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
+
+ fx -= half_point;
+ fy -= half_point;
+ const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
+
+ if (fdy == 0) { //simple scale, no rotation
+ while (length) {
+ int len = qMin(length, BufferSize / 2);
+ int disty = (fy & 0x0000ffff);
+#if defined(__SSE2__)
+ const __m128i vdy = _mm_set1_epi16(disty);
+ const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
+#endif
+ fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
+
+ convert(buf1, len * 2);
+ if (disty)
+ convert(buf2, len * 2);
+
+ for (int i = 0; i < len; ++i) {
+ int distx = (fx & 0x0000ffff);
+#if defined(__SSE2__)
+ __m128i vt = _mm_loadu_si128((const __m128i*)(buf1 + i*2));
+ if (disty) {
+ __m128i vb = _mm_loadu_si128((const __m128i*)(buf2 + i*2));
+ vt = _mm_mulhi_epu16(vt, vidy);
+ vb = _mm_mulhi_epu16(vb, vdy);
+ vt = _mm_add_epi16(vt, vb);
+ }
+ if (distx) {
+ const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+ vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
+ vt = _mm_add_epi16(vt, _mm_srli_si128(vt, 8));
+ }
+ _mm_storel_epi64((__m128i*)(b+i), vt);
+#else
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
+#endif
+ fx += fdx;
}
+ length -= len;
+ b += len;
+ }
+ } else { // rotation or shear
+ while (b < end) {
+ int len = qMin(length, BufferSize / 2);
+
+ fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
+
+ convert(buf1, len * 2);
+ convert(buf2, len * 2);
+
+ for (int i = 0; i < len; ++i) {
+ int distx = (fx & 0x0000ffff);
+ int disty = (fy & 0x0000ffff);
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
+ fx += fdx;
+ fy += fdy;
+ }
+
+ length -= len;
+ b += len;
+ }
+ }
+ } else { // !(data->fast_matrix)
+ const QTextureData &image = data->texture;
+
+ const qreal fdx = data->m11;
+ const qreal fdy = data->m12;
+ const qreal fdw = data->m13;
+
+ qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
+ qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
+ qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
+
+ int distxs[BufferSize / 2];
+ int distys[BufferSize / 2];
+
+ while (b < end) {
+ int len = qMin(length, BufferSize / 2);
+ for (int i = 0; i < len; ++i) {
+ const qreal iw = fw == 0 ? 1 : 1 / fw;
+ const qreal px = fx * iw - qreal(0.5);
+ const qreal py = fy * iw - qreal(0.5);
+
+ int x1 = int(px) - (px < 0);
+ int x2;
+ int y1 = int(py) - (py < 0);
+ int y2;
+
+ distxs[i] = int((px - x1) * (1<<16));
+ distys[i] = int((py - y1) * (1<<16));
+
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+
+ const uchar *s1 = texture.scanLine(y1);
+ const uchar *s2 = texture.scanLine(y2);
+
+ buf1[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s1)[x1];
+ buf1[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s1)[x2];
+ buf2[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s2)[x1];
+ buf2[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s2)[x2];
fx += fdx;
fy += fdy;
@@ -3307,23 +3807,31 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fw += fdw;
}
- layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
- layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
+ convert(buf1, len * 2);
+ convert(buf2, len * 2);
for (int i = 0; i < len; ++i) {
int distx = distxs[i];
int disty = distys[i];
- b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
+ b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
}
length -= len;
b += len;
}
}
-
return buffer;
}
+template<TextureBlendType blendType>
+static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
+ const QSpanData *data, int y, int x, int length)
+{
+ if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64)
+ return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
+ return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
+}
+
// FetchUntransformed can have more specialized methods added depending on SIMD features.
static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
0, // Invalid
@@ -3351,6 +3859,9 @@ static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
fetchUntransformed, // Format_A2RGB30_Premultiplied
fetchUntransformed, // Alpha8
fetchUntransformed, // Grayscale8
+ fetchUntransformed, // RGBX64
+ fetchUntransformed, // RGBA64
+ fetchUntransformed, // RGBA64_Premultiplied
};
static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
@@ -3398,6 +3909,15 @@ static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
+static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
+ fetchUntransformedRGBA64PM, // Untransformed
+ fetchUntransformedRGBA64PM, // Tiled
+ fetchTransformed64<BlendTransformed>, // Transformed
+ fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
+ fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
+ fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
+};
+
static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
{
if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied)
@@ -3411,6 +3931,13 @@ static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage:
return sourceFetchGeneric[blendType];
}
+static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
+{
+ if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied)
+ return sourceFetchRGBA64PM[blendType];
+ return sourceFetchGeneric64[blendType];
+}
+
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
@@ -3753,7 +4280,7 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in
case QSpanData::Texture:
solidSource = !data->texture.hasAlpha;
op.srcFetch = getSourceFetch(getBlendType(data), data->texture.format);
- op.srcFetch64 = sourceFetchGeneric64[getBlendType(data)];
+ op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format);;
break;
default:
Q_UNREACHABLE();
@@ -3781,8 +4308,9 @@ static inline Operator getOperator(const QSpanData *data, const QSpan *spans, in
// If all spans are opaque we do not need to fetch dest.
// But don't clear passthrough destFetch as they are just as fast and save destStore.
if (op.destFetch != destFetchARGB32P)
- op.destFetch = 0;
- op.destFetch64 = destFetch64Undefined;
+ op.destFetch = destFetchUndefined;
+ if (op.destFetch64 != destFetchRGB64)
+ op.destFetch64 = destFetch64Undefined;
}
}
@@ -3816,7 +4344,7 @@ void blend_color_generic(int count, const QSpan *spans, void *userData)
int length = spans->len;
while (length) {
int l = qMin(BufferSize, length);
- uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
+ uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolid(dest, l, color, spans->coverage);
if (op.destStore)
op.destStore(data->rasterBuffer, x, spans->y, dest, l);
@@ -3831,7 +4359,7 @@ static void blend_color_argb(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
- Operator op = getOperator(data, spans, count);
+ const Operator op = getOperator(data, spans, count);
const uint color = data->solid.color.toArgb32();
if (op.mode == QPainter::CompositionMode_Source) {
@@ -3890,7 +4418,8 @@ void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
int l = qMin(BufferSize, length);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolid64(dest, l, color, spans->coverage);
- op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
+ if (op.destStore64)
+ op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
length -= l;
x += l;
}
@@ -4053,7 +4582,7 @@ public:
const uint *fetch(int x, int y, int len)
{
- dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, y, len) : buffer;
+ dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
return op.srcFetch(src_buffer, &op, data, y, x, len);
}
@@ -4079,7 +4608,7 @@ public:
bool isSupported() const
{
- return op.func64 && op.destFetch64 && op.destStore64;
+ return op.func64 && op.destFetch64;
}
const quint64 *fetch(int x, int y, int len)
@@ -4095,7 +4624,8 @@ public:
void store(int x, int y, int len)
{
- op.destStore64(data->rasterBuffer, x, y, (QRgba64 *)dest, len);
+ if (op.destStore64)
+ op.destStore64(data->rasterBuffer, x, y, (QRgba64 *)dest, len);
}
};
@@ -4151,7 +4681,7 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use
while (length) {
int l = qMin(BufferSize, length);
const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
- uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
+ uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
op.func(dest, src, l, coverage);
if (op.destStore)
op.destStore(data->rasterBuffer, x, spans->y, dest, l);
@@ -4202,7 +4732,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.func64(dest, src, l, coverage);
- op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
+ if (op.destStore64)
+ op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
x += l;
sx += l;
length -= l;
@@ -4392,7 +4923,7 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
if (BufferSize < l)
l = BufferSize;
const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
- uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
+ uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
op.func(dest, src, l, coverage);
if (op.destStore)
op.destStore(data->rasterBuffer, x, spans->y, dest, l);
@@ -4490,7 +5021,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.func64(dest, src, l, coverage);
- op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
+ if (op.destStore64)
+ op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
x += l;
sx += l;
length -= l;
@@ -4703,6 +5235,9 @@ void qBlendTexture(int count, const QSpan *spans, void *userData)
case QImage::Format_A2BGR30_Premultiplied:
case QImage::Format_RGB30:
case QImage::Format_A2RGB30_Premultiplied:
+ case QImage::Format_RGBX64:
+ case QImage::Format_RGBA64:
+ case QImage::Format_RGBA64_Premultiplied:
proc = processTextureSpansGeneric64[blendType];
break;
case QImage::Format_Invalid:
@@ -4974,7 +5509,8 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
const int coverage = map[j + (i - x)];
alphamapblend_generic(coverage, dest, j, srcColor, color, colorProfile);
}
- destStore64(rasterBuffer, i, y + ly, dest, l);
+ if (destStore64)
+ destStore64(rasterBuffer, i, y + ly, dest, l);
length -= l;
i += l;
}
@@ -5004,7 +5540,8 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
const int coverage = map[xp - x];
alphamapblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
}
- destStore64(rasterBuffer, start, clip.y, dest, end - start);
+ if (destStore64)
+ destStore64(rasterBuffer, start, clip.y, dest, end - start);
} // for (i -> line.count)
map += mapStride;
} // for (yp -> bottom)
@@ -5251,7 +5788,8 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
const uint coverage = src[j + (i - x)];
alphargbblend_generic(coverage, dest, j, srcColor, color, colorProfile);
}
- destStore64(rasterBuffer, i, y + ly, dest, l);
+ if (destStore64)
+ destStore64(rasterBuffer, i, y + ly, dest, l);
length -= l;
i += l;
}
@@ -5281,7 +5819,8 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
const uint coverage = src[xp - x];
alphargbblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
}
- destStore64(rasterBuffer, start, clip.y, dest, end - start);
+ if (destStore64)
+ destStore64(rasterBuffer, start, clip.y, dest, end - start);
} // for (i -> line.count)
src += srcStride;
} // for (yp -> bottom)
@@ -5433,6 +5972,17 @@ static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
qGray(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
}
+static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
+ int x, int y, int width, int height,
+ const QRgba64 &color)
+{
+ const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
+ quint64 c64;
+ store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr);
+ qt_rectfill<quint64>(reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
+ c64, x, y, width, height, rasterBuffer->bytesPerLine());
+}
+
// Map table for destination image format. Contains function pointers
// for blends of various types unto the destination
@@ -5647,6 +6197,33 @@ DrawHelper qDrawHelper[QImage::NImageFormats] =
qt_alphargbblit_generic,
qt_rectfill_gray
},
+ // Format_RGBX64
+ {
+ blend_color_generic_rgb64,
+ blend_src_generic_rgb64,
+ 0,
+ qt_alphamapblit_generic,
+ qt_alphargbblit_generic,
+ qt_rectfill_quint64
+ },
+ // Format_RGBA64
+ {
+ blend_color_generic_rgb64,
+ blend_src_generic_rgb64,
+ 0,
+ qt_alphamapblit_generic,
+ qt_alphargbblit_generic,
+ qt_rectfill_quint64
+ },
+ // Format_RGBA64_Premultiplied
+ {
+ blend_color_generic_rgb64,
+ blend_src_generic_rgb64,
+ 0,
+ qt_alphamapblit_generic,
+ qt_alphargbblit_generic,
+ qt_rectfill_quint64
+ },
};
#if defined(Q_CC_MSVC) && !defined(_MIPS_)
@@ -5795,10 +6372,6 @@ static void qInitDrawhelperFunctions()
qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
-#ifndef __SSSE3__
- extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar*src, int index, int count);
- qFetchPixels[QPixelLayout::BPP24] = fetchPixelsBPP24_ssse3;
-#endif
}
#endif // SSSE3
@@ -5816,6 +6389,10 @@ static void qInitDrawhelperFunctions()
const QVector<QRgb> *, QDitherInfo *);
extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
+ extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *);
+ extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *);
extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
@@ -5827,6 +6404,8 @@ static void qInitDrawhelperFunctions()
qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
+ qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
+ qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
}
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index bddd6899e3..078ab62251 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -1143,6 +1143,8 @@ static Q_ALWAYS_INLINE const uint *qt_convertRGBA8888ToARGB32PM(uint *buffer, co
return buffer;
}
+template<bool RGBA> void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count);
+
const uint qt_bayer_matrix[16][16] = {
{ 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc,
0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff},
@@ -1232,9 +1234,15 @@ typedef const uint *(QT_FASTCALL *FetchAndConvertPixelsFunc)(uint *buffer, const
typedef void (QT_FASTCALL *ConvertAndStorePixelsFunc)(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *clut, QDitherInfo *dither);
+typedef const QRgba64 *(QT_FASTCALL *FetchAndConvertPixelsFunc64)(QRgba64 *buffer, const uchar *src, int index, int count,
+ const QVector<QRgb> *clut, QDitherInfo *dither);
+typedef void (QT_FASTCALL *ConvertAndStorePixelsFunc64)(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *clut, QDitherInfo *dither);
+
typedef void (QT_FASTCALL *ConvertFunc)(uint *buffer, int count, const QVector<QRgb> *clut);
-typedef const QRgba64 *(QT_FASTCALL *ConvertFunc64)(QRgba64 *buffer, const uint *src, int count,
- const QVector<QRgb> *clut, QDitherInfo *dither);
+typedef void (QT_FASTCALL *Convert64Func)(quint64 *buffer, int count, const QVector<QRgb> *clut);
+typedef const QRgba64 *(QT_FASTCALL *ConvertTo64Func)(QRgba64 *buffer, const uint *src, int count,
+ const QVector<QRgb> *clut, QDitherInfo *dither);
typedef void (QT_FASTCALL *RbSwapFunc)(uchar *dst, const uchar *src, int count);
@@ -1249,6 +1257,7 @@ struct QPixelLayout
BPP16,
BPP24,
BPP32,
+ BPP64,
BPPCount
};
@@ -1257,17 +1266,19 @@ struct QPixelLayout
BPP bpp;
RbSwapFunc rbSwap;
ConvertFunc convertToARGB32PM;
- ConvertFunc64 convertToARGB64PM;
+ ConvertTo64Func convertToRGBA64PM;
FetchAndConvertPixelsFunc fetchToARGB32PM;
+ FetchAndConvertPixelsFunc64 fetchToRGBA64PM;
ConvertAndStorePixelsFunc storeFromARGB32PM;
ConvertAndStorePixelsFunc storeFromRGB32;
};
+extern ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats];
+
extern QPixelLayout qPixelLayouts[QImage::NImageFormats];
extern MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3];
-
QT_END_NAMESPACE
#endif // QDRAWHELPER_P_H
diff --git a/src/gui/painting/qdrawhelper_sse4.cpp b/src/gui/painting/qdrawhelper_sse4.cpp
index 0b6f963168..e3cc1dd43e 100644
--- a/src/gui/painting/qdrawhelper_sse4.cpp
+++ b/src/gui/painting/qdrawhelper_sse4.cpp
@@ -332,6 +332,20 @@ void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, in
convertARGBFromRGBA64PM_sse4<true>(dest, buffer, length);
}
+void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ uint *d = (uint*)dest + index;
+ convertARGBFromRGBA64PM_sse4<false>(d, src, count);
+}
+
+void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
+ const QVector<QRgb> *, QDitherInfo *)
+{
+ uint *d = (uint*)dest + index;
+ convertARGBFromRGBA64PM_sse4<true>(d, src, count);
+}
+
template
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>(uchar *dest, const uint *src, int index, int count,
const QVector<QRgb> *, QDitherInfo *);
diff --git a/src/gui/painting/qmemrotate.cpp b/src/gui/painting/qmemrotate.cpp
index 25aa6a3122..43aeff3268 100644
--- a/src/gui/painting/qmemrotate.cpp
+++ b/src/gui/painting/qmemrotate.cpp
@@ -239,6 +239,12 @@ inline void qt_memrotate90_template<quint32>(const quint32 *src, int w, int h, i
qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride);
}
+template <>
+inline void qt_memrotate90_template<quint64>(const quint64 *src, int w, int h, int sstride, quint64 *dest, int dstride)
+{
+ qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride);
+}
+
template <class T>
Q_STATIC_TEMPLATE_FUNCTION
inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T *dest, int dstride)
@@ -275,6 +281,12 @@ inline void qt_memrotate270_template<quint32>(const quint32 *src, int w, int h,
qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride);
}
+template <>
+inline void qt_memrotate270_template<quint64>(const quint64 *src, int w, int h, int sstride, quint64 *dest, int dstride)
+{
+ qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride);
+}
+
#define QT_IMPL_MEMROTATE(type) \
Q_GUI_EXPORT void qt_memrotate90(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
@@ -309,9 +321,7 @@ Q_GUI_EXPORT void qt_memrotate270(const type *src, int w, int h, int sstride, \
qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride); \
}
-
-
-
+QT_IMPL_MEMROTATE(quint64)
QT_IMPL_MEMROTATE(quint32)
QT_IMPL_MEMROTATE(quint16)
QT_IMPL_MEMROTATE(quint24)
@@ -377,6 +387,22 @@ void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *d
qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
}
+
+void qt_memrotate90_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
+{
+ qt_memrotate90((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
+}
+
+void qt_memrotate180_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
+{
+ qt_memrotate180((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
+}
+
+void qt_memrotate270_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
+{
+ qt_memrotate270((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
+}
+
MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3] =
// 90, 180, 270
{
@@ -387,6 +413,7 @@ MemRotateFunc qMemRotateFunctions[QPixelLayout::BPPCount][3] =
{ qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // BPP16,
{ qt_memrotate90_24, qt_memrotate180_24, qt_memrotate270_24 }, // BPP24
{ qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // BPP32
+ { qt_memrotate90_64, qt_memrotate180_64, qt_memrotate270_64 }, // BPP64
};
QT_END_NAMESPACE
diff --git a/src/gui/painting/qmemrotate_p.h b/src/gui/painting/qmemrotate_p.h
index 9bc3fd1010..677fd57af9 100644
--- a/src/gui/painting/qmemrotate_p.h
+++ b/src/gui/painting/qmemrotate_p.h
@@ -65,6 +65,7 @@ QT_DECL_MEMROTATE(quint32);
QT_DECL_MEMROTATE(quint16);
QT_DECL_MEMROTATE(quint24);
QT_DECL_MEMROTATE(quint8);
+QT_DECL_MEMROTATE(quint64);
#undef QT_DECL_MEMROTATE