summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-02-19 11:20:52 +0100
committerAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-03-03 13:38:17 +0000
commit59f168c5e5eec7ac247e6154a65f85a7ccc6e7ad (patch)
tree6c80267788ca979465fe7ea3cb921947c05a4476
parentbfb92c03e0d8e7a3a65b64d1f2f5b89f442e2b8a (diff)
Optimize fallback RGB888 to RGB32 conversion
Improves the conversion from RGB888 to RGB32 on platforms without SIMD versions. This includes the fallback used on non-NEON ARM devices. Besides image conversion, the routine is also used for decoding JPEG. On x86 this version is within 0.7x of the speed of the SSSE3 version. Change-Id: Id131994d7c3c4f879d89e80f9d6c435bb5535ed7 Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
-rw-r--r--src/gui/image/qimage_conversions.cpp117
-rw-r--r--src/gui/image/qjpeghandler.cpp18
-rw-r--r--tests/auto/gui/image/qimage/tst_qimage.cpp7
-rw-r--r--tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp24
4 files changed, 143 insertions, 23 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp
index a4c02bbbbe..5103d820d6 100644
--- a/src/gui/image/qimage_conversions.cpp
+++ b/src/gui/image/qimage_conversions.cpp
@@ -35,8 +35,8 @@
#include <private/qdrawingprimitive_sse2_p.h>
#include <private/qguiapplication_p.h>
#include <private/qsimd_p.h>
-
#include <private/qimage_p.h>
+#include <qendian.h>
QT_BEGIN_NAMESPACE
@@ -290,6 +290,108 @@ static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src
}
#endif
+// Expands len packed 24-bit pixels read from src_data (3 bytes per pixel) into
+// 32-bit pixels written to dest_data. Source byte 0 ends up in bits 16-23,
+// byte 1 in bits 8-15 and byte 2 in bits 0-7, i.e. each output is 0xffRRGGBB
+// with the top (alpha) byte forced to 0xff.
+Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dest_data, const uchar *src_data, int len)
+{
+    int pixel = 0;
+    // prolog: convert single pixels until the input is 32-bit aligned. Every
+    // pixel advances the input by 3 bytes, so this takes at most 3 iterations.
+    while ((quintptr(src_data) & 0x3) && pixel < len) {
+        *dest_data = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]);
+        src_data += 3;
+        ++dest_data;
+        ++pixel;
+    }
+
+    // Handle 4 pixels at a time: 12 bytes of input become 16 bytes of output.
+    for (; pixel + 3 < len; pixel += 4) {
+        // The input is only read, so the cast keeps it const-qualified.
+        const quint32 *src_packed = reinterpret_cast<const quint32 *>(src_data);
+        // qFromBigEndian makes the first byte in memory the most significant
+        // one regardless of host byte order:
+        //   src1 = R0 G0 B0 R1, src2 = G1 B1 R2 G2, src3 = B2 R3 G3 B3
+        const quint32 src1 = qFromBigEndian(src_packed[0]);
+        const quint32 src2 = qFromBigEndian(src_packed[1]);
+        const quint32 src3 = qFromBigEndian(src_packed[2]);
+
+        // Bits of a neighbouring pixel that land in the top byte are absorbed
+        // by the 0xff000000 alpha mask.
+        dest_data[0] = 0xff000000 | (src1 >> 8);
+        dest_data[1] = 0xff000000 | (src1 << 16) | (src2 >> 16);
+        dest_data[2] = 0xff000000 | (src2 << 8) | (src3 >> 24);
+        dest_data[3] = 0xff000000 | src3;
+
+        src_data += 12;
+        dest_data += 4;
+    }
+
+    // epilog: handle the (at most 3) left over pixels one at a time
+    for (; pixel < len; ++pixel) {
+        *dest_data = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]);
+        src_data += 3;
+        ++dest_data;
+    }
+}
+
+// Expands len packed 24-bit pixels read from src_data (3 bytes per pixel) into
+// 32-bit pixels written to dest_data. The 4-pixel fast path lays every output
+// pixel out in memory as the three source bytes followed by 0xff; the
+// single-pixel paths build 0xffRRGGBB and pass it through ARGB2RGBA
+// (presumably yielding the same layout - confirm against ARGB2RGBA).
+Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgbx8888(quint32 *dest_data, const uchar *src_data, int len)
+{
+    int pixel = 0;
+    // prolog: convert single pixels until the input is 32-bit aligned. Every
+    // pixel advances the input by 3 bytes, so this takes at most 3 iterations.
+    while ((quintptr(src_data) & 0x3) && pixel < len) {
+        *dest_data = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]));
+        src_data += 3;
+        ++dest_data;
+        ++pixel;
+    }
+
+    // Handle 4 pixels at a time: 12 bytes of input become 16 bytes of output.
+    for (; pixel + 3 < len; pixel += 4) {
+        // The input is only read, so the cast keeps it const-qualified.
+        const quint32 *src_packed = reinterpret_cast<const quint32 *>(src_data);
+        // Words are used in host byte order; the shifts below differ per
+        // endianness so that the byte order in memory is preserved.
+        const quint32 src1 = src_packed[0];
+        const quint32 src2 = src_packed[1];
+        const quint32 src3 = src_packed[2];
+
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+        // The last byte in memory is the most significant one, so the 0xff
+        // filler goes into the top byte and absorbs any stray bits there.
+        dest_data[0] = 0xff000000 | src1;
+        dest_data[1] = 0xff000000 | (src1 >> 24) | (src2 << 8);
+        dest_data[2] = 0xff000000 | (src2 >> 16) | (src3 << 16);
+        dest_data[3] = 0xff000000 | (src3 >> 8);
+#else
+        // The last byte in memory is the least significant one, so the 0xff
+        // filler goes into the bottom byte and absorbs any stray bits there.
+        dest_data[0] = 0xff | src1;
+        dest_data[1] = 0xff | (src1 << 24) | (src2 >> 8);
+        dest_data[2] = 0xff | (src2 << 16) | (src3 >> 16);
+        dest_data[3] = 0xff | (src3 << 8);
+#endif
+
+        src_data += 12;
+        dest_data += 4;
+    }
+
+    // epilog: handle the (at most 3) left over pixels one at a time
+    for (; pixel < len; ++pixel) {
+        *dest_data = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]));
+        src_data += 3;
+        ++dest_data;
+    }
+}
+
+// Signature shared by the per-scanline RGB888 expanders.
+typedef void (QT_FASTCALL *Rgb888ToRgbConverter)(quint32 *dst, const uchar *src, int len);
+
+// Whole-image RGB888 conversion. Runs one scanline converter over every row:
+// qt_convert_rgb888_to_rgbx8888 when rgbx is true, qt_convert_rgb888_to_rgb32
+// otherwise. Source and destination rows are stepped by each image's own
+// bytes_per_line.
+template <bool rgbx>
+static void convert_RGB888_to_RGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
+{
+    Q_ASSERT(src->format == QImage::Format_RGB888);
+    if (rgbx) {
+        Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied);
+    } else {
+        Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied);
+    }
+    Q_ASSERT(src->width == dest->width);
+    Q_ASSERT(src->height == dest->height);
+
+    const Rgb888ToRgbConverter convertLine = rgbx ? qt_convert_rgb888_to_rgbx8888 : qt_convert_rgb888_to_rgb32;
+
+    // Both cursors are kept as byte pointers so they can be advanced by the
+    // per-image stride directly.
+    const uchar *srcLine = (uchar *) src->data;
+    uchar *destLine = (uchar *) dest->data;
+
+    for (int row = 0; row < src->height; ++row) {
+        convertLine((quint32 *) destLine, srcLine, src->width);
+        srcLine += src->bytes_per_line;
+        destLine += dest->bytes_per_line;
+    }
+}
+
extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
static void convert_ARGB_to_RGBx(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags)
@@ -2052,6 +2154,9 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat
0,
0,
0,
+ convert_RGB888_to_RGB<false>,
+ convert_RGB888_to_RGB<false>,
+ convert_RGB888_to_RGB<false>,
0,
0,
0,
@@ -2061,12 +2166,10 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat
0,
0,
0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0, 0, 0, 0, 0, 0, 0
+ convert_RGB888_to_RGB<true>,
+ convert_RGB888_to_RGB<true>,
+ convert_RGB888_to_RGB<true>,
+ 0, 0, 0, 0, 0, 0
}, // Format_RGB888
{
diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp
index 13ac59ec26..b1146c4297 100644
--- a/src/gui/image/qjpeghandler.cpp
+++ b/src/gui/image/qjpeghandler.cpp
@@ -69,18 +69,10 @@ extern "C" {
QT_BEGIN_NAMESPACE
-void QT_FASTCALL convert_rgb888_to_rgb32_C(quint32 *dst, const uchar *src, int len)
-{
- // Expand 24->32 bpp.
- for (int i = 0; i < len; ++i) {
- *dst++ = qRgb(src[0], src[1], src[2]);
- src += 3;
- }
-}
-
+Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dst, const uchar *src, int len);
typedef void (QT_FASTCALL *Rgb888ToRgb32Converter)(quint32 *dst, const uchar *src, int len);
-static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = convert_rgb888_to_rgb32_C;
+static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32;
struct my_error_mgr : public jpeg_error_mgr {
jmp_buf setjmp_buffer;
@@ -1008,10 +1000,8 @@ QJpegHandler::QJpegHandler()
#endif
#if defined(QT_COMPILER_SUPPORTS_SSSE3)
- // from qimage_ssse3.cpp
-
- if (false) {
- } else if (qCpuHasFeature(SSSE3)) {
+ // from qimage_ssse3.cpp
+ if (qCpuHasFeature(SSSE3)) {
rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3;
}
#endif // QT_COMPILER_SUPPORTS_SSSE3
diff --git a/tests/auto/gui/image/qimage/tst_qimage.cpp b/tests/auto/gui/image/qimage/tst_qimage.cpp
index 7d2d009213..decd4ef931 100644
--- a/tests/auto/gui/image/qimage/tst_qimage.cpp
+++ b/tests/auto/gui/image/qimage/tst_qimage.cpp
@@ -734,6 +734,13 @@ void tst_QImage::convertToFormat_data()
QTest::newRow("blue rgb888 -> argb32") << int(QImage::Format_RGB888) << 0xff0000ff
<< int(QImage::Format_ARGB32) << 0xff0000ff;
+ QTest::newRow("red rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xffff0000
+ << int(QImage::Format_RGBX8888) << 0xffff0000;
+ QTest::newRow("green rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xff00ff00
+ << int(QImage::Format_RGBX8888) << 0xff00ff00;
+ QTest::newRow("blue rgb888 -> rgbx8888") << int(QImage::Format_RGB888) << 0xff0000ff
+ << int(QImage::Format_RGBX8888) << 0xff0000ff;
+
QTest::newRow("semired argb32 -> rgb888") << int(QImage::Format_ARGB32) << 0x7fff0000u
<< int(QImage::Format_RGB888) << 0xffff0000;
QTest::newRow("semigreen argb32 -> rgb888") << int(QImage::Format_ARGB32) << 0x7f00ff00u
diff --git a/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp b/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp
index d4834a04e2..2d4a453b58 100644
--- a/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp
+++ b/tests/benchmarks/gui/image/qimageconversion/tst_qimageconversion.cpp
@@ -43,6 +43,9 @@ private slots:
void convertRgb888ToRgb32_data();
void convertRgb888ToRgb32();
+ void convertRgb888ToRgbx8888_data();
+ void convertRgb888ToRgbx8888();
+
void convertRgb32ToRgb888_data();
void convertRgb32ToRgb888();
@@ -74,8 +77,8 @@ void tst_QImageConversion::convertRgb888ToRgb32_data()
// 16 pixels, minimum for the SSSE3 implementation
QTest::newRow("width: 16px; height: 5000px;") << generateImageRgb888(16, 5000);
- // 50 pixels, more realistic use case
- QTest::newRow("width: 50px; height: 5000px;") << generateImageRgb888(50, 5000);
+ // 200 pixels, more realistic use case
+ QTest::newRow("width: 200px; height: 5000px;") << generateImageRgb888(200, 5000);
// 2000 pixels -> typical values for pictures
QTest::newRow("width: 2000px; height: 2000px;") << generateImageRgb888(2000, 2000);
@@ -93,6 +96,23 @@ void tst_QImageConversion::convertRgb888ToRgb32()
}
}
+// Data provider for convertRgb888ToRgbx8888(): reuses the rows registered by
+// the RGB888 -> RGB32 benchmark.
+void tst_QImageConversion::convertRgb888ToRgbx8888_data()
+{
+    this->convertRgb888ToRgb32_data();
+}
+
+void tst_QImageConversion::convertRgb888ToRgbx8888() // benchmarks QImage::convertToFormat() to Format_RGBX8888
+{
+ QFETCH(QImage, inputImage); // image of the current data row
+
+ QBENCHMARK {
+ volatile QImage output = inputImage.convertToFormat(QImage::Format_RGBX8888); // the call being timed
+ // The volatile qualifier and the (void) use below are meant to keep the
+ // compiler from optimizing the benchmarked conversion away.
+ (void)output;
+ }
+}
+
void tst_QImageConversion::convertRgb32ToRgb888_data()
{
QTest::addColumn<QImage>("inputImage");