diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-02-19 11:20:52 +0100 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2015-03-03 13:38:17 +0000 |
commit | 59f168c5e5eec7ac247e6154a65f85a7ccc6e7ad (patch) | |
tree | 6c80267788ca979465fe7ea3cb921947c05a4476 /src/gui/image | |
parent | bfb92c03e0d8e7a3a65b64d1f2f5b89f442e2b8a (diff) |
Optimize fallback RGB888 to RGB32 conversion
Improves the conversion from RGB888 to RGB32 on platforms without SIMD
versions. This includes the fallback used on non-neon ARM devices.
Besides image conversion the routine is also used for decoding JPEG.
On x86 this version is within 0.7x of the speed of the SSSE3 version.
Change-Id: Id131994d7c3c4f879d89e80f9d6c435bb5535ed7
Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src/gui/image')
-rw-r--r-- | src/gui/image/qimage_conversions.cpp | 117 | ||||
-rw-r--r-- | src/gui/image/qjpeghandler.cpp | 18 |
2 files changed, 114 insertions, 21 deletions
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index a4c02bbbbe..5103d820d6 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -35,8 +35,8 @@ #include <private/qdrawingprimitive_sse2_p.h> #include <private/qguiapplication_p.h> #include <private/qsimd_p.h> - #include <private/qimage_p.h> +#include <qendian.h> QT_BEGIN_NAMESPACE @@ -290,6 +290,108 @@ static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src } #endif +Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dest_data, const uchar *src_data, int len) +{ + int pixel = 0; + // prolog: align input to 32bit + while ((quintptr(src_data) & 0x3) && pixel < len) { + *dest_data = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]); + src_data += 3; + ++dest_data; + ++pixel; + } + + // Handle 4 pixels at a time 12 bytes input to 16 bytes output. + for (; pixel + 3 < len; pixel += 4) { + const quint32 *src_packed = (quint32 *) src_data; + const quint32 src1 = qFromBigEndian(src_packed[0]); + const quint32 src2 = qFromBigEndian(src_packed[1]); + const quint32 src3 = qFromBigEndian(src_packed[2]); + + dest_data[0] = 0xff000000 | (src1 >> 8); + dest_data[1] = 0xff000000 | (src1 << 16) | (src2 >> 16); + dest_data[2] = 0xff000000 | (src2 << 8) | (src3 >> 24); + dest_data[3] = 0xff000000 | src3; + + src_data += 12; + dest_data += 4; + } + + // epilog: handle left over pixels + for (; pixel < len; ++pixel) { + *dest_data = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]); + src_data += 3; + ++dest_data; + } +} + +Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgbx8888(quint32 *dest_data, const uchar *src_data, int len) +{ + int pixel = 0; + // prolog: align input to 32bit + while ((quintptr(src_data) & 0x3) && pixel < len) { + *dest_data = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2])); + src_data += 3; + ++dest_data; + ++pixel; + } + + // Handle 4 pixels at a time 12 bytes input to 16 bytes output. + for (; pixel + 3 < len; pixel += 4) { + const quint32 *src_packed = (quint32 *) src_data; + const quint32 src1 = src_packed[0]; + const quint32 src2 = src_packed[1]; + const quint32 src3 = src_packed[2]; + +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + dest_data[0] = 0xff000000 | src1; + dest_data[1] = 0xff000000 | (src1 >> 24) | (src2 << 8); + dest_data[2] = 0xff000000 | (src2 >> 16) | (src3 << 16); + dest_data[3] = 0xff000000 | (src3 >> 8); +#else + dest_data[0] = 0xff | src1; + dest_data[1] = 0xff | (src1 << 24) | (src2 >> 8); + dest_data[2] = 0xff | (src2 << 16) | (src3 >> 16); + dest_data[3] = 0xff | (src3 << 8); +#endif + + src_data += 12; + dest_data += 4; + } + + // epilog: handle left over pixels + for (; pixel < len; ++pixel) { + *dest_data = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2])); + src_data += 3; + ++dest_data; + } +} + +typedef void (QT_FASTCALL *Rgb888ToRgbConverter)(quint32 *dst, const uchar *src, int len); + +template <bool rgbx> +static void convert_RGB888_to_RGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +{ + Q_ASSERT(src->format == QImage::Format_RGB888); + if (rgbx) + Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied); + else + Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied); + Q_ASSERT(src->width == dest->width); + Q_ASSERT(src->height == dest->height); + + const uchar *src_data = (uchar *) src->data; + quint32 *dest_data = (quint32 *) dest->data; + + Rgb888ToRgbConverter line_converter= rgbx ? qt_convert_rgb888_to_rgbx8888 : qt_convert_rgb888_to_rgb32; + + for (int i = 0; i < src->height; ++i) { + line_converter(dest_data, src_data, src->width); + src_data += src->bytes_per_line; + dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line); + } +} + extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); static void convert_ARGB_to_RGBx(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) @@ -2052,6 +2154,9 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat 0, 0, 0, + convert_RGB888_to_RGB<false>, + convert_RGB888_to_RGB<false>, + convert_RGB888_to_RGB<false>, 0, 0, 0, @@ -2061,12 +2166,10 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat 0, 0, 0, - 0, - 0, - 0, - 0, - 0, - 0, 0, 0, 0, 0, 0, 0 + convert_RGB888_to_RGB<true>, + convert_RGB888_to_RGB<true>, + convert_RGB888_to_RGB<true>, + 0, 0, 0, 0, 0, 0 }, // Format_RGB888 { diff --git a/src/gui/image/qjpeghandler.cpp b/src/gui/image/qjpeghandler.cpp index 13ac59ec26..b1146c4297 100644 --- a/src/gui/image/qjpeghandler.cpp +++ b/src/gui/image/qjpeghandler.cpp @@ -69,18 +69,10 @@ extern "C" { QT_BEGIN_NAMESPACE -void QT_FASTCALL convert_rgb888_to_rgb32_C(quint32 *dst, const uchar *src, int len) -{ - // Expand 24->32 bpp. - for (int i = 0; i < len; ++i) { - *dst++ = qRgb(src[0], src[1], src[2]); - src += 3; - } -} - +Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dst, const uchar *src, int len); typedef void (QT_FASTCALL *Rgb888ToRgb32Converter)(quint32 *dst, const uchar *src, int len); -static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = convert_rgb888_to_rgb32_C; +static Rgb888ToRgb32Converter rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32; struct my_error_mgr : public jpeg_error_mgr { jmp_buf setjmp_buffer; @@ -1008,10 +1000,8 @@ QJpegHandler::QJpegHandler() #endif #if defined(QT_COMPILER_SUPPORTS_SSSE3) - // from qimage_ssse3.cpp - - if (false) { - } else if (qCpuHasFeature(SSSE3)) { + // from qimage_ssse3.cpps + if (qCpuHasFeature(SSSE3)) { rgb888ToRgb32ConverterPtr = qt_convert_rgb888_to_rgb32_ssse3; } #endif // QT_COMPILER_SUPPORTS_SSSE3 |