From 59f168c5e5eec7ac247e6154a65f85a7ccc6e7ad Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Thu, 19 Feb 2015 11:20:52 +0100 Subject: Optimize fallback RGB888 to RGB32 conversion Improves the conversion from RGB888 to RGB32 on platforms without SIMD versions. This includes the fallback used on non-neon ARM devices. Besides image conversion the routine is also used for decoding JPEG. On x86 this version is within 0.7x of the speed of the SSSE3 version. Change-Id: Id131994d7c3c4f879d89e80f9d6c435bb5535ed7 Reviewed-by: Gunnar Sletta --- src/gui/image/qimage_conversions.cpp | 117 ++++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 7 deletions(-) (limited to 'src/gui/image/qimage_conversions.cpp') diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp index a4c02bbbbe..5103d820d6 100644 --- a/src/gui/image/qimage_conversions.cpp +++ b/src/gui/image/qimage_conversions.cpp @@ -35,8 +35,8 @@ #include #include #include - #include +#include QT_BEGIN_NAMESPACE @@ -290,6 +290,108 @@ static void convert_ARGB_to_ARGB_PM_sse4(QImageData *dest, const QImageData *src } #endif +Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgb32(quint32 *dest_data, const uchar *src_data, int len) +{ + int pixel = 0; + // prolog: align input to 32bit + while ((quintptr(src_data) & 0x3) && pixel < len) { + *dest_data = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]); + src_data += 3; + ++dest_data; + ++pixel; + } + + // Handle 4 pixels at a time 12 bytes input to 16 bytes output. 
+ for (; pixel + 3 < len; pixel += 4) { + const quint32 *src_packed = (quint32 *) src_data; + const quint32 src1 = qFromBigEndian(src_packed[0]); + const quint32 src2 = qFromBigEndian(src_packed[1]); + const quint32 src3 = qFromBigEndian(src_packed[2]); + + dest_data[0] = 0xff000000 | (src1 >> 8); + dest_data[1] = 0xff000000 | (src1 << 16) | (src2 >> 16); + dest_data[2] = 0xff000000 | (src2 << 8) | (src3 >> 24); + dest_data[3] = 0xff000000 | src3; + + src_data += 12; + dest_data += 4; + } + + // epilog: handle left over pixels + for (; pixel < len; ++pixel) { + *dest_data = 0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2]); + src_data += 3; + ++dest_data; + } +} + +Q_GUI_EXPORT void QT_FASTCALL qt_convert_rgb888_to_rgbx8888(quint32 *dest_data, const uchar *src_data, int len) +{ + int pixel = 0; + // prolog: align input to 32bit + while ((quintptr(src_data) & 0x3) && pixel < len) { + *dest_data = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2])); + src_data += 3; + ++dest_data; + ++pixel; + } + + // Handle 4 pixels at a time 12 bytes input to 16 bytes output. 
+ for (; pixel + 3 < len; pixel += 4) { + const quint32 *src_packed = (quint32 *) src_data; + const quint32 src1 = src_packed[0]; + const quint32 src2 = src_packed[1]; + const quint32 src3 = src_packed[2]; + +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + dest_data[0] = 0xff000000 | src1; + dest_data[1] = 0xff000000 | (src1 >> 24) | (src2 << 8); + dest_data[2] = 0xff000000 | (src2 >> 16) | (src3 << 16); + dest_data[3] = 0xff000000 | (src3 >> 8); +#else + dest_data[0] = 0xff | src1; + dest_data[1] = 0xff | (src1 << 24) | (src2 >> 8); + dest_data[2] = 0xff | (src2 << 16) | (src3 >> 16); + dest_data[3] = 0xff | (src3 << 8); +#endif + + src_data += 12; + dest_data += 4; + } + + // epilog: handle left over pixels + for (; pixel < len; ++pixel) { + *dest_data = ARGB2RGBA(0xff000000 | (src_data[0] << 16) | (src_data[1] << 8) | (src_data[2])); + src_data += 3; + ++dest_data; + } +} + +typedef void (QT_FASTCALL *Rgb888ToRgbConverter)(quint32 *dst, const uchar *src, int len); + +template <bool rgbx> +static void convert_RGB888_to_RGB(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) +{ + Q_ASSERT(src->format == QImage::Format_RGB888); + if (rgbx) + Q_ASSERT(dest->format == QImage::Format_RGBX8888 || dest->format == QImage::Format_RGBA8888 || dest->format == QImage::Format_RGBA8888_Premultiplied); + else + Q_ASSERT(dest->format == QImage::Format_RGB32 || dest->format == QImage::Format_ARGB32 || dest->format == QImage::Format_ARGB32_Premultiplied); + Q_ASSERT(src->width == dest->width); + Q_ASSERT(src->height == dest->height); + + const uchar *src_data = (uchar *) src->data; + quint32 *dest_data = (quint32 *) dest->data; + + Rgb888ToRgbConverter line_converter= rgbx ? 
qt_convert_rgb888_to_rgbx8888 : qt_convert_rgb888_to_rgb32; + + for (int i = 0; i < src->height; ++i) { + line_converter(dest_data, src_data, src->width); + src_data += src->bytes_per_line; + dest_data = (quint32 *)((uchar*)dest_data + dest->bytes_per_line); + } +} + extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags); static void convert_ARGB_to_RGBx(QImageData *dest, const QImageData *src, Qt::ImageConversionFlags) @@ -2052,6 +2154,9 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat 0, 0, 0, + convert_RGB888_to_RGB<false>, + convert_RGB888_to_RGB<false>, + convert_RGB888_to_RGB<false>, 0, 0, 0, @@ -2061,12 +2166,10 @@ Image_Converter qimage_converter_map[QImage::NImageFormats][QImage::NImageFormat 0, 0, 0, - 0, - 0, - 0, - 0, - 0, - 0, 0, 0, 0, 0, 0, 0 + convert_RGB888_to_RGB<true>, + convert_RGB888_to_RGB<true>, + convert_RGB888_to_RGB<true>, + 0, 0, 0, 0, 0, 0 }, // Format_RGB888 { -- cgit v1.2.3