summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@digia.com>2015-04-07 11:20:08 +0200
committerAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2015-04-15 11:25:41 +0000
commit7432c7c08a6709a12a143d48fbaa9927962edae8 (patch)
tree8e4bec4c675cef7429edca53ab53477d33209ae0 /src
parent884679a7cc74b85d6c565316520304a9c73b5f04 (diff)
Cleanup and optimization of qimage smoothscale
Cleaning up smoothscale code. Upscaling is improved using existing optimized interpolation methods, and downscale is given SSE4.1 optimized versions. Change-Id: I7cdc256c26850948aef7dae26fda1622be6b8179 Reviewed-by: Gunnar Sletta <gunnar@sletta.org>
Diffstat (limited to 'src')
-rw-r--r--src/gui/image/qimage.cpp10
-rw-r--r--src/gui/painting/painting.pri3
-rw-r--r--src/gui/painting/qdrawhelper.cpp38
-rw-r--r--src/gui/painting/qdrawhelper_p.h36
-rw-r--r--src/gui/painting/qimagescale.cpp1155
-rw-r--r--src/gui/painting/qimagescale_p.h9
-rw-r--r--src/gui/painting/qimagescale_sse4.cpp247
7 files changed, 769 insertions, 729 deletions
diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 5cc1cf760f..f20800440d 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -4248,11 +4248,15 @@ int QImage::bitPlaneCount() const
static QImage smoothScaled(const QImage &source, int w, int h) {
QImage src = source;
- bool canSkipConversion = (src.format() == QImage::Format_RGB32 || src.format() == QImage::Format_ARGB32_Premultiplied);
+ switch (src.format()) {
+ case QImage::Format_RGB32:
+ case QImage::Format_ARGB32_Premultiplied:
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
- canSkipConversion = canSkipConversion || (src.format() == QImage::Format_RGBX8888 || src.format() == QImage::Format_RGBA8888_Premultiplied);
+ case QImage::Format_RGBX8888:
#endif
- if (!canSkipConversion) {
+ case QImage::Format_RGBA8888_Premultiplied:
+ break;
+ default:
if (src.hasAlphaChannel())
src = src.convertToFormat(QImage::Format_ARGB32_Premultiplied);
else
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index 2f2d3daaf8..a861516821 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -93,7 +93,8 @@ SOURCES += \
SSE2_SOURCES += painting/qdrawhelper_sse2.cpp
SSSE3_SOURCES += painting/qdrawhelper_ssse3.cpp
-SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp
+SSE4_1_SOURCES += painting/qdrawhelper_sse4.cpp \
+ painting/qimagescale_sse4.cpp
AVX2_SOURCES += painting/qdrawhelper_avx2.cpp
!ios {
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 57bb111538..b75018452a 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -1245,44 +1245,6 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, i
}
#endif
-#if defined(__SSE2__)
-static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
-{
- // First interpolate right and left pixels in parallel.
- __m128i vl = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tl), _mm_cvtsi32_si128(bl));
- __m128i vr = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tr), _mm_cvtsi32_si128(br));
- vl = _mm_unpacklo_epi8(vl, _mm_setzero_si128());
- vr = _mm_unpacklo_epi8(vr, _mm_setzero_si128());
- vl = _mm_mullo_epi16(vl, _mm_set1_epi16(256 - distx));
- vr = _mm_mullo_epi16(vr, _mm_set1_epi16(distx));
- __m128i vtb = _mm_add_epi16(vl, vr);
- vtb = _mm_srli_epi16(vtb, 8);
- // vtb now contains the result of the first two interpolate calls vtb = unpacked((xbot << 64) | xtop)
-
- // Now the last interpolate between top and bottom interpolations.
- const __m128i vidisty = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - disty), _MM_SHUFFLE(0, 0, 0, 0));
- const __m128i vdisty = _mm_shufflelo_epi16(_mm_cvtsi32_si128(disty), _MM_SHUFFLE(0, 0, 0, 0));
- const __m128i vmuly = _mm_unpacklo_epi16(vidisty, vdisty);
- vtb = _mm_unpacklo_epi16(vtb, _mm_srli_si128(vtb, 8));
- // vtb now contains the colors of top and bottom interleaved { ta, ba, tr, br, tg, bg, tb, bb }
- vtb = _mm_madd_epi16(vtb, vmuly); // Multiply and horizontal add.
- vtb = _mm_srli_epi32(vtb, 8);
- vtb = _mm_packs_epi32(vtb, _mm_setzero_si128());
- vtb = _mm_packus_epi16(vtb, _mm_setzero_si128());
- return _mm_cvtsi128_si32(vtb);
-}
-#else
-static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
-{
- uint idistx = 256 - distx;
- uint idisty = 256 - disty;
- uint xtop = INTERPOLATE_PIXEL_256(tl, idistx, tr, distx);
- uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx);
- return INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty);
-}
-#endif
-
-
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 480ba4c97b..0d391b2cec 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -605,6 +605,42 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
}
#endif
+#ifdef __SSE2__
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+ // First interpolate right and left pixels in parallel.
+ __m128i vl = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tl), _mm_cvtsi32_si128(bl));
+ __m128i vr = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tr), _mm_cvtsi32_si128(br));
+ vl = _mm_unpacklo_epi8(vl, _mm_setzero_si128());
+ vr = _mm_unpacklo_epi8(vr, _mm_setzero_si128());
+ vl = _mm_mullo_epi16(vl, _mm_set1_epi16(256 - distx));
+ vr = _mm_mullo_epi16(vr, _mm_set1_epi16(distx));
+ __m128i vtb = _mm_add_epi16(vl, vr);
+ vtb = _mm_srli_epi16(vtb, 8);
+ // vtb now contains the result of the first two interpolate calls vtb = unpacked((xbot << 64) | xtop)
+
+ // Now the last interpolate between top and bottom interpolations.
+ const __m128i vidisty = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - disty), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vdisty = _mm_shufflelo_epi16(_mm_cvtsi32_si128(disty), _MM_SHUFFLE(0, 0, 0, 0));
+ const __m128i vmuly = _mm_unpacklo_epi16(vidisty, vdisty);
+ vtb = _mm_unpacklo_epi16(vtb, _mm_srli_si128(vtb, 8));
+ // vtb now contains the colors of top and bottom interleaved { ta, ba, tr, br, tg, bg, tb, bb }
+ vtb = _mm_madd_epi16(vtb, vmuly); // Multiply and horizontal add.
+ vtb = _mm_srli_epi32(vtb, 8);
+ vtb = _mm_packs_epi32(vtb, _mm_setzero_si128());
+ vtb = _mm_packus_epi16(vtb, _mm_setzero_si128());
+ return _mm_cvtsi128_si32(vtb);
+}
+#else
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+ uint idistx = 256 - distx;
+ uint idisty = 256 - disty;
+ uint xtop = INTERPOLATE_PIXEL_256(tl, idistx, tr, distx);
+ uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx);
+ return INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty);
+}
+#endif
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
static Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) {
diff --git a/src/gui/painting/qimagescale.cpp b/src/gui/painting/qimagescale.cpp
index 58e9112dd6..5f1b25e189 100644
--- a/src/gui/painting/qimagescale.cpp
+++ b/src/gui/painting/qimagescale.cpp
@@ -38,10 +38,6 @@
QT_BEGIN_NAMESPACE
-namespace QImageScale {
- struct QImageScaleInfo;
-}
-
typedef void (*qt_qimageScaleFunc)(QImageScale::QImageScaleInfo *isi, unsigned int *dest,
int dxx, int dyy, int dx, int dy, int dw,
int dh, int dow, int sow);
@@ -105,15 +101,7 @@ qt_qimageScaleFunc qt_qimageScaleRgb = qt_qimageScaleAARGB;
namespace QImageScale {
- struct QImageScaleInfo {
- int *xpoints;
- const unsigned int **ypoints;
- int *xapoints, *yapoints;
- int xup_yup;
- };
-
- const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh,
- int dh);
+ const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh, int dh);
int* qimageCalcXPoints(int sw, int dw);
int* qimageCalcApoints(int s, int d, int up);
QImageScaleInfo* qimageFreeScaleInfo(QImageScaleInfo *isi);
@@ -134,16 +122,11 @@ using namespace QImageScale;
#define G_VAL(p) (qGreen(*p))
#define B_VAL(p) (qBlue(*p))
-#define INV_XAP (256 - xapoints[x])
-#define XAP (xapoints[x])
-#define INV_YAP (256 - yapoints[dyy + y])
-#define YAP (yapoints[dyy + y])
-
const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
int sw, int sh, int dh)
{
const unsigned int **p;
- int i, j = 0, rv = 0;
+ int j = 0, rv = 0;
qint64 val, inc;
if(dh < 0){
@@ -155,12 +138,12 @@ const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
int up = qAbs(dh) >= sh;
val = up ? 0x8000 * sh / dh - 0x8000 : 0;
inc = (((qint64)sh) << 16) / dh;
- for(i = 0; i < dh; i++){
+ for (int i = 0; i < dh; i++) {
p[j++] = src + qMax(0LL, val >> 16) * sw;
val += inc;
}
- if(rv){
- for(i = dh / 2; --i >= 0; ){
+ if (rv) {
+ for (int i = dh / 2; --i >= 0; ) {
const unsigned int *tmp = p[i];
p[i] = p[dh - i - 1];
p[dh - i - 1] = tmp;
@@ -171,7 +154,7 @@ const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
int* QImageScale::qimageCalcXPoints(int sw, int dw)
{
- int *p, i, j = 0, rv = 0;
+ int *p, j = 0, rv = 0;
qint64 val, inc;
if(dw < 0){
@@ -183,13 +166,13 @@ int* QImageScale::qimageCalcXPoints(int sw, int dw)
int up = qAbs(dw) >= sw;
val = up ? 0x8000 * sw / dw - 0x8000 : 0;
inc = (((qint64)sw) << 16) / dw;
- for(i = 0; i < dw; i++){
+ for (int i = 0; i < dw; i++) {
p[j++] = qMax(0LL, val >> 16);
val += inc;
}
- if(rv){
- for(i = dw / 2; --i >= 0; ){
+ if (rv) {
+ for (int i = dw / 2; --i >= 0; ) {
int tmp = p[i];
p[i] = p[dw - i - 1];
p[dw - i - 1] = tmp;
@@ -200,7 +183,7 @@ int* QImageScale::qimageCalcXPoints(int sw, int dw)
int* QImageScale::qimageCalcApoints(int s, int d, int up)
{
- int *p, i, j = 0, rv = 0;
+ int *p, j = 0, rv = 0;
if(d < 0){
rv = 1;
@@ -214,7 +197,7 @@ int* QImageScale::qimageCalcApoints(int s, int d, int up)
val = 0x8000 * s / d - 0x8000;
inc = (((qint64)s) << 16) / d;
- for(i = 0; i < d; i++){
+ for (int i = 0; i < d; i++) {
int pos = val >> 16;
if (pos < 0)
p[j++] = 0;
@@ -226,14 +209,12 @@ int* QImageScale::qimageCalcApoints(int s, int d, int up)
}
}
/* scaling down */
- else{
- qint64 val, inc;
- int ap, Cp;
- val = 0;
- inc = (((qint64)s) << 16) / d;
- Cp = ((d << 14) / s) + 1;
- for(i = 0; i < d; i++){
- ap = ((0x100 - ((val >> 8) & 0xff)) * Cp) >> 8;
+ else {
+ qint64 val = 0;
+ qint64 inc = (((qint64)s) << 16) / d;
+ int Cp = (((d << 14) + s - 1) / s);
+ for (int i = 0; i < d; i++) {
+ int ap = ((0x10000 - (val & 0xffff)) * Cp) >> 16;
p[j] = ap | (Cp << 16);
j++;
val += inc;
@@ -241,13 +222,13 @@ int* QImageScale::qimageCalcApoints(int s, int d, int up)
}
if(rv){
int tmp;
- for(i = d / 2; --i >= 0; ){
+ for (int i = d / 2; --i >= 0; ) {
tmp = p[i];
p[i] = p[d - i - 1];
p[d - i - 1] = tmp;
}
}
- return(p);
+ return p;
}
QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi)
@@ -297,700 +278,500 @@ QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img,
return(isi);
}
-/* FIXME: NEED to optimize ScaleAARGBA - currently its "ok" but needs work*/
-/* scale by area sampling */
-static void qt_qimageScaleAARGBA(QImageScaleInfo *isi, unsigned int *dest,
- int dxx, int dyy, int dx, int dy, int dw,
- int dh, int dow, int sow)
+static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow);
+
+#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
+template<bool RGB>
+void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+#endif
+
+static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
{
- const unsigned int *sptr;
- unsigned int *dptr;
- int x, y, end;
const unsigned int **ypoints = isi->ypoints;
int *xpoints = isi->xpoints;
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- end = dxx + dw;
- /* scaling up both ways */
- if(isi->xup_yup == 3){
- /* go through every scanline in the output buffer */
- for(y = 0; y < dh; y++){
- /* calculate the source line we'll scan from */
- dptr = dest + dx + ((y + dy) * dow);
- sptr = ypoints[dyy + y];
- if(YAP > 0){
- for(x = dxx; x < end; x++){
- int r, g, b, a;
- int rr, gg, bb, aa;
- const unsigned int *pix;
-
- if(XAP > 0){
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * INV_XAP;
- g = G_VAL(pix) * INV_XAP;
- b = B_VAL(pix) * INV_XAP;
- a = A_VAL(pix) * INV_XAP;
- pix++;
- r += R_VAL(pix) * XAP;
- g += G_VAL(pix) * XAP;
- b += B_VAL(pix) * XAP;
- a += A_VAL(pix) * XAP;
- pix += sow;
- rr = R_VAL(pix) * XAP;
- gg = G_VAL(pix) * XAP;
- bb = B_VAL(pix) * XAP;
- aa = A_VAL(pix) * XAP;
- pix--;
- rr += R_VAL(pix) * INV_XAP;
- gg += G_VAL(pix) * INV_XAP;
- bb += B_VAL(pix) * INV_XAP;
- aa += A_VAL(pix) * INV_XAP;
- r = ((rr * YAP) + (r * INV_YAP)) >> 16;
- g = ((gg * YAP) + (g * INV_YAP)) >> 16;
- b = ((bb * YAP) + (b * INV_YAP)) >> 16;
- a = ((aa * YAP) + (a * INV_YAP)) >> 16;
- *dptr++ = qRgba(r, g, b, a);
- }
- else{
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * INV_YAP;
- g = G_VAL(pix) * INV_YAP;
- b = B_VAL(pix) * INV_YAP;
- a = A_VAL(pix) * INV_YAP;
- pix += sow;
- r += R_VAL(pix) * YAP;
- g += G_VAL(pix) * YAP;
- b += B_VAL(pix) * YAP;
- a += A_VAL(pix) * YAP;
- r >>= 8;
- g >>= 8;
- b >>= 8;
- a >>= 8;
- *dptr++ = qRgba(r, g, b, a);
- }
- }
+ int end = dxx + dw;
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ /* calculate the source line we'll scan from */
+ const unsigned int *sptr = ypoints[dyy + y];
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ const int yap = yapoints[dyy + y];
+ if (yap > 0) {
+ for (int x = dxx; x < end; x++) {
+ const unsigned int *pix = sptr + xpoints[x];
+ const int xap = xapoints[x];
+ if (xap > 0)
+ *dptr = interpolate_4_pixels(pix[0], pix[1], pix[sow], pix[sow + 1], xap, yap);
+ else
+ *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - yap, pix[sow], yap);
+ dptr++;
}
- else{
- for(x = dxx; x < end; x++){
- int r, g, b, a;
- const unsigned int *pix;
-
- if(XAP > 0){
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * INV_XAP;
- g = G_VAL(pix) * INV_XAP;
- b = B_VAL(pix) * INV_XAP;
- a = A_VAL(pix) * INV_XAP;
- pix++;
- r += R_VAL(pix) * XAP;
- g += G_VAL(pix) * XAP;
- b += B_VAL(pix) * XAP;
- a += A_VAL(pix) * XAP;
- r >>= 8;
- g >>= 8;
- b >>= 8;
- a >>= 8;
- *dptr++ = qRgba(r, g, b, a);
- }
- else
- *dptr++ = sptr[xpoints[x] ];
- }
+ } else {
+ for (int x = dxx; x < end; x++) {
+ const unsigned int *pix = sptr + xpoints[x];
+ const int xap = xapoints[x];
+ *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - xap, pix[1], xap);
+ dptr++;
}
}
}
+}
+
+/* scale by area sampling */
+static void qt_qimageScaleAARGBA(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow)
+{
+ /* scaling up both ways */
+ if (isi->xup_yup == 3){
+ qt_qimageScaleAARGBA_up_xy(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ }
/* if we're scaling down vertically */
- else if(isi->xup_yup == 1){
- /*\ 'Correct' version, with math units prepared for MMXification \*/
- int Cy, j;
- const unsigned int *pix;
- int r, g, b, a, rr, gg, bb, aa;
- int yap;
-
- /* go through every scanline in the output buffer */
- for(y = 0; y < dh; y++){
- Cy = YAP >> 16;
- yap = YAP & 0xffff;
-
- dptr = dest + dx + ((y + dy) * dow);
- for(x = dxx; x < end; x++){
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * yap;
- g = G_VAL(pix) * yap;
- b = B_VAL(pix) * yap;
- a = A_VAL(pix) * yap;
- for(j = (1 << 14) - yap; j > Cy; j -= Cy){
- pix += sow;
- r += R_VAL(pix) * Cy;
- g += G_VAL(pix) * Cy;
- b += B_VAL(pix) * Cy;
- a += A_VAL(pix) * Cy;
- }
- if(j > 0){
- pix += sow;
- r += R_VAL(pix) * j;
- g += G_VAL(pix) * j;
- b += B_VAL(pix) * j;
- a += A_VAL(pix) * j;
- }
- if(XAP > 0){
- pix = ypoints[dyy + y] + xpoints[x] + 1;
- rr = R_VAL(pix) * yap;
- gg = G_VAL(pix) * yap;
- bb = B_VAL(pix) * yap;
- aa = A_VAL(pix) * yap;
- for(j = (1 << 14) - yap; j > Cy; j -= Cy){
- pix += sow;
- rr += R_VAL(pix) * Cy;
- gg += G_VAL(pix) * Cy;
- bb += B_VAL(pix) * Cy;
- aa += A_VAL(pix) * Cy;
- }
- if(j > 0){
- pix += sow;
- rr += R_VAL(pix) * j;
- gg += G_VAL(pix) * j;
- bb += B_VAL(pix) * j;
- aa += A_VAL(pix) * j;
- }
- r = r * INV_XAP;
- g = g * INV_XAP;
- b = b * INV_XAP;
- a = a * INV_XAP;
- r = (r + ((rr * XAP))) >> 12;
- g = (g + ((gg * XAP))) >> 12;
- b = (b + ((bb * XAP))) >> 12;
- a = (a + ((aa * XAP))) >> 12;
- }
- else{
- r >>= 4;
- g >>= 4;
- b >>= 4;
- a >>= 4;
- }
- *dptr = qRgba(r >> 10, g >> 10, b >> 10, a >> 10);
- dptr++;
- }
- }
+ else if (isi->xup_yup == 1) {
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
+ if (qCpuHasFeature(SSE4_1))
+ qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ else
+#endif
+ qt_qimageScaleAARGBA_up_x_down_y(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
}
/* if we're scaling down horizontally */
- else if(isi->xup_yup == 2){
- /*\ 'Correct' version, with math units prepared for MMXification \*/
- int Cx, j;
- const unsigned int *pix;
- int r, g, b, a, rr, gg, bb, aa;
- int xap;
-
- /* go through every scanline in the output buffer */
- for(y = 0; y < dh; y++){
- dptr = dest + dx + ((y + dy) * dow);
- for(x = dxx; x < end; x++){
- Cx = XAP >> 16;
- xap = XAP & 0xffff;
-
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * xap;
- g = G_VAL(pix) * xap;
- b = B_VAL(pix) * xap;
- a = A_VAL(pix) * xap;
- for(j = (1 << 14) - xap; j > Cx; j -= Cx){
- pix++;
- r += R_VAL(pix) * Cx;
- g += G_VAL(pix) * Cx;
- b += B_VAL(pix) * Cx;
- a += A_VAL(pix) * Cx;
- }
- if(j > 0){
- pix++;
- r += R_VAL(pix) * j;
- g += G_VAL(pix) * j;
- b += B_VAL(pix) * j;
- a += A_VAL(pix) * j;
- }
- if(YAP > 0){
- pix = ypoints[dyy + y] + xpoints[x] + sow;
- rr = R_VAL(pix) * xap;
- gg = G_VAL(pix) * xap;
- bb = B_VAL(pix) * xap;
- aa = A_VAL(pix) * xap;
- for(j = (1 << 14) - xap; j > Cx; j -= Cx){
- pix++;
- rr += R_VAL(pix) * Cx;
- gg += G_VAL(pix) * Cx;
- bb += B_VAL(pix) * Cx;
- aa += A_VAL(pix) * Cx;
- }
- if(j > 0){
- pix++;
- rr += R_VAL(pix) * j;
- gg += G_VAL(pix) * j;
- bb += B_VAL(pix) * j;
- aa += A_VAL(pix) * j;
- }
- r = r * INV_YAP;
- g = g * INV_YAP;
- b = b * INV_YAP;
- a = a * INV_YAP;
- r = (r + ((rr * YAP))) >> 12;
- g = (g + ((gg * YAP))) >> 12;
- b = (b + ((bb * YAP))) >> 12;
- a = (a + ((aa * YAP))) >> 12;
- }
- else{
- r >>= 4;
- g >>= 4;
- b >>= 4;
- a >>= 4;
- }
- *dptr = qRgba(r >> 10, g >> 10, b >> 10, a >> 10);
- dptr++;
- }
- }
+ else if (isi->xup_yup == 2) {
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
+ if (qCpuHasFeature(SSE4_1))
+ qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ else
+#endif
+ qt_qimageScaleAARGBA_down_x_up_y(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
}
/* if we're scaling down horizontally & vertically */
- else{
- /*\ 'Correct' version, with math units prepared for MMXification:
- |*| The operation 'b = (b * c) >> 16' translates to pmulhw,
- |*| so the operation 'b = (b * c) >> d' would translate to
- |*| psllw (16 - d), %mmb; pmulh %mmc, %mmb
- \*/
- int Cx, Cy, i, j;
- const unsigned int *pix;
- int a, r, g, b, ax, rx, gx, bx;
- int xap, yap;
-
- for(y = 0; y < dh; y++){
- Cy = YAP >> 16;
- yap = YAP & 0xffff;
-
- dptr = dest + dx + ((y + dy) * dow);
- for(x = dxx; x < end; x++){
- Cx = XAP >> 16;
- xap = XAP & 0xffff;
-
- sptr = ypoints[dyy + y] + xpoints[x];
- pix = sptr;
- sptr += sow;
- rx = R_VAL(pix) * xap;
- gx = G_VAL(pix) * xap;
- bx = B_VAL(pix) * xap;
- ax = A_VAL(pix) * xap;
-
- pix++;
- for(i = (1 << 14) - xap; i > Cx; i -= Cx){
- rx += R_VAL(pix) * Cx;
- gx += G_VAL(pix) * Cx;
- bx += B_VAL(pix) * Cx;
- ax += A_VAL(pix) * Cx;
- pix++;
- }
- if(i > 0){
- rx += R_VAL(pix) * i;
- gx += G_VAL(pix) * i;
- bx += B_VAL(pix) * i;
- ax += A_VAL(pix) * i;
- }
-
- r = (rx >> 5) * yap;
- g = (gx >> 5) * yap;
- b = (bx >> 5) * yap;
- a = (ax >> 5) * yap;
-
- for(j = (1 << 14) - yap; j > Cy; j -= Cy){
- pix = sptr;
- sptr += sow;
- rx = R_VAL(pix) * xap;
- gx = G_VAL(pix) * xap;
- bx = B_VAL(pix) * xap;
- ax = A_VAL(pix) * xap;
- pix++;
- for(i = (1 << 14) - xap; i > Cx; i -= Cx){
- rx += R_VAL(pix) * Cx;
- gx += G_VAL(pix) * Cx;
- bx += B_VAL(pix) * Cx;
- ax += A_VAL(pix) * Cx;
- pix++;
- }
- if(i > 0){
- rx += R_VAL(pix) * i;
- gx += G_VAL(pix) * i;
- bx += B_VAL(pix) * i;
- ax += A_VAL(pix) * i;
- }
-
- r += (rx >> 5) * Cy;
- g += (gx >> 5) * Cy;
- b += (bx >> 5) * Cy;
- a += (ax >> 5) * Cy;
- }
- if(j > 0){
- pix = sptr;
- sptr += sow;
- rx = R_VAL(pix) * xap;
- gx = G_VAL(pix) * xap;
- bx = B_VAL(pix) * xap;
- ax = A_VAL(pix) * xap;
- pix++;
- for(i = (1 << 14) - xap; i > Cx; i -= Cx){
- rx += R_VAL(pix) * Cx;
- gx += G_VAL(pix) * Cx;
- bx += B_VAL(pix) * Cx;
- ax += A_VAL(pix) * Cx;
- pix++;
- }
- if(i > 0){
- rx += R_VAL(pix) * i;
- gx += G_VAL(pix) * i;
- bx += B_VAL(pix) * i;
- ax += A_VAL(pix) * i;
- }
-
- r += (rx >> 5) * j;
- g += (gx >> 5) * j;
- b += (bx >> 5) * j;
- a += (ax >> 5) * j;
- }
-
- *dptr = qRgba(r >> 23, g >> 23, b >> 23, a >> 23);
- dptr++;
+ else {
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
+ if (qCpuHasFeature(SSE4_1))
+ qt_qimageScaleAARGBA_down_xy_sse4<false>(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ else
+#endif
+ qt_qimageScaleAARGBA_down_xy(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ }
+}
+
+inline static void qt_qimageScaleAARGBA_helper_x(const unsigned int *pix, int xap, int Cx, int &r, int &g, int &b, int &a)
+{
+ r = R_VAL(pix) * xap;
+ g = G_VAL(pix) * xap;
+ b = B_VAL(pix) * xap;
+ a = A_VAL(pix) * xap;
+ int j;
+ for (j = (1 << 14) - xap; j > Cx; j -= Cx ){
+ pix++;
+ r += R_VAL(pix) * Cx;
+ g += G_VAL(pix) * Cx;
+ b += B_VAL(pix) * Cx;
+ a += A_VAL(pix) * Cx;
+ }
+ pix++;
+ r += R_VAL(pix) * j;
+ g += G_VAL(pix) * j;
+ b += B_VAL(pix) * j;
+ a += A_VAL(pix) * j;
+}
+
+inline static void qt_qimageScaleAARGBA_helper_y(const unsigned int *pix, int yap, int Cy, int sow, int &r, int &g, int &b, int &a)
+{
+ r = R_VAL(pix) * yap;
+ g = G_VAL(pix) * yap;
+ b = B_VAL(pix) * yap;
+ a = A_VAL(pix) * yap;
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy ){
+ pix += sow;
+ r += R_VAL(pix) * Cy;
+ g += G_VAL(pix) * Cy;
+ b += B_VAL(pix) * Cy;
+ a += A_VAL(pix) * Cy;
+ }
+ pix += sow;
+ r += R_VAL(pix) * j;
+ g += G_VAL(pix) * j;
+ b += B_VAL(pix) * j;
+ a += A_VAL(pix) * j;
+}
+
+static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
+{
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ int Cy = (yapoints[dyy + y]) >> 16;
+ int yap = (yapoints[dyy + y]) & 0xffff;
+
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ int r, g, b, a;
+ qt_qimageScaleAARGBA_helper_y(sptr, yap, Cy, sow, r, g, b, a);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ int rr, gg, bb, aa;
+ qt_qimageScaleAARGBA_helper_y(sptr + 1, yap, Cy, sow, rr, gg, bb, aa);
+
+ r = r * (256 - xap);
+ g = g * (256 - xap);
+ b = b * (256 - xap);
+ a = a * (256 - xap);
+ r = (r + (rr * xap)) >> 8;
+ g = (g + (gg * xap)) >> 8;
+ b = (b + (bb * xap)) >> 8;
+ a = (a + (aa * xap)) >> 8;
}
+ *dptr++ = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
}
}
}
-/* scale by area sampling - IGNORE the ALPHA byte*/
-static void qt_qimageScaleAARGB(QImageScaleInfo *isi, unsigned int *dest,
- int dxx, int dyy, int dx, int dy, int dw,
- int dh, int dow, int sow)
+static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
{
- const unsigned int *sptr;
- unsigned int *dptr;
- int x, y, end;
const unsigned int **ypoints = isi->ypoints;
int *xpoints = isi->xpoints;
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- end = dxx + dw;
- /* scaling up both ways */
- if(isi->xup_yup == 3){
- /* go through every scanline in the output buffer */
- for(y = 0; y < dh; y++){
- /* calculate the source line we'll scan from */
- dptr = dest + dx + ((y + dy) * dow);
- sptr = ypoints[dyy + y];
- if(YAP > 0){
- for(x = dxx; x < end; x++){
- int r = 0, g = 0, b = 0;
- int rr = 0, gg = 0, bb = 0;
- const unsigned int *pix;
-
- if(XAP > 0){
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * INV_XAP;
- g = G_VAL(pix) * INV_XAP;
- b = B_VAL(pix) * INV_XAP;
- pix++;
- r += R_VAL(pix) * XAP;
- g += G_VAL(pix) * XAP;
- b += B_VAL(pix) * XAP;
- pix += sow;
- rr = R_VAL(pix) * XAP;
- gg = G_VAL(pix) * XAP;
- bb = B_VAL(pix) * XAP;
- pix --;
- rr += R_VAL(pix) * INV_XAP;
- gg += G_VAL(pix) * INV_XAP;
- bb += B_VAL(pix) * INV_XAP;
- r = ((rr * YAP) + (r * INV_YAP)) >> 16;
- g = ((gg * YAP) + (g * INV_YAP)) >> 16;
- b = ((bb * YAP) + (b * INV_YAP)) >> 16;
- *dptr++ = qRgba(r, g, b, 0xff);
- }
- else{
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * INV_YAP;
- g = G_VAL(pix) * INV_YAP;
- b = B_VAL(pix) * INV_YAP;
- pix += sow;
- r += R_VAL(pix) * YAP;
- g += G_VAL(pix) * YAP;
- b += B_VAL(pix) * YAP;
- r >>= 8;
- g >>= 8;
- b >>= 8;
- *dptr++ = qRgba(r, g, b, 0xff);
- }
- }
- }
- else{
- for(x = dxx; x < end; x++){
- int r = 0, g = 0, b = 0;
- const unsigned int *pix;
-
- if(XAP > 0){
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * INV_XAP;
- g = G_VAL(pix) * INV_XAP;
- b = B_VAL(pix) * INV_XAP;
- pix++;
- r += R_VAL(pix) * XAP;
- g += G_VAL(pix) * XAP;
- b += B_VAL(pix) * XAP;
- r >>= 8;
- g >>= 8;
- b >>= 8;
- *dptr++ = qRgba(r, g, b, 0xff);
- }
- else
- *dptr++ = sptr[xpoints[x] ];
- }
+ int end = dxx + dw;
+
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ int r, g, b, a;
+ qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, r, g, b, a);
+
+ int yap = yapoints[dyy + y];
+ if (yap > 0) {
+ int rr, gg, bb, aa;
+ qt_qimageScaleAARGBA_helper_x(sptr + sow, xap, Cx, rr, gg, bb, aa);
+
+ r = r * (256 - yap);
+ g = g * (256 - yap);
+ b = b * (256 - yap);
+ a = a * (256 - yap);
+ r = (r + (rr * yap)) >> 8;
+ g = (g + (gg * yap)) >> 8;
+ b = (b + (bb * yap)) >> 8;
+ a = (a + (aa * yap)) >> 8;
}
+ *dptr = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
+ dptr++;
}
}
- /* if we're scaling down vertically */
- else if(isi->xup_yup == 1){
- /*\ 'Correct' version, with math units prepared for MMXification \*/
- int Cy, j;
- const unsigned int *pix;
- int r, g, b, rr, gg, bb;
- int yap;
-
- /* go through every scanline in the output buffer */
- for(y = 0; y < dh; y++){
- Cy = YAP >> 16;
- yap = YAP & 0xffff;
-
- dptr = dest + dx + ((y + dy) * dow);
- for(x = dxx; x < end; x++){
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * yap;
- g = G_VAL(pix) * yap;
- b = B_VAL(pix) * yap;
- pix += sow;
- for(j = (1 << 14) - yap; j > Cy; j -= Cy){
- r += R_VAL(pix) * Cy;
- g += G_VAL(pix) * Cy;
- b += B_VAL(pix) * Cy;
- pix += sow;
- }
- if(j > 0){
- r += R_VAL(pix) * j;
- g += G_VAL(pix) * j;
- b += B_VAL(pix) * j;
- }
- if(XAP > 0){
- pix = ypoints[dyy + y] + xpoints[x] + 1;
- rr = R_VAL(pix) * yap;
- gg = G_VAL(pix) * yap;
- bb = B_VAL(pix) * yap;
- pix += sow;
- for(j = (1 << 14) - yap; j > Cy; j -= Cy){
- rr += R_VAL(pix) * Cy;
- gg += G_VAL(pix) * Cy;
- bb += B_VAL(pix) * Cy;
- pix += sow;
- }
- if(j > 0){
- rr += R_VAL(pix) * j;
- gg += G_VAL(pix) * j;
- bb += B_VAL(pix) * j;
- }
- r = r * INV_XAP;
- g = g * INV_XAP;
- b = b * INV_XAP;
- r = (r + ((rr * XAP))) >> 12;
- g = (g + ((gg * XAP))) >> 12;
- b = (b + ((bb * XAP))) >> 12;
- }
- else{
- r >>= 4;
- g >>= 4;
- b >>= 4;
- }
- *dptr = qRgba(r >> 10, g >> 10, b >> 10, 0xff);
- dptr++;
+}
+
+static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow)
+{
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ for (int y = 0; y < dh; y++) {
+ int Cy = (yapoints[dyy + y]) >> 16;
+ int yap = (yapoints[dyy + y]) & 0xffff;
+
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ int rx, gx, bx, ax;
+ qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, rx, gx, bx, ax);
+
+ int r = ((rx>>4) * yap);
+ int g = ((gx>>4) * yap);
+ int b = ((bx>>4) * yap);
+ int a = ((ax>>4) * yap);
+
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, rx, gx, bx, ax);
+ r += ((rx>>4) * Cy);
+ g += ((gx>>4) * Cy);
+ b += ((bx>>4) * Cy);
+ a += ((ax>>4) * Cy);
}
+ sptr += sow;
+ qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, rx, gx, bx, ax);
+
+ r += ((rx>>4) * j);
+ g += ((gx>>4) * j);
+ b += ((bx>>4) * j);
+ a += ((ax>>4) * j);
+
+ *dptr = qRgba(r >> 24, g >> 24, b >> 24, a >> 24);
+ dptr++;
}
}
+}
+
+static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow);
+
+/* scale by area sampling - IGNORE the ALPHA byte*/
+static void qt_qimageScaleAARGB(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
+{
+ /* scaling up both ways */
+ if (isi->xup_yup == 3) {
+ qt_qimageScaleAARGBA_up_xy(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ }
+ /* if we're scaling down vertically */
+ else if (isi->xup_yup == 1) {
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
+ if (qCpuHasFeature(SSE4_1))
+ qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ else
+#endif
+ qt_qimageScaleAARGB_up_x_down_y(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ }
/* if we're scaling down horizontally */
- else if(isi->xup_yup == 2){
- /*\ 'Correct' version, with math units prepared for MMXification \*/
- int Cx, j;
- const unsigned int *pix;
- int r, g, b, rr, gg, bb;
- int xap;
-
- /* go through every scanline in the output buffer */
- for(y = 0; y < dh; y++){
- dptr = dest + dx + ((y + dy) * dow);
- for(x = dxx; x < end; x++){
- Cx = XAP >> 16;
- xap = XAP & 0xffff;
-
- pix = ypoints[dyy + y] + xpoints[x];
- r = R_VAL(pix) * xap;
- g = G_VAL(pix) * xap;
- b = B_VAL(pix) * xap;
- pix++;
- for(j = (1 << 14) - xap; j > Cx; j -= Cx){
- r += R_VAL(pix) * Cx;
- g += G_VAL(pix) * Cx;
- b += B_VAL(pix) * Cx;
- pix++;
- }
- if(j > 0){
- r += R_VAL(pix) * j;
- g += G_VAL(pix) * j;
- b += B_VAL(pix) * j;
- }
- if(YAP > 0){
- pix = ypoints[dyy + y] + xpoints[x] + sow;
- rr = R_VAL(pix) * xap;
- gg = G_VAL(pix) * xap;
- bb = B_VAL(pix) * xap;
- pix++;
- for(j = (1 << 14) - xap; j > Cx; j -= Cx){
- rr += R_VAL(pix) * Cx;
- gg += G_VAL(pix) * Cx;
- bb += B_VAL(pix) * Cx;
- pix++;
- }
- if(j > 0){
- rr += R_VAL(pix) * j;
- gg += G_VAL(pix) * j;
- bb += B_VAL(pix) * j;
- }
- r = r * INV_YAP;
- g = g * INV_YAP;
- b = b * INV_YAP;
- r = (r + ((rr * YAP))) >> 12;
- g = (g + ((gg * YAP))) >> 12;
- b = (b + ((bb * YAP))) >> 12;
- }
- else{
- r >>= 4;
- g >>= 4;
- b >>= 4;
- }
- *dptr = qRgba(r >> 10, g >> 10, b >> 10, 0xff);
- dptr++;
- }
- }
+ else if (isi->xup_yup == 2) {
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
+ if (qCpuHasFeature(SSE4_1))
+ qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ else
+#endif
+ qt_qimageScaleAARGB_down_x_up_y(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
}
- /* fully optimized (i think) - onyl change of algorithm can help */
/* if we're scaling down horizontally & vertically */
- else{
- /*\ 'Correct' version, with math units prepared for MMXification \*/
- int Cx, Cy, i, j;
- const unsigned int *pix;
- int r, g, b, rx, gx, bx;
- int xap, yap;
-
- for(y = 0; y < dh; y++){
- Cy = YAP >> 16;
- yap = YAP & 0xffff;
-
- dptr = dest + dx + ((y + dy) * dow);
- for(x = dxx; x < end; x++){
- Cx = XAP >> 16;
- xap = XAP & 0xffff;
-
- sptr = ypoints[dyy + y] + xpoints[x];
- pix = sptr;
- sptr += sow;
- rx = R_VAL(pix) * xap;
- gx = G_VAL(pix) * xap;
- bx = B_VAL(pix) * xap;
- pix++;
- for(i = (1 << 14) - xap; i > Cx; i -= Cx){
- rx += R_VAL(pix) * Cx;
- gx += G_VAL(pix) * Cx;
- bx += B_VAL(pix) * Cx;
- pix++;
- }
- if(i > 0){
- rx += R_VAL(pix) * i;
- gx += G_VAL(pix) * i;
- bx += B_VAL(pix) * i;
- }
-
- r = (rx >> 5) * yap;
- g = (gx >> 5) * yap;
- b = (bx >> 5) * yap;
-
- for(j = (1 << 14) - yap; j > Cy; j -= Cy){
- pix = sptr;
- sptr += sow;
- rx = R_VAL(pix) * xap;
- gx = G_VAL(pix) * xap;
- bx = B_VAL(pix) * xap;
- pix++;
- for(i = (1 << 14) - xap; i > Cx; i -= Cx){
- rx += R_VAL(pix) * Cx;
- gx += G_VAL(pix) * Cx;
- bx += B_VAL(pix) * Cx;
- pix++;
- }
- if(i > 0){
- rx += R_VAL(pix) * i;
- gx += G_VAL(pix) * i;
- bx += B_VAL(pix) * i;
- }
-
- r += (rx >> 5) * Cy;
- g += (gx >> 5) * Cy;
- b += (bx >> 5) * Cy;
- }
- if(j > 0){
- pix = sptr;
- sptr += sow;
- rx = R_VAL(pix) * xap;
- gx = G_VAL(pix) * xap;
- bx = B_VAL(pix) * xap;
- pix++;
- for(i = (1 << 14) - xap; i > Cx; i -= Cx){
- rx += R_VAL(pix) * Cx;
- gx += G_VAL(pix) * Cx;
- bx += B_VAL(pix) * Cx;
- pix++;
- }
- if(i > 0){
- rx += R_VAL(pix) * i;
- gx += G_VAL(pix) * i;
- bx += B_VAL(pix) * i;
- }
-
- r += (rx >> 5) * j;
- g += (gx >> 5) * j;
- b += (bx >> 5) * j;
- }
-
- *dptr = qRgb(r >> 23, g >> 23, b >> 23);
- dptr++;
+ else {
+#ifdef QT_COMPILER_SUPPORTS_SSE4_1
+ if (qCpuHasFeature(SSE4_1))
+ qt_qimageScaleAARGBA_down_xy_sse4<true>(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ else
+#endif
+ qt_qimageScaleAARGB_down_xy(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ }
+}
+
+
+inline static void qt_qimageScaleAARGB_helper_x(const unsigned int *pix, int xap, int Cx, int &r, int &g, int &b)
+{
+ r = R_VAL(pix) * xap;
+ g = G_VAL(pix) * xap;
+ b = B_VAL(pix) * xap;
+ int j;
+ for (j = (1 << 14) - xap; j > Cx; j -= Cx ){
+ pix++;
+ r += R_VAL(pix) * Cx;
+ g += G_VAL(pix) * Cx;
+ b += B_VAL(pix) * Cx;
+ }
+ pix++;
+ r += R_VAL(pix) * j;
+ g += G_VAL(pix) * j;
+ b += B_VAL(pix) * j;
+}
+
+inline static void qt_qimageScaleAARGB_helper_y(const unsigned int *pix, int yap, int Cy, int sow, int &r, int &g, int &b)
+{
+ r = R_VAL(pix) * yap;
+ g = G_VAL(pix) * yap;
+ b = B_VAL(pix) * yap;
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy ){
+ pix += sow;
+ r += R_VAL(pix) * Cy;
+ g += G_VAL(pix) * Cy;
+ b += B_VAL(pix) * Cy;
+ }
+ pix += sow;
+ r += R_VAL(pix) * j;
+ g += G_VAL(pix) * j;
+ b += B_VAL(pix) * j;
+}
+
+static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow)
+{
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ int Cy = (yapoints[dyy + y]) >> 16;
+ int yap = (yapoints[dyy + y]) & 0xffff;
+
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ int r, g, b;
+ qt_qimageScaleAARGB_helper_y(sptr, yap, Cy, sow, r, g, b);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ int rr, bb, gg;
+ qt_qimageScaleAARGB_helper_y(sptr + 1, yap, Cy, sow, rr, gg, bb);
+
+ r = r * (256 - xap);
+ g = g * (256 - xap);
+ b = b * (256 - xap);
+ r = (r + (rr * xap)) >> 8;
+ g = (g + (gg * xap)) >> 8;
+ b = (b + (bb * xap)) >> 8;
}
+ *dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
}
}
-#if 0
-static void qt_qimageScaleAARGBASetup(QImageScaleInfo *isi, unsigned int *dest,
- int dxx, int dyy, int dx, int dy, int dw,
- int dh, int dow, int sow)
+static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow)
{
- qInitDrawhelperAsm();
- qt_qimageScaleAARGBA(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ int r, g, b;
+ qt_qimageScaleAARGB_helper_x(sptr, xap, Cx, r, g, b);
+
+ int yap = yapoints[dyy + y];
+ if (yap > 0) {
+ int rr, bb, gg;
+ qt_qimageScaleAARGB_helper_x(sptr + sow, xap, Cx, rr, gg, bb);
+
+ r = r * (256 - yap);
+ g = g * (256 - yap);
+ b = b * (256 - yap);
+ r = (r + (rr * yap)) >> 8;
+ g = (g + (gg * yap)) >> 8;
+ b = (b + (bb * yap)) >> 8;
+ }
+ *dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
+ }
+ }
}
-static void qt_qimageScaleAARGBSetup(QImageScaleInfo *isi, unsigned int *dest,
- int dxx, int dyy, int dx, int dy, int dw,
- int dh, int dow, int sow)
+static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy, int dw,
+ int dh, int dow, int sow)
{
- qInitDrawhelperAsm();
- qt_qimageScaleAARGB(isi, dest, dxx, dyy, dx, dy, dw, dh, dow, sow);
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ for (int y = 0; y < dh; y++) {
+ int Cy = (yapoints[dyy + y]) >> 16;
+ int yap = (yapoints[dyy + y]) & 0xffff;
+
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ int rx, gx, bx;
+ qt_qimageScaleAARGB_helper_x(sptr, xap, Cx, rx, gx, bx);
+
+ int r = (rx >> 4) * yap;
+ int g = (gx >> 4) * yap;
+ int b = (bx >> 4) * yap;
+
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ qt_qimageScaleAARGB_helper_x(sptr, xap, Cx, rx, gx, bx);
+
+ r += (rx >> 4) * Cy;
+ g += (gx >> 4) * Cy;
+ b += (bx >> 4) * Cy;
+ }
+ sptr += sow;
+ qt_qimageScaleAARGB_helper_x(sptr, xap, Cx, rx, gx, bx);
+
+ r += (rx >> 4) * j;
+ g += (gx >> 4) * j;
+ b += (bx >> 4) * j;
+
+ *dptr = qRgb(r >> 24, g >> 24, b >> 24);
+ dptr++;
+ }
+ }
}
-#endif
QImage qSmoothScaleImage(const QImage &src, int dw, int dh)
{
@@ -1012,7 +793,7 @@ QImage qSmoothScaleImage(const QImage &src, int dw, int dh)
return QImage();
}
- if (src.format() == QImage::Format_ARGB32_Premultiplied || src.format() == QImage::Format_RGBA8888_Premultiplied)
+ if (src.hasAlphaChannel())
qt_qimageScaleArgb(scaleinfo, (unsigned int *)buffer.scanLine(0),
0, 0, 0, 0, dw, dh, dw, src.bytesPerLine() / 4);
else
diff --git a/src/gui/painting/qimagescale_p.h b/src/gui/painting/qimagescale_p.h
index 512ec6488e..c35aea451a 100644
--- a/src/gui/painting/qimagescale_p.h
+++ b/src/gui/painting/qimagescale_p.h
@@ -53,6 +53,15 @@ QT_BEGIN_NAMESPACE
*/
QImage qSmoothScaleImage(const QImage &img, int w, int h);
+namespace QImageScale {
+ struct QImageScaleInfo {
+ int *xpoints;
+ const unsigned int **ypoints;
+ int *xapoints, *yapoints;
+ int xup_yup;
+ };
+}
+
QT_END_NAMESPACE
#endif
diff --git a/src/gui/painting/qimagescale_sse4.cpp b/src/gui/painting/qimagescale_sse4.cpp
new file mode 100644
index 0000000000..565ea4daa1
--- /dev/null
+++ b/src/gui/painting/qimagescale_sse4.cpp
@@ -0,0 +1,247 @@
+/****************************************************************************
+**
+** Copyright (C) 2015 The Qt Company Ltd.
+** Contact: http://www.qt.io/licensing/
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL21$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see http://www.qt.io/terms-conditions. For further
+** information use the contact form at http://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 or version 3 as published by the Free
+** Software Foundation and appearing in the file LICENSE.LGPLv21 and
+** LICENSE.LGPLv3 included in the packaging of this file. Please review the
+** following information to ensure the GNU Lesser General Public License
+** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** As a special exception, The Qt Company gives you certain additional
+** rights. These rights are described in The Qt Company LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qimagescale_p.h"
+#include "qimage.h"
+#include <private/qsimd_p.h>
+
+#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
+
+QT_BEGIN_NAMESPACE
+
+using namespace QImageScale;
+
+inline static __m128i qt_qimageScaleAARGBA_helper_x(const unsigned int *pix, int xap, int Cx, const __m128i vxap, const __m128i vCx)
+{
+ __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
+ __m128i vx = _mm_mullo_epi32(vpix, vxap);
+ int i;
+ for (i = (1 << 14) - xap; i > Cx; i -= Cx) {
+ pix++;
+ vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
+ vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCx));
+ }
+ pix++;
+ vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
+ vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
+ return vx;
+}
+
+inline static __m128i qt_qimageScaleAARGBA_helper_y(const unsigned int *pix, int yap, int Cy, int sow, const __m128i vyap, const __m128i vCy)
+{
+ __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
+ __m128i vx = _mm_mullo_epi32(vpix, vyap);
+ int i;
+ for (i = (1 << 14) - yap; i > Cy; i -= Cy) {
+ pix += sow;
+ vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
+ vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCy));
+ }
+ pix += sow;
+ vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
+ vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
+ return vx;
+}
+
+template<bool RGB>
+void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
+{
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ const __m128i v256 = _mm_set1_epi32(256);
+
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ int Cy = (yapoints[dyy + y]) >> 16;
+ int yap = (yapoints[dyy + y]) & 0xffff;
+ const __m128i vCy = _mm_set1_epi32(Cy);
+ const __m128i vyap = _mm_set1_epi32(yap);
+
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ __m128i vx = qt_qimageScaleAARGBA_helper_y(sptr, yap, Cy, sow, vyap, vCy);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ const __m128i vxap = _mm_set1_epi32(xap);
+ const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
+ __m128i vr = qt_qimageScaleAARGBA_helper_y(sptr + 1, yap, Cy, sow, vyap, vCy);
+
+ vx = _mm_mullo_epi32(vx, vinvxap);
+ vr = _mm_mullo_epi32(vr, vxap);
+ vx = _mm_add_epi32(vx, vr);
+ vx = _mm_srli_epi32(vx, 8);
+ }
+ vx = _mm_srli_epi32(vx, 14);
+ vx = _mm_packus_epi32(vx, _mm_setzero_si128());
+ vx = _mm_packus_epi16(vx, _mm_setzero_si128());
+ *dptr = _mm_cvtsi128_si32(vx);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
+ }
+ }
+}
+
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
+{
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ int end = dxx + dw;
+
+ const __m128i v256 = _mm_set1_epi32(256);
+
+ /* go through every scanline in the output buffer */
+ for (int y = 0; y < dh; y++) {
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ for (int x = dxx; x < end; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+ const __m128i vCx = _mm_set1_epi32(Cx);
+ const __m128i vxap = _mm_set1_epi32(xap);
+
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ __m128i vx = qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, vxap, vCx);
+
+ int yap = yapoints[dyy + y];
+ if (yap > 0) {
+ const __m128i vyap = _mm_set1_epi32(yap);
+ const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
+ __m128i vr = qt_qimageScaleAARGBA_helper_x(sptr + sow, xap, Cx, vxap, vCx);
+
+ vx = _mm_mullo_epi32(vx, vinvyap);
+ vr = _mm_mullo_epi32(vr, vyap);
+ vx = _mm_add_epi32(vx, vr);
+ vx = _mm_srli_epi32(vx, 8);
+ }
+ vx = _mm_srli_epi32(vx, 14);
+ vx = _mm_packus_epi32(vx, _mm_setzero_si128());
+ vx = _mm_packus_epi16(vx, _mm_setzero_si128());
+ *dptr = _mm_cvtsi128_si32(vx);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
+ }
+ }
+}
+
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow)
+{
+ const unsigned int **ypoints = isi->ypoints;
+ int *xpoints = isi->xpoints;
+ int *xapoints = isi->xapoints;
+ int *yapoints = isi->yapoints;
+
+ for (int y = 0; y < dh; y++) {
+ int Cy = (yapoints[dyy + y]) >> 16;
+ int yap = (yapoints[dyy + y]) & 0xffff;
+ const __m128i vCy = _mm_set1_epi32(Cy);
+ const __m128i vyap = _mm_set1_epi32(yap);
+
+ unsigned int *dptr = dest + dx + ((y + dy) * dow);
+ int end = dxx + dw;
+ for (int x = dxx; x < end; x++) {
+ const int Cx = xapoints[x] >> 16;
+ const int xap = xapoints[x] & 0xffff;
+ const __m128i vCx = _mm_set1_epi32(Cx);
+ const __m128i vxap = _mm_set1_epi32(xap);
+
+ const unsigned int *sptr = ypoints[dyy + y] + xpoints[x];
+ __m128i vx = qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, vxap, vCx);
+ __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
+
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ vx = qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, vxap, vCx);
+ vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
+ }
+ sptr += sow;
+ vx = qt_qimageScaleAARGBA_helper_x(sptr, xap, Cx, vxap, vCx);
+ vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
+
+ vr = _mm_srli_epi32(vr, 24);
+ vr = _mm_packus_epi32(vr, _mm_setzero_si128());
+ vr = _mm_packus_epi16(vr, _mm_setzero_si128());
+ *dptr = _mm_cvtsi128_si32(vr);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
+ }
+ }
+}
+
+template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
+ int dxx, int dyy, int dx, int dy,
+ int dw, int dh, int dow, int sow);
+
+QT_END_NAMESPACE
+
+#endif