summary | refs | log | tree | commit | diff | stats
path: root/src/gui/painting
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2019-11-22 13:02:31 +0100
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2020-03-09 23:22:43 +0100
commit: 332816779c42e8a3fed34e9295c09338bc9b4945 (patch)
tree: d6787b66139b1a48960b671b26ced14a08929781 /src/gui/painting
parent: cc59f0de557e2d8fba274a86ba43afc4ffcd935b (diff)
Multithread some QImage routines
Use QThreadPool to process QImage smooth-scaling, format conversions, and colorspace transforms multithreaded.

Change-Id: Ic142b1fa899f56e7e5099d36ca713701a47b681b
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- src/gui/painting/qimagescale.cpp 794
-rw-r--r-- src/gui/painting/qimagescale_neon.cpp 218
-rw-r--r-- src/gui/painting/qimagescale_p.h 2
-rw-r--r-- src/gui/painting/qimagescale_sse4.cpp 246
4 files changed, 818 insertions, 442 deletions
diff --git a/src/gui/painting/qimagescale.cpp b/src/gui/painting/qimagescale.cpp
index 2e2f65b483..ecb0230e71 100644
--- a/src/gui/painting/qimagescale.cpp
+++ b/src/gui/painting/qimagescale.cpp
@@ -43,6 +43,11 @@
#include "qcolor.h"
#include "qrgba64_p.h"
+#if QT_CONFIG(thread)
+#include "qsemaphore.h"
+#include "qthreadpool.h"
+#endif
+
QT_BEGIN_NAMESPACE
/*
@@ -239,6 +244,8 @@ static QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const QImage &img,
isi = new QImageScaleInfo;
if (!isi)
return nullptr;
+ isi->sh = sh;
+ isi->sw = sw;
isi->xup_yup = (qAbs(dw) >= sw) + ((qAbs(dh) >= sh) << 1);
@@ -303,33 +310,54 @@ static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest,
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- /* calculate the source line we'll scan from */
- const unsigned int *sptr = ypoints[y];
- unsigned int *dptr = dest + (y * dow);
- const int yap = yapoints[y];
- if (yap > 0) {
- for (int x = 0; x < dw; x++) {
- const unsigned int *pix = sptr + xpoints[x];
- const int xap = xapoints[x];
- if (xap > 0)
- *dptr = interpolate_4_pixels(pix, pix + sow, xap, yap);
- else
- *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - yap, pix[sow], yap);
- dptr++;
- }
- } else {
- for (int x = 0; x < dw; x++) {
- const unsigned int *pix = sptr + xpoints[x];
- const int xap = xapoints[x];
- if (xap > 0)
- *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - xap, pix[1], xap);
- else
- *dptr = pix[0];
- dptr++;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ /* calculate the source line we'll scan from */
+ const unsigned int *sptr = ypoints[y];
+ unsigned int *dptr = dest + (y * dow);
+ const int yap = yapoints[y];
+ if (yap > 0) {
+ for (int x = 0; x < dw; x++) {
+ const unsigned int *pix = sptr + xpoints[x];
+ const int xap = xapoints[x];
+ if (xap > 0)
+ *dptr = interpolate_4_pixels(pix, pix + sow, xap, yap);
+ else
+ *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - yap, pix[sow], yap);
+ dptr++;
+ }
+ } else {
+ for (int x = 0; x < dw; x++) {
+ const unsigned int *pix = sptr + xpoints[x];
+ const int xap = xapoints[x];
+ if (xap > 0)
+ *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - xap, pix[1], xap);
+ else
+ *dptr = pix[0];
+ dptr++;
+ }
}
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
/* scale by area sampling - with alpha */
@@ -411,33 +439,54 @@ static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
-
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- int r, g, b, a;
- qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, r, g, b, a);
-
- int xap = xapoints[x];
- if (xap > 0) {
- int rr, gg, bb, aa;
- qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, rr, gg, bb, aa);
-
- r = r * (256 - xap);
- g = g * (256 - xap);
- b = b * (256 - xap);
- a = a * (256 - xap);
- r = (r + (rr * xap)) >> 8;
- g = (g + (gg * xap)) >> 8;
- b = (b + (bb * xap)) >> 8;
- a = (a + (aa * xap)) >> 8;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = yapoints[y] >> 16;
+ int yap = yapoints[y] & 0xffff;
+
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ int r, g, b, a;
+ qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, r, g, b, a);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ int rr, gg, bb, aa;
+ qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, rr, gg, bb, aa);
+
+ r = r * (256 - xap);
+ g = g * (256 - xap);
+ b = b * (256 - xap);
+ a = a * (256 - xap);
+ r = (r + (rr * xap)) >> 8;
+ g = (g + (gg * xap)) >> 8;
+ b = (b + (bb * xap)) >> 8;
+ a = (a + (aa * xap)) >> 8;
+ }
+ *dptr++ = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
}
- *dptr++ = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
@@ -449,34 +498,55 @@ static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
-
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- int r, g, b, a;
- qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, r, g, b, a);
-
- int yap = yapoints[y];
- if (yap > 0) {
- int rr, gg, bb, aa;
- qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, rr, gg, bb, aa);
-
- r = r * (256 - yap);
- g = g * (256 - yap);
- b = b * (256 - yap);
- a = a * (256 - yap);
- r = (r + (rr * yap)) >> 8;
- g = (g + (gg * yap)) >> 8;
- b = (b + (bb * yap)) >> 8;
- a = (a + (aa * yap)) >> 8;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ int r, g, b, a;
+ qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, r, g, b, a);
+
+ int yap = yapoints[y];
+ if (yap > 0) {
+ int rr, gg, bb, aa;
+ qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, rr, gg, bb, aa);
+
+ r = r * (256 - yap);
+ g = g * (256 - yap);
+ b = b * (256 - yap);
+ a = a * (256 - yap);
+ r = (r + (rr * yap)) >> 8;
+ g = (g + (gg * yap)) >> 8;
+ b = (b + (bb * yap)) >> 8;
+ a = (a + (aa * yap)) >> 8;
+ }
+ *dptr = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
+ dptr++;
}
- *dptr = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *dest,
@@ -487,45 +557,66 @@ static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *des
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- int Cy = (yapoints[y]) >> 16;
- int yap = (yapoints[y]) & 0xffff;
-
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = (yapoints[y]) >> 16;
+ int yap = (yapoints[y]) & 0xffff;
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- int rx, gx, bx, ax;
- qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
- int r = ((rx>>4) * yap);
- int g = ((gx>>4) * yap);
- int b = ((bx>>4) * yap);
- int a = ((ax>>4) * yap);
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ int rx, gx, bx, ax;
+ qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
- int j;
- for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ int r = ((rx>>4) * yap);
+ int g = ((gx>>4) * yap);
+ int b = ((bx>>4) * yap);
+ int a = ((ax>>4) * yap);
+
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+ r += ((rx>>4) * Cy);
+ g += ((gx>>4) * Cy);
+ b += ((bx>>4) * Cy);
+ a += ((ax>>4) * Cy);
+ }
sptr += sow;
qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
- r += ((rx>>4) * Cy);
- g += ((gx>>4) * Cy);
- b += ((bx>>4) * Cy);
- a += ((ax>>4) * Cy);
- }
- sptr += sow;
- qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
- r += ((rx>>4) * j);
- g += ((gx>>4) * j);
- b += ((bx>>4) * j);
- a += ((ax>>4) * j);
+ r += ((rx>>4) * j);
+ g += ((gx>>4) * j);
+ b += ((bx>>4) * j);
+ a += ((ax>>4) * j);
- *dptr = qRgba(r >> 24, g >> 24, b >> 24, a >> 24);
- dptr++;
+ *dptr = qRgba(r >> 24, g >> 24, b >> 24, a >> 24);
+ dptr++;
+ }
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
#if QT_CONFIG(raster_64bit)
@@ -546,32 +637,53 @@ static void qt_qimageScaleRgba64_up_xy(QImageScaleInfo *isi, QRgba64 *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- const QRgba64 *sptr = ypoints[y];
- QRgba64 *dptr = dest + (y * dow);
- const int yap = yapoints[y];
- if (yap > 0) {
- for (int x = 0; x < dw; x++) {
- const QRgba64 *pix = sptr + xpoints[x];
- const int xap = xapoints[x];
- if (xap > 0)
- *dptr = interpolate_4_pixels_rgb64(pix, pix + sow, xap * 256, yap * 256);
- else
- *dptr = interpolate256(pix[0], 256 - yap, pix[sow], yap);
- dptr++;
- }
- } else {
- for (int x = 0; x < dw; x++) {
- const QRgba64 *pix = sptr + xpoints[x];
- const int xap = xapoints[x];
- if (xap > 0)
- *dptr = interpolate256(pix[0], 256 - xap, pix[1], xap);
- else
- *dptr = pix[0];
- dptr++;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ const QRgba64 *sptr = ypoints[y];
+ QRgba64 *dptr = dest + (y * dow);
+ const int yap = yapoints[y];
+ if (yap > 0) {
+ for (int x = 0; x < dw; x++) {
+ const QRgba64 *pix = sptr + xpoints[x];
+ const int xap = xapoints[x];
+ if (xap > 0)
+ *dptr = interpolate_4_pixels_rgb64(pix, pix + sow, xap * 256, yap * 256);
+ else
+ *dptr = interpolate256(pix[0], 256 - yap, pix[sow], yap);
+ dptr++;
+ }
+ } else {
+ for (int x = 0; x < dw; x++) {
+ const QRgba64 *pix = sptr + xpoints[x];
+ const int xap = xapoints[x];
+ if (xap > 0)
+ *dptr = interpolate256(pix[0], 256 - xap, pix[1], xap);
+ else
+ *dptr = pix[0];
+ dptr++;
+ }
}
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
void qt_qimageScaleRgba64(QImageScaleInfo *isi, QRgba64 *dest,
@@ -616,33 +728,54 @@ static void qt_qimageScaleRgba64_up_x_down_y(QImageScaleInfo *isi, QRgba64 *dest
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- int Cy = (yapoints[y]) >> 16;
- int yap = (yapoints[y]) & 0xffff;
-
- QRgba64 *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const QRgba64 *sptr = ypoints[y] + xpoints[x];
- qint64 r, g, b, a;
- qt_qimageScaleRgba64_helper(sptr, yap, Cy, sow, r, g, b, a);
-
- int xap = xapoints[x];
- if (xap > 0) {
- qint64 rr, gg, bb, aa;
- qt_qimageScaleRgba64_helper(sptr + 1, yap, Cy, sow, rr, gg, bb, aa);
-
- r = r * (256 - xap);
- g = g * (256 - xap);
- b = b * (256 - xap);
- a = a * (256 - xap);
- r = (r + (rr * xap)) >> 8;
- g = (g + (gg * xap)) >> 8;
- b = (b + (bb * xap)) >> 8;
- a = (a + (aa * xap)) >> 8;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = (yapoints[y]) >> 16;
+ int yap = (yapoints[y]) & 0xffff;
+
+ QRgba64 *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const QRgba64 *sptr = ypoints[y] + xpoints[x];
+ qint64 r, g, b, a;
+ qt_qimageScaleRgba64_helper(sptr, yap, Cy, sow, r, g, b, a);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ qint64 rr, gg, bb, aa;
+ qt_qimageScaleRgba64_helper(sptr + 1, yap, Cy, sow, rr, gg, bb, aa);
+
+ r = r * (256 - xap);
+ g = g * (256 - xap);
+ b = b * (256 - xap);
+ a = a * (256 - xap);
+ r = (r + (rr * xap)) >> 8;
+ g = (g + (gg * xap)) >> 8;
+ b = (b + (bb * xap)) >> 8;
+ a = (a + (aa * xap)) >> 8;
+ }
+ *dptr++ = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
}
- *dptr++ = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest,
@@ -653,34 +786,55 @@ static void qt_qimageScaleRgba64_down_x_up_y(QImageScaleInfo *isi, QRgba64 *dest
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- QRgba64 *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
-
- const QRgba64 *sptr = ypoints[y] + xpoints[x];
- qint64 r, g, b, a;
- qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, r, g, b, a);
-
- int yap = yapoints[y];
- if (yap > 0) {
- qint64 rr, gg, bb, aa;
- qt_qimageScaleRgba64_helper(sptr + sow, xap, Cx, 1, rr, gg, bb, aa);
-
- r = r * (256 - yap);
- g = g * (256 - yap);
- b = b * (256 - yap);
- a = a * (256 - yap);
- r = (r + (rr * yap)) >> 8;
- g = (g + (gg * yap)) >> 8;
- b = (b + (bb * yap)) >> 8;
- a = (a + (aa * yap)) >> 8;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ QRgba64 *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const QRgba64 *sptr = ypoints[y] + xpoints[x];
+ qint64 r, g, b, a;
+ qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, r, g, b, a);
+
+ int yap = yapoints[y];
+ if (yap > 0) {
+ qint64 rr, gg, bb, aa;
+ qt_qimageScaleRgba64_helper(sptr + sow, xap, Cx, 1, rr, gg, bb, aa);
+
+ r = r * (256 - yap);
+ g = g * (256 - yap);
+ b = b * (256 - yap);
+ a = a * (256 - yap);
+ r = (r + (rr * yap)) >> 8;
+ g = (g + (gg * yap)) >> 8;
+ b = (b + (bb * yap)) >> 8;
+ a = (a + (aa * yap)) >> 8;
+ }
+ *dptr = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
+ dptr++;
}
- *dptr = qRgba64(r >> 14, g >> 14, b >> 14, a >> 14);
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
@@ -691,43 +845,64 @@ static void qt_qimageScaleRgba64_down_xy(QImageScaleInfo *isi, QRgba64 *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- int Cy = (yapoints[y]) >> 16;
- int yap = (yapoints[y]) & 0xffff;
-
- QRgba64 *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
-
- const QRgba64 *sptr = ypoints[y] + xpoints[x];
- qint64 rx, gx, bx, ax;
- qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
-
- qint64 r = rx * yap;
- qint64 g = gx * yap;
- qint64 b = bx * yap;
- qint64 a = ax * yap;
- int j;
- for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = (yapoints[y]) >> 16;
+ int yap = (yapoints[y]) & 0xffff;
+
+ QRgba64 *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const QRgba64 *sptr = ypoints[y] + xpoints[x];
+ qint64 rx, gx, bx, ax;
+ qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+
+ qint64 r = rx * yap;
+ qint64 g = gx * yap;
+ qint64 b = bx * yap;
+ qint64 a = ax * yap;
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+ r += rx * Cy;
+ g += gx * Cy;
+ b += bx * Cy;
+ a += ax * Cy;
+ }
sptr += sow;
qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
- r += rx * Cy;
- g += gx * Cy;
- b += bx * Cy;
- a += ax * Cy;
+ r += rx * j;
+ g += gx * j;
+ b += bx * j;
+ a += ax * j;
+
+ *dptr = qRgba64(r >> 28, g >> 28, b >> 28, a >> 28);
+ dptr++;
}
- sptr += sow;
- qt_qimageScaleRgba64_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
- r += rx * j;
- g += gx * j;
- b += bx * j;
- a += ax * j;
-
- *dptr = qRgba64(r >> 28, g >> 28, b >> 28, a >> 28);
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
#endif
@@ -817,31 +992,52 @@ static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
-
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- int r, g, b;
- qt_qimageScaleAARGB_helper(sptr, yap, Cy, sow, r, g, b);
-
- int xap = xapoints[x];
- if (xap > 0) {
- int rr, bb, gg;
- qt_qimageScaleAARGB_helper(sptr + 1, yap, Cy, sow, rr, gg, bb);
-
- r = r * (256 - xap);
- g = g * (256 - xap);
- b = b * (256 - xap);
- r = (r + (rr * xap)) >> 8;
- g = (g + (gg * xap)) >> 8;
- b = (b + (bb * xap)) >> 8;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = yapoints[y] >> 16;
+ int yap = yapoints[y] & 0xffff;
+
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ int r, g, b;
+ qt_qimageScaleAARGB_helper(sptr, yap, Cy, sow, r, g, b);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ int rr, bb, gg;
+ qt_qimageScaleAARGB_helper(sptr + 1, yap, Cy, sow, rr, gg, bb);
+
+ r = r * (256 - xap);
+ g = g * (256 - xap);
+ b = b * (256 - xap);
+ r = (r + (rr * xap)) >> 8;
+ g = (g + (gg * xap)) >> 8;
+ b = (b + (bb * xap)) >> 8;
+ }
+ *dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
- *dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
@@ -853,31 +1049,52 @@ static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
-
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- int r, g, b;
- qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, r, g, b);
-
- int yap = yapoints[y];
- if (yap > 0) {
- int rr, bb, gg;
- qt_qimageScaleAARGB_helper(sptr + sow, xap, Cx, 1, rr, gg, bb);
-
- r = r * (256 - yap);
- g = g * (256 - yap);
- b = b * (256 - yap);
- r = (r + (rr * yap)) >> 8;
- g = (g + (gg * yap)) >> 8;
- b = (b + (bb * yap)) >> 8;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ int r, g, b;
+ qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, r, g, b);
+
+ int yap = yapoints[y];
+ if (yap > 0) {
+ int rr, bb, gg;
+ qt_qimageScaleAARGB_helper(sptr + sow, xap, Cx, 1, rr, gg, bb);
+
+ r = r * (256 - yap);
+ g = g * (256 - yap);
+ b = b * (256 - yap);
+ r = (r + (rr * yap)) >> 8;
+ g = (g + (gg * yap)) >> 8;
+ b = (b + (bb * yap)) >> 8;
+ }
+ *dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
- *dptr++ = qRgb(r >> 14, g >> 14, b >> 14);
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest,
@@ -888,43 +1105,64 @@ static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = yapoints[y] >> 16;
+ int yap = yapoints[y] & 0xffff;
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ int rx, gx, bx;
+ qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx);
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- int rx, gx, bx;
- qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx);
+ int r = (rx >> 4) * yap;
+ int g = (gx >> 4) * yap;
+ int b = (bx >> 4) * yap;
- int r = (rx >> 4) * yap;
- int g = (gx >> 4) * yap;
- int b = (bx >> 4) * yap;
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx);
- int j;
- for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ r += (rx >> 4) * Cy;
+ g += (gx >> 4) * Cy;
+ b += (bx >> 4) * Cy;
+ }
sptr += sow;
qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx);
- r += (rx >> 4) * Cy;
- g += (gx >> 4) * Cy;
- b += (bx >> 4) * Cy;
- }
- sptr += sow;
- qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx);
-
- r += (rx >> 4) * j;
- g += (gx >> 4) * j;
- b += (bx >> 4) * j;
+ r += (rx >> 4) * j;
+ g += (gx >> 4) * j;
+ b += (bx >> 4) * j;
- *dptr = qRgb(r >> 24, g >> 24, b >> 24);
- dptr++;
+ *dptr = qRgb(r >> 24, g >> 24, b >> 24);
+ dptr++;
+ }
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
QImage qSmoothScaleImage(const QImage &src, int dw, int dh)
diff --git a/src/gui/painting/qimagescale_neon.cpp b/src/gui/painting/qimagescale_neon.cpp
index 4ae113b002..416155e139 100644
--- a/src/gui/painting/qimagescale_neon.cpp
+++ b/src/gui/painting/qimagescale_neon.cpp
@@ -41,6 +41,11 @@
#include "qimage.h"
#include <private/qsimd_p.h>
+#if QT_CONFIG(thread)
+#include "qsemaphore.h"
+#include "qthreadpool.h"
+#endif
+
#if defined(__ARM_NEON__)
QT_BEGIN_NAMESPACE
@@ -76,33 +81,54 @@ void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *d
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
-
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);
-
- int xap = xapoints[x];
- if (xap > 0) {
- uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);
-
- vx = vmulq_n_u32(vx, 256 - xap);
- vr = vmulq_n_u32(vr, xap);
- vx = vaddq_u32(vx, vr);
- vx = vshrq_n_u32(vx, 8);
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = yapoints[y] >> 16;
+ int yap = yapoints[y] & 0xffff;
+
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);
+
+ int xap = xapoints[x];
+ if (xap > 0) {
+ uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);
+
+ vx = vmulq_n_u32(vx, 256 - xap);
+ vr = vmulq_n_u32(vr, xap);
+ vx = vaddq_u32(vx, vr);
+ vx = vshrq_n_u32(vx, 8);
+ }
+ vx = vshrq_n_u32(vx, 14);
+ const uint16x4_t vx16 = vmovn_u32(vx);
+ const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
+ *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
}
- vx = vshrq_n_u32(vx, 14);
- const uint16x4_t vx16 = vmovn_u32(vx);
- const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
- *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
- if (RGB)
- *dptr |= 0xff000000;
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
template<bool RGB>
@@ -115,33 +141,54 @@ void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *d
int *yapoints = isi->yapoints;
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
-
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
-
- int yap = yapoints[y];
- if (yap > 0) {
- uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);
-
- vx = vmulq_n_u32(vx, 256 - yap);
- vr = vmulq_n_u32(vr, yap);
- vx = vaddq_u32(vx, vr);
- vx = vshrq_n_u32(vx, 8);
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
+
+ int yap = yapoints[y];
+ if (yap > 0) {
+ uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);
+
+ vx = vmulq_n_u32(vx, 256 - yap);
+ vr = vmulq_n_u32(vr, yap);
+ vx = vaddq_u32(vx, vr);
+ vx = vshrq_n_u32(vx, 8);
+ }
+ vx = vshrq_n_u32(vx, 14);
+ const uint16x4_t vx16 = vmovn_u32(vx);
+ const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
+ *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
}
- vx = vshrq_n_u32(vx, 14);
- const uint16x4_t vx16 = vmovn_u32(vx);
- const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
- *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
- if (RGB)
- *dptr |= 0xff000000;
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
template<bool RGB>
@@ -153,43 +200,64 @@ void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = yapoints[y] >> 16;
+ int yap = yapoints[y] & 0xffff;
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const int Cx = xapoints[x] >> 16;
- const int xap = xapoints[x] & 0xffff;
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const int Cx = xapoints[x] >> 16;
+ const int xap = xapoints[x] & 0xffff;
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
- vx = vshrq_n_u32(vx, 4);
- uint32x4_t vr = vmulq_n_u32(vx, yap);
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
+ vx = vshrq_n_u32(vx, 4);
+ uint32x4_t vr = vmulq_n_u32(vx, yap);
- int j;
- for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
+ vx = vshrq_n_u32(vx, 4);
+ vx = vmulq_n_u32(vx, Cy);
+ vr = vaddq_u32(vr, vx);
+ }
sptr += sow;
vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
vx = vshrq_n_u32(vx, 4);
- vx = vmulq_n_u32(vx, Cy);
+ vx = vmulq_n_u32(vx, j);
vr = vaddq_u32(vr, vx);
+
+ vx = vshrq_n_u32(vr, 24);
+ const uint16x4_t vx16 = vmovn_u32(vx);
+ const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
+ *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
}
- sptr += sow;
- vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
- vx = vshrq_n_u32(vx, 4);
- vx = vmulq_n_u32(vx, j);
- vr = vaddq_u32(vr, vx);
-
- vx = vshrq_n_u32(vr, 24);
- const uint16x4_t vx16 = vmovn_u32(vx);
- const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
- *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
- if (RGB)
- *dptr |= 0xff000000;
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
template void qt_qimageScaleAARGBA_up_x_down_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
diff --git a/src/gui/painting/qimagescale_p.h b/src/gui/painting/qimagescale_p.h
index 244d681718..a9a4c0f858 100644
--- a/src/gui/painting/qimagescale_p.h
+++ b/src/gui/painting/qimagescale_p.h
@@ -66,6 +66,8 @@ namespace QImageScale {
int *xapoints{nullptr};
int *yapoints{nullptr};
int xup_yup{0};
+ int sh = 0;
+ int sw = 0;
};
}
diff --git a/src/gui/painting/qimagescale_sse4.cpp b/src/gui/painting/qimagescale_sse4.cpp
index 5861a2e2ff..902ae61ed2 100644
--- a/src/gui/painting/qimagescale_sse4.cpp
+++ b/src/gui/painting/qimagescale_sse4.cpp
@@ -42,6 +42,11 @@
#include <private/qdrawhelper_x86_p.h>
#include <private/qsimd_p.h>
+#if QT_CONFIG(thread)
+#include "qsemaphore.h"
+#include "qthreadpool.h"
+#endif
+
#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
QT_BEGIN_NAMESPACE
@@ -70,44 +75,65 @@ void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *d
int dw, int dh, int dow, int sow)
{
const unsigned int **ypoints = isi->ypoints;
- int *xpoints = isi->xpoints;
- int *xapoints = isi->xapoints;
- int *yapoints = isi->yapoints;
+ const int *xpoints = isi->xpoints;
+ const int *xapoints = isi->xapoints;
+ const int *yapoints = isi->yapoints;
const __m128i v256 = _mm_set1_epi32(256);
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
- const __m128i vCy = _mm_set1_epi32(Cy);
- const __m128i vyap = _mm_set1_epi32(yap);
-
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
-
- int xap = xapoints[x];
- if (xap > 0) {
- const __m128i vxap = _mm_set1_epi32(xap);
- const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
- __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ const int Cy = yapoints[y] >> 16;
+ const int yap = yapoints[y] & 0xffff;
+ const __m128i vCy = _mm_set1_epi32(Cy);
+ const __m128i vyap = _mm_set1_epi32(yap);
- vx = _mm_mullo_epi32(vx, vinvxap);
- vr = _mm_mullo_epi32(vr, vxap);
- vx = _mm_add_epi32(vx, vr);
- vx = _mm_srli_epi32(vx, 8);
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
+
+ const int xap = xapoints[x];
+ if (xap > 0) {
+ const __m128i vxap = _mm_set1_epi32(xap);
+ const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
+ __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
+
+ vx = _mm_mullo_epi32(vx, vinvxap);
+ vr = _mm_mullo_epi32(vr, vxap);
+ vx = _mm_add_epi32(vx, vr);
+ vx = _mm_srli_epi32(vx, 8);
+ }
+ vx = _mm_srli_epi32(vx, 14);
+ vx = _mm_packus_epi32(vx, vx);
+ vx = _mm_packus_epi16(vx, vx);
+ *dptr = _mm_cvtsi128_si32(vx);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
}
- vx = _mm_srli_epi32(vx, 14);
- vx = _mm_packus_epi32(vx, _mm_setzero_si128());
- vx = _mm_packus_epi16(vx, _mm_setzero_si128());
- *dptr = _mm_cvtsi128_si32(vx);
- if (RGB)
- *dptr |= 0xff000000;
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
template<bool RGB>
@@ -122,37 +148,58 @@ void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *d
const __m128i v256 = _mm_set1_epi32(256);
/* go through every scanline in the output buffer */
- for (int y = 0; y < dh; y++) {
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- int Cx = xapoints[x] >> 16;
- int xap = xapoints[x] & 0xffff;
- const __m128i vCx = _mm_set1_epi32(Cx);
- const __m128i vxap = _mm_set1_epi32(xap);
-
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
-
- int yap = yapoints[y];
- if (yap > 0) {
- const __m128i vyap = _mm_set1_epi32(yap);
- const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
- __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
-
- vx = _mm_mullo_epi32(vx, vinvyap);
- vr = _mm_mullo_epi32(vr, vyap);
- vx = _mm_add_epi32(vx, vr);
- vx = _mm_srli_epi32(vx, 8);
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ int Cx = xapoints[x] >> 16;
+ int xap = xapoints[x] & 0xffff;
+ const __m128i vCx = _mm_set1_epi32(Cx);
+ const __m128i vxap = _mm_set1_epi32(xap);
+
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
+
+ int yap = yapoints[y];
+ if (yap > 0) {
+ const __m128i vyap = _mm_set1_epi32(yap);
+ const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
+ __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
+
+ vx = _mm_mullo_epi32(vx, vinvyap);
+ vr = _mm_mullo_epi32(vr, vyap);
+ vx = _mm_add_epi32(vx, vr);
+ vx = _mm_srli_epi32(vx, 8);
+ }
+ vx = _mm_srli_epi32(vx, 14);
+ vx = _mm_packus_epi32(vx, vx);
+ vx = _mm_packus_epi16(vx, vx);
+ *dptr = _mm_cvtsi128_si32(vx);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
}
- vx = _mm_srli_epi32(vx, 14);
- vx = _mm_packus_epi32(vx, _mm_setzero_si128());
- vx = _mm_packus_epi16(vx, _mm_setzero_si128());
- *dptr = _mm_cvtsi128_si32(vx);
- if (RGB)
- *dptr |= 0xff000000;
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
template<bool RGB>
@@ -164,42 +211,63 @@ void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
int *xapoints = isi->xapoints;
int *yapoints = isi->yapoints;
- for (int y = 0; y < dh; y++) {
- int Cy = yapoints[y] >> 16;
- int yap = yapoints[y] & 0xffff;
- const __m128i vCy = _mm_set1_epi32(Cy);
- const __m128i vyap = _mm_set1_epi32(yap);
-
- unsigned int *dptr = dest + (y * dow);
- for (int x = 0; x < dw; x++) {
- const int Cx = xapoints[x] >> 16;
- const int xap = xapoints[x] & 0xffff;
- const __m128i vCx = _mm_set1_epi32(Cx);
- const __m128i vxap = _mm_set1_epi32(xap);
-
- const unsigned int *sptr = ypoints[y] + xpoints[x];
- __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
- __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
-
- int j;
- for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ auto scaleSection = [&] (int yStart, int yEnd) {
+ for (int y = yStart; y < yEnd; ++y) {
+ int Cy = yapoints[y] >> 16;
+ int yap = yapoints[y] & 0xffff;
+ const __m128i vCy = _mm_set1_epi32(Cy);
+ const __m128i vyap = _mm_set1_epi32(yap);
+
+ unsigned int *dptr = dest + (y * dow);
+ for (int x = 0; x < dw; x++) {
+ const int Cx = xapoints[x] >> 16;
+ const int xap = xapoints[x] & 0xffff;
+ const __m128i vCx = _mm_set1_epi32(Cx);
+ const __m128i vxap = _mm_set1_epi32(xap);
+
+ const unsigned int *sptr = ypoints[y] + xpoints[x];
+ __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
+ __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
+
+ int j;
+ for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+ sptr += sow;
+ vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
+ vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
+ }
sptr += sow;
vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
- vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
+ vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
+
+ vr = _mm_srli_epi32(vr, 24);
+ vr = _mm_packus_epi32(vr, _mm_setzero_si128());
+ vr = _mm_packus_epi16(vr, _mm_setzero_si128());
+ *dptr = _mm_cvtsi128_si32(vr);
+ if (RGB)
+ *dptr |= 0xff000000;
+ dptr++;
}
- sptr += sow;
- vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
- vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
-
- vr = _mm_srli_epi32(vr, 24);
- vr = _mm_packus_epi32(vr, _mm_setzero_si128());
- vr = _mm_packus_epi16(vr, _mm_setzero_si128());
- *dptr = _mm_cvtsi128_si32(vr);
- if (RGB)
- *dptr |= 0xff000000;
- dptr++;
}
+ };
+#if QT_CONFIG(thread)
+ int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
+ segments = std::min(segments, dh);
+ if (segments > 1) {
+ QSemaphore semaphore;
+ int y = 0;
+ for (int i = 0; i < segments; ++i) {
+ int yn = (dh - y) / (segments - i);
+ QThreadPool::globalInstance()->start([&, y, yn]() {
+ scaleSection(y, y + yn);
+ semaphore.release(1);
+ });
+ y += yn;
+ }
+ semaphore.acquire(segments);
+ return;
}
+#endif
+ scaleSection(0, dh);
}
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,