diff options
Diffstat (limited to 'src/gui/painting/qimagescale_neon.cpp')
-rw-r--r-- | src/gui/painting/qimagescale_neon.cpp | 218 |
1 files changed, 143 insertions, 75 deletions
diff --git a/src/gui/painting/qimagescale_neon.cpp b/src/gui/painting/qimagescale_neon.cpp index 4ae113b002..416155e139 100644 --- a/src/gui/painting/qimagescale_neon.cpp +++ b/src/gui/painting/qimagescale_neon.cpp @@ -41,6 +41,11 @@ #include "qimage.h" #include <private/qsimd_p.h> +#if QT_CONFIG(thread) +#include "qsemaphore.h" +#include "qthreadpool.h" +#endif + #if defined(__ARM_NEON__) QT_BEGIN_NAMESPACE @@ -76,33 +81,54 @@ void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *d int *yapoints = isi->yapoints; /* go through every scanline in the output buffer */ - for (int y = 0; y < dh; y++) { - int Cy = yapoints[y] >> 16; - int yap = yapoints[y] & 0xffff; - - unsigned int *dptr = dest + (y * dow); - for (int x = 0; x < dw; x++) { - const unsigned int *sptr = ypoints[y] + xpoints[x]; - uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow); - - int xap = xapoints[x]; - if (xap > 0) { - uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow); - - vx = vmulq_n_u32(vx, 256 - xap); - vr = vmulq_n_u32(vr, xap); - vx = vaddq_u32(vx, vr); - vx = vshrq_n_u32(vx, 8); + auto scaleSection = [&] (int yStart, int yEnd) { + for (int y = yStart; y < yEnd; ++y) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const unsigned int *sptr = ypoints[y] + xpoints[x]; + uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow); + + int xap = xapoints[x]; + if (xap > 0) { + uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow); + + vx = vmulq_n_u32(vx, 256 - xap); + vr = vmulq_n_u32(vr, xap); + vx = vaddq_u32(vx, vr); + vx = vshrq_n_u32(vx, 8); + } + vx = vshrq_n_u32(vx, 14); + const uint16x4_t vx16 = vmovn_u32(vx); + const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); + *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); + if (RGB) + *dptr |= 0xff000000; + dptr++; } - vx = vshrq_n_u32(vx, 14); - const uint16x4_t vx16 = vmovn_u32(vx); - const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); - *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); - if (RGB) - *dptr |= 0xff000000; - dptr++; } + }; +#if QT_CONFIG(thread) + int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); + segments = std::min(segments, dh); + if (segments > 1) { + QSemaphore semaphore; + int y = 0; + for (int i = 0; i < segments; ++i) { + int yn = (dh - y) / (segments - i); + QThreadPool::globalInstance()->start([&, y, yn]() { + scaleSection(y, y + yn); + semaphore.release(1); + }); + y += yn; + } + semaphore.acquire(segments); + return; } +#endif + scaleSection(0, dh); } template<bool RGB> @@ -115,33 +141,54 @@ void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *d int *yapoints = isi->yapoints; /* go through every scanline in the output buffer */ - for (int y = 0; y < dh; y++) { - unsigned int *dptr = dest + (y * dow); - for (int x = 0; x < dw; x++) { - int Cx = xapoints[x] >> 16; - int xap = xapoints[x] & 0xffff; - - const unsigned int *sptr = ypoints[y] + xpoints[x]; - uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); - - int yap = yapoints[y]; - if (yap > 0) { - uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1); - - vx = vmulq_n_u32(vx, 256 - yap); - vr = vmulq_n_u32(vr, yap); - vx = vaddq_u32(vx, vr); - vx = vshrq_n_u32(vx, 8); + auto scaleSection = [&] (int yStart, int yEnd) { + for (int y = yStart; y < yEnd; ++y) { + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + + int yap = yapoints[y]; + if (yap > 0) { + uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1); + + vx = vmulq_n_u32(vx, 256 - yap); + vr = vmulq_n_u32(vr, yap); + vx = vaddq_u32(vx, vr); + vx = vshrq_n_u32(vx, 8); + } + vx = vshrq_n_u32(vx, 14); + const uint16x4_t vx16 = vmovn_u32(vx); + const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); + *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); + if (RGB) + *dptr |= 0xff000000; + dptr++; } - vx = vshrq_n_u32(vx, 14); - const uint16x4_t vx16 = vmovn_u32(vx); - const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); - *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); - if (RGB) - *dptr |= 0xff000000; - dptr++; } + }; +#if QT_CONFIG(thread) + int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); + segments = std::min(segments, dh); + if (segments > 1) { + QSemaphore semaphore; + int y = 0; + for (int i = 0; i < segments; ++i) { + int yn = (dh - y) / (segments - i); + QThreadPool::globalInstance()->start([&, y, yn]() { + scaleSection(y, y + yn); + semaphore.release(1); + }); + y += yn; + } + semaphore.acquire(segments); + return; } +#endif + scaleSection(0, dh); } template<bool RGB> @@ -153,43 +200,64 @@ void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest, int *xapoints = isi->xapoints; int *yapoints = isi->yapoints; - for (int y = 0; y < dh; y++) { - int Cy = yapoints[y] >> 16; - int yap = yapoints[y] & 0xffff; + auto scaleSection = [&] (int yStart, int yEnd) { + for (int y = yStart; y < yEnd; ++y) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; - unsigned int *dptr = dest + (y * dow); - for (int x = 0; x < dw; x++) { - const int Cx = xapoints[x] >> 16; - const int xap = xapoints[x] & 0xffff; + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const int Cx = xapoints[x] >> 16; + const int xap = xapoints[x] & 0xffff; - const unsigned int *sptr = ypoints[y] + xpoints[x]; - uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); - vx = vshrq_n_u32(vx, 4); - uint32x4_t vr = vmulq_n_u32(vx, yap); + const unsigned int *sptr = ypoints[y] + xpoints[x]; + uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + vx = vshrq_n_u32(vx, 4); + uint32x4_t vr = vmulq_n_u32(vx, yap); - int j; - for (j = (1 << 14) - yap; j > Cy; j -= Cy) { + int j; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) { + sptr += sow; + vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + vx = vshrq_n_u32(vx, 4); + vx = vmulq_n_u32(vx, Cy); + vr = vaddq_u32(vr, vx); + } sptr += sow; vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); vx = vshrq_n_u32(vx, 4); - vx = vmulq_n_u32(vx, Cy); + vx = vmulq_n_u32(vx, j); vr = vaddq_u32(vr, vx); + + vx = vshrq_n_u32(vr, 24); + const uint16x4_t vx16 = vmovn_u32(vx); + const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); + *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); + if (RGB) + *dptr |= 0xff000000; + dptr++; } - sptr += sow; - vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); - vx = vshrq_n_u32(vx, 4); - vx = vmulq_n_u32(vx, j); - vr = vaddq_u32(vr, vx); - - vx = vshrq_n_u32(vr, 24); - const uint16x4_t vx16 = vmovn_u32(vx); - const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); - *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); - if (RGB) - *dptr |= 0xff000000; - dptr++; } + }; +#if QT_CONFIG(thread) + int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16); + segments = std::min(segments, dh); + if (segments > 1) { + QSemaphore semaphore; + int y = 0; + for (int i = 0; i < segments; ++i) { + int yn = (dh - y) / (segments - i); + QThreadPool::globalInstance()->start([&, y, yn]() { + scaleSection(y, y + yn); + semaphore.release(1); + }); + y += yn; + } + semaphore.acquire(segments); + return; } +#endif + scaleSection(0, dh); } template void qt_qimageScaleAARGBA_up_x_down_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest, |