diff options
Diffstat (limited to 'src/gui')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 157 |
1 file changed, 157 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 7a5c5dc660..b5ccafdf9a 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1709,6 +1709,163 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c } } else { //we are zooming less than 8x, use 4bit precision + + if (blendType != BlendTransformedBilinearTiled) { +#define BILINEAR_ROTATE_BOUNDS_PROLOG \ + while (b < end) { \ + int x1 = (fx >> 16); \ + int x2; \ + int y1 = (fy >> 16); \ + int y2; \ + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \ + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); \ + if (x1 != x2 && y1 != y2) \ + break; \ + const uint *s1 = (const uint *)data->texture.scanLine(y1); \ + const uint *s2 = (const uint *)data->texture.scanLine(y2); \ + uint tl = s1[x1]; \ + uint tr = s1[x2]; \ + uint bl = s2[x1]; \ + uint br = s2[x2]; \ + int distx = (fx & 0x0000ffff) >> 12; \ + int disty = (fy & 0x0000ffff) >> 12; \ + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \ + fx += fdx; \ + fy += fdy; \ + ++b; \ + } \ + uint *boundedEnd = end - 3; \ + boundedEnd -= 3; + +#if defined(__SSE2__) + BILINEAR_ROTATE_BOUNDS_PROLOG + + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i v_256 = _mm_set1_epi16(256); + __m128i v_fdx = _mm_set1_epi32(fdx*4); + __m128i v_fdy = _mm_set1_epi32(fdy*4); + + const uchar *textureData = data->texture.imageData; + const int bytesPerLine = data->texture.bytesPerLine; + + union Vect_buffer { __m128i vect; qint32 i[4]; }; + Vect_buffer v_fx, v_fy; + + for (int i = 0; i < 4; i++) { + v_fx.i[i] = fx; + v_fy.i[i] = fy; + fx += fdx; + fy += fdy; + } + + while (b < boundedEnd) { + if (fdx > 0 && (v_fx.i[3] >> 16) >= image_x2) + break; + if (fdx < 0 && (v_fx.i[3] >> 16) < image_x1) + break; + if (fdy > 0 && (v_fy.i[3] >> 16) >= image_y2) + break; + if (fdy < 0 && (v_fy.i[3] 
>> 16) < image_y1) + break; + + Vect_buffer tl, tr, bl, br; + Vect_buffer v_fx_shifted, v_fy_shifted; + v_fx_shifted.vect = _mm_srli_epi32(v_fx.vect, 16); + v_fy_shifted.vect = _mm_srli_epi32(v_fy.vect, 16); + + for (int i = 0; i < 4; i++) { + const int x1 = v_fx_shifted.i[i]; + const int y1 = v_fy_shifted.i[i]; + const uchar *sl = textureData + bytesPerLine * y1; + const uint *s1 = (const uint *)sl; + const uint *s2 = (const uint *)(sl + bytesPerLine); + tl.i[i] = s1[x1]; + tr.i[i] = s1[x1+1]; + bl.i[i] = s2[x1]; + br.i[i] = s2[x1+1]; + } + __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); + __m128i v_disty = _mm_srli_epi16(v_fy.vect, 12); + v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); + v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); + + interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b); + b+=4; + v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); + v_fy.vect = _mm_add_epi32(v_fy.vect, v_fdy); + } + fx = v_fx.i[0]; + fy = v_fy.i[0]; +#elif defined(__ARM_NEON__) + BILINEAR_ROTATE_BOUNDS_PROLOG + + const int16x8_t colorMask = vdupq_n_s16(0x00ff); + const int16x8_t invColorMask = vmvnq_s16(colorMask); + const int16x8_t v_256 = vdupq_n_s16(256); + int32x4_t v_fdx = vdupq_n_s32(fdx*4); + int32x4_t v_fdy = vdupq_n_s32(fdy*4); + + const uchar *textureData = data->texture.imageData; + const int bytesPerLine = data->texture.bytesPerLine; + + union Vect_buffer { int32x4_t vect; quint32 i[4]; }; + Vect_buffer v_fx, v_fy; + + for (int i = 0; i < 4; i++) { + v_fx.i[i] = fx; + v_fy.i[i] = fy; + fx += fdx; + fy += fdy; + } + + const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); + + while (b < boundedEnd) { + if (fdx > 0 && (v_fx.i[3] >> 16) >= image_x2) + break; + if (fdx < 0 && (v_fx.i[3] >> 16) < image_x1) + break; + if (fdy > 0 && (v_fy.i[3] >> 16) >= image_y2) + break; 
+ if (fdy < 0 && (v_fy.i[3] >> 16) < image_y1) + break; + + Vect_buffer tl, tr, bl, br; + + Vect_buffer v_fx_shifted, v_fy_shifted; + v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16); + v_fy_shifted.vect = vshrq_n_s32(v_fy.vect, 16); + + for (int i = 0; i < 4; i++) { + const int x1 = v_fx_shifted.i[i]; + const int y1 = v_fy_shifted.i[i]; + const uchar *sl = textureData + bytesPerLine * y1; + const uint *s1 = (const uint *)sl; + const uint *s2 = (const uint *)(sl + bytesPerLine); + tl.i[i] = s1[x1]; + tr.i[i] = s1[x1+1]; + bl.i[i] = s2[x1]; + br.i[i] = s2[x1+1]; + } + + int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12); + int32x4_t v_disty = vshrq_n_s32(vandq_s32(v_fy.vect, v_ffff_mask), 12); + v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16)); + v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16)); + int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4); + + interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b); + b+=4; + v_fx.vect = vaddq_s32(v_fx.vect, v_fdx); + v_fy.vect = vaddq_s32(v_fy.vect, v_fdy); + } + fx = v_fx.i[0]; + fy = v_fy.i[0]; +#endif + } + while (b < end) { int x1 = (fx >> 16); int x2; |