diff options
author | Liang Qi <liang.qi@theqtcompany.com> | 2015-04-06 19:10:10 +0200 |
---|---|---|
committer | Liang Qi <liang.qi@theqtcompany.com> | 2015-04-06 19:10:25 +0200 |
commit | 20cac3d9c9c22153e9e316daff32b6050ff6be6b (patch) | |
tree | b563a89475df9afb4f40841ec371be9488d5b1ed /src/gui/painting | |
parent | 8ce85d74b692392a4ea0785360156f37418cff13 (diff) | |
parent | 9eb0b09abce28b11e4915fc9c3b3e996eb19cef2 (diff) |
Merge remote-tracking branch 'origin/5.5' into dev
Change-Id: If9fd98525b6b4ca07e5e006fc98bf372a73b8a21
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qcosmeticstroker.cpp | 20 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 131 |
2 files changed, 67 insertions, 84 deletions
diff --git a/src/gui/painting/qcosmeticstroker.cpp b/src/gui/painting/qcosmeticstroker.cpp index f82b098012..8fb5f4fd3f 100644 --- a/src/gui/painting/qcosmeticstroker.cpp +++ b/src/gui/painting/qcosmeticstroker.cpp @@ -602,8 +602,7 @@ void QCosmeticStroker::drawPath(const QVectorPath &path) if (!closed && drawCaps && points == end - 2) caps |= CapEnd; - QCosmeticStroker::Point last = this->lastPixel; - bool unclipped = stroke(this, p.x(), p.y(), p2.x(), p2.y(), caps); + bool moveNextStart = stroke(this, p.x(), p.y(), p2.x(), p2.y(), caps); /* fix for gaps in polylines with fastpen and aliased in a sequence of points with small distances: if current point p2 has been dropped @@ -613,14 +612,8 @@ void QCosmeticStroker::drawPath(const QVectorPath &path) still need to update p to avoid drawing the line after this one from a bad starting position. */ - if (fastPenAliased && unclipped) { - if (last.x != lastPixel.x || last.y != lastPixel.y - || points == begin + 2 || points == end - 2) { - p = p2; - } - } else { + if (!fastPenAliased || moveNextStart || points == begin + 2 || points == end - 2) p = p2; - } points += 2; caps = NoCaps; } @@ -727,8 +720,9 @@ template<DrawPixel drawPixel, class Dasher> static bool drawLine(QCosmeticStroker *stroker, qreal rx1, qreal ry1, qreal rx2, qreal ry2, int caps) { if (stroker->clipLine(rx1, ry1, rx2, ry2)) - return false; + return true; + bool didDraw = false; const int half = stroker->legacyRounding ? 31 : 0; int x1 = toF26Dot6(rx1) + half; int y1 = toF26Dot6(ry1) + half; @@ -814,6 +808,7 @@ static bool drawLine(QCosmeticStroker *stroker, qreal rx1, qreal ry1, qreal rx2, dasher.adjust(); x += xinc; } while (++y < ys); + didDraw = true; } } else { // horizontal @@ -889,10 +884,11 @@ static bool drawLine(QCosmeticStroker *stroker, qreal rx1, qreal ry1, qreal rx2, dasher.adjust(); y += yinc; } while (++x < xs); + didDraw = true; } } stroker->lastPixel = last; - return true; + return didDraw; } @@ -900,7 +896,7 @@ template<DrawPixel drawPixel, class Dasher> static bool drawLineAA(QCosmeticStroker *stroker, qreal rx1, qreal ry1, qreal rx2, qreal ry2, int caps) { if (stroker->clipLine(rx1, ry1, rx2, ry2)) - return false; + return true; int x1 = toF26Dot6(rx1); int y1 = toF26Dot6(ry1); diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 538389f15f..57bb111538 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1539,40 +1539,29 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); const __m128i v_256 = _mm_set1_epi16(256); const __m128i v_disty = _mm_set1_epi16(disty); - __m128i v_fdx = _mm_set1_epi32(fdx*4); - - ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1); - - union Vect_buffer { __m128i vect; quint32 i[4]; }; - Vect_buffer v_fx; - - for (int i = 0; i < 4; i++) { - v_fx.i[i] = fx; - fx += fdx; - } + const __m128i v_fdx = _mm_set1_epi32(fdx*4); + __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); while (b < boundedEnd) { - - Vect_buffer tl, tr, bl, br; - - for (int i = 0; i < 4; i++) { - int x1 = v_fx.i[i] >> 16; - const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1; - const uint *addr_tr = addr_tl + 1; - tl.i[i] = *addr_tl; - tr.i[i] = *addr_tr; - bl.i[i] = *(addr_tl+secondLine); - br.i[i] = *(addr_tr+secondLine); - } - __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); + __m128i offset = _mm_srli_epi32(v_fx, 16); + const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset3 = _mm_cvtsi128_si32(offset); + const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]); + const __m128i tr = _mm_setr_epi32(s1[offset0 + 1], s1[offset1 + 1], s1[offset2 + 1], s1[offset3 + 1]); + const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]); + const __m128i br = _mm_setr_epi32(s2[offset0 + 1], s2[offset1 + 1], s2[offset2 + 1], s2[offset3 + 1]); + + __m128i v_distx = _mm_srli_epi16(v_fx, 12); v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b); - b+=4; - v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); + interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b); + b += 4; + v_fx = _mm_add_epi32(v_fx, v_fdx); } - fx = v_fx.i[0]; + fx = _mm_cvtsi128_si32(v_fx); #elif defined(__ARM_NEON__) BILINEAR_DOWNSCALE_BOUNDS_PROLOG @@ -1687,9 +1676,9 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c uint tr = s1[x2]; \ uint bl = s2[x1]; \ uint br = s2[x2]; \ - int distx = (fx & 0x0000ffff) >> 12; \ - int disty = (fy & 0x0000ffff) >> 12; \ - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \ + int distx = (fx & 0x0000ffff) >> 8; \ + int disty = (fy & 0x0000ffff) >> 8; \ + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); \ fx += fdx; \ fy += fdy; \ ++b; \ @@ -1702,62 +1691,54 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); const __m128i v_256 = _mm_set1_epi16(256); - __m128i v_fdx = _mm_set1_epi32(fdx*4); - __m128i v_fdy = _mm_set1_epi32(fdy*4); + const __m128i v_fdx = _mm_set1_epi32(fdx*4); + const __m128i v_fdy = _mm_set1_epi32(fdy*4); + __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); + __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy); const uchar *textureData = data->texture.imageData; const int bytesPerLine = data->texture.bytesPerLine; - - union Vect_buffer { __m128i vect; qint32 i[4]; }; - Vect_buffer v_fx, v_fy; - - for (int i = 0; i < 4; i++) { - v_fx.i[i] = fx; - v_fy.i[i] = fy; - fx += fdx; - fy += fdy; - } + const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0)); while (b < boundedEnd) { - if (fdx > 0 && (v_fx.i[3] >> 16) >= image_x2) + if (fdx > 0 && (short)_mm_extract_epi16(v_fx, 7) >= image_x2) break; - if (fdx < 0 && (v_fx.i[3] >> 16) < image_x1) + if (fdx < 0 && (short)_mm_extract_epi16(v_fx, 7) < image_x1) break; - if (fdy > 0 && (v_fy.i[3] >> 16) >= image_y2) + if (fdy > 0 && (short)_mm_extract_epi16(v_fy, 7) >= image_y2) break; - if (fdy < 0 && (v_fy.i[3] >> 16) < image_y1) + if (fdy < 0 && (short)_mm_extract_epi16(v_fy, 7) < image_y1) break; - Vect_buffer tl, tr, bl, br; - Vect_buffer v_fx_shifted, v_fy_shifted; - v_fx_shifted.vect = _mm_srli_epi32(v_fx.vect, 16); - v_fy_shifted.vect = _mm_srli_epi32(v_fy.vect, 16); - - for (int i = 0; i < 4; i++) { - const int x1 = v_fx_shifted.i[i]; - const int y1 = v_fy_shifted.i[i]; - const uchar *sl = textureData + bytesPerLine * y1; - const uint *s1 = (const uint *)sl; - const uint *s2 = (const uint *)(sl + bytesPerLine); - tl.i[i] = s1[x1]; - tr.i[i] = s1[x1+1]; - bl.i[i] = s2[x1]; - br.i[i] = s2[x1+1]; - } - __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); - __m128i v_disty = _mm_srli_epi16(v_fy.vect, 12); + const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, 16), _mm_setzero_si128()); + // 4x16bit * 4x16bit -> 4x32bit + __m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl)); + offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, 16)); + const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, 4); + const int offset3 = _mm_cvtsi128_si32(offset); + const uint *topData = (const uint *)(textureData); + const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]); + const __m128i tr = _mm_setr_epi32(topData[offset0 + 1], topData[offset1 + 1], topData[offset2 + 1], topData[offset3 + 1]); + const uint *bottomData = (const uint *)(textureData + bytesPerLine); + const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]); + const __m128i br = _mm_setr_epi32(bottomData[offset0 + 1], bottomData[offset1 + 1], bottomData[offset2 + 1], bottomData[offset3 + 1]); + + __m128i v_distx = _mm_srli_epi16(v_fx, 12); + __m128i v_disty = _mm_srli_epi16(v_fy, 12); v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0)); - interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b); - b+=4; - v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); - v_fy.vect = _mm_add_epi32(v_fy.vect, v_fdy); + interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b); + b += 4; + v_fx = _mm_add_epi32(v_fx, v_fdx); + v_fy = _mm_add_epi32(v_fy, v_fdy); } - fx = v_fx.i[0]; - fy = v_fy.i[0]; + fx = _mm_cvtsi128_si32(v_fx); + fy = _mm_cvtsi128_si32(v_fy); #endif } @@ -1778,10 +1759,16 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c uint bl = s2[x1]; uint br = s2[x2]; +#if defined(__SSE2__) + // The SSE2 optimized interpolate_4_pixels is faster than interpolate_4_pixels_16. + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty); +#else int distx = (fx & 0x0000ffff) >> 12; int disty = (fy & 0x0000ffff) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); +#endif fx += fdx; fy += fdy; |