From 715fd425efe1e10f88d20577d6fba680072b05d0 Mon Sep 17 00:00:00 2001 From: Allan Sandfeld Jensen Date: Tue, 29 Nov 2016 12:20:04 +0100 Subject: Split fetchTransformedBilinear and fetchTransformedBilinear64 Split out basic fetching to share it between the two. Change-Id: I6c27a7cea3a5c10b511232edc68bd32490514a27 Reviewed-by: Eirik Aavitsland --- src/gui/painting/qdrawhelper.cpp | 645 ++++++++++++++++++--------------------- 1 file changed, 289 insertions(+), 356 deletions(-) (limited to 'src') diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 5c38648fe7..8c9d1e1c49 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -2657,6 +2657,241 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c return buffer; } +template +static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, uint *end, const QTextureData &image, + int &fx, int &fy, int fdx, int /*fdy*/) +{ + const QPixelLayout *layout = &qPixelLayouts[image.format]; + const QVector *clut = image.colorTable; + Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout->bpp); + // When templated 'fetch' should be inlined at compile time: + const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : fetchPixels; + const ConvertFunc convertToARGB32PM = layout->convertToARGB32PM; + + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + const uchar *s1 = image.scanLine(y1); + const uchar *s2 = image.scanLine(y2); + + int disty = (fy & 0x0000ffff) >> 8; + int idisty = 256 - disty; + int x = fx >> 16; + int length = end - b; + + // The idea is first to do the interpolation between the row s1 and the row s2 + // into an intermediate buffer, then we interpolate between two pixel of this buffer. + // +1 for the last pixel to interpolate with, and +1 for rounding errors. + uint buf1[buffer_size + 2]; + uint buf2[buffer_size + 2]; + const uint *ptr1; + const uint *ptr2; + + int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2; + Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + + if (blendType == BlendTransformedBilinearTiled) { + x %= image.width; + if (x < 0) + x += image.width; + int len1 = qMin(count, image.width - x); + int len2 = qMin(x, count - len1); + + ptr1 = fetch(buf1, s1, x, len1); + ptr1 = convertToARGB32PM(buf1, ptr1, len1, clut, 0); + ptr2 = fetch(buf2, s2, x, len1); + ptr2 = convertToARGB32PM(buf2, ptr2, len1, clut, 0); + for (int i = 0; i < len1; ++i) { + uint t = ptr1[i]; + uint b = ptr2[i]; + buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + } + + if (len2) { + ptr1 = fetch(buf1 + len1, s1, 0, len2); + ptr1 = convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0); + ptr2 = fetch(buf2 + len1, s2, 0, len2); + ptr2 = convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0); + for (int i = 0; i < len2; ++i) { + uint t = ptr1[i]; + uint b = ptr2[i]; + buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + } + } + for (int i = image.width; i < count; ++i) { + buf1[i] = buf1[i - image.width]; + buf2[i] = buf2[i - image.width]; + } + } else { + int start = qMax(x, image.x1); + int end = qMin(x + count, image.x2); + int len = qMax(1, end - start); + int leading = start - x; + + ptr1 = fetch(buf1 + leading, s1, start, len); + ptr1 = convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0); + ptr2 = fetch(buf2 + leading, s2, start, len); + ptr2 = convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0); + + for (int i = 0; i < len; ++i) { + uint t = ptr1[i]; + uint b = ptr2[i]; + buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + } + + for (int i = 0; i < leading; ++i) { + buf1[i] = buf1[leading]; + buf2[i] = buf2[leading]; + } + for (int i = leading + len; i < count; ++i) { + buf1[i] = buf1[i - 1]; + buf2[i] = buf2[i - 1]; + } + } + + // Now interpolate the values from the intermediate_buffer to get the final result. + fx &= fixed_scale - 1; + Q_ASSERT((fx >> 16) == 0); + while (b < end) { + int x1 = (fx >> 16); + int x2 = x1 + 1; + Q_ASSERT(x1 >= 0); + Q_ASSERT(x2 < count); + + int distx = (fx & 0x0000ffff) >> 8; + int idistx = 256 - distx; + int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff; + int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00; + *b++ = rb | ag; + fx += fdx; + } +} + + +typedef void (QT_FASTCALL *BilinearFastTransformFetcher)(uint *buf1, uint *buf2, const int len, const QTextureData &image, + int fx, int fy, const int fdx, const int fdy); + +template +static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, const int len, const QTextureData &image, + int fx, int fy, const int fdx, const int fdy) +{ + const QPixelLayout &layout = qPixelLayouts[image.format]; + Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout.bpp); + // When templated 'fetch1' should be inlined at compile time: + const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel; + if (fdy == 0) { + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + const uchar *s1 = image.scanLine(y1); + const uchar *s2 = image.scanLine(y2); + + int i = 0; + if (blendType == BlendTransformedBilinear) { + for (; i < len; ++i) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + if (x1 != x2) + break; + buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1); + buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1); + fx += fdx; + } + int fastLen = len; + if (fdx > 0) + fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx)); + else if (fdx < 0) + fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx)); + + for (; i < fastLen; ++i) { + int x = (fx >> 16); + buf1[i * 2 + 0] = fetch1(s1, x); + buf1[i * 2 + 1] = fetch1(s1, x + 1); + buf2[i * 2 + 0] = fetch1(s2, x); + buf2[i * 2 + 1] = fetch1(s2, x + 1); + fx += fdx; + } + } + + for (; i < len; ++i) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + buf1[i * 2 + 0] = fetch1(s1, x1); + buf1[i * 2 + 1] = fetch1(s1, x2); + buf2[i * 2 + 0] = fetch1(s2, x1); + buf2[i * 2 + 1] = fetch1(s2, x2); + fx += fdx; + } + } else { + int i = 0; + if (blendType == BlendTransformedBilinear) { + for (; i < len; ++i) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + if (x1 != x2 && y1 != y2) + break; + const uchar *s1 = image.scanLine(y1); + const uchar *s2 = image.scanLine(y2); + buf1[i * 2 + 0] = fetch1(s1, x1); + buf1[i * 2 + 1] = fetch1(s1, x2); + buf2[i * 2 + 0] = fetch1(s2, x1); + buf2[i * 2 + 1] = fetch1(s2, x2); + fx += fdx; + fy += fdy; + } + int fastLen = len; + if (fdx > 0) + fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx)); + else if (fdx < 0) + fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx)); + if (fdy > 0) + fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy)); + else if (fdy < 0) + fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy)); + + for (; i < fastLen; ++i) { + int x = (fx >> 16); + int y = (fy >> 16); + const uchar *s1 = image.scanLine(y); + const uchar *s2 = s1 + image.bytesPerLine; + buf1[i * 2 + 0] = fetch1(s1, x); + buf1[i * 2 + 1] = fetch1(s1, x + 1); + buf2[i * 2 + 0] = fetch1(s2, x); + buf2[i * 2 + 1] = fetch1(s2, x + 1); + fx += fdx; + fy += fdy; + } + } + + for (; i < len; ++i) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); + + const uchar *s1 = image.scanLine(y1); + const uchar *s2 = image.scanLine(y2); + buf1[i * 2 + 0] = fetch1(s1, x1); + buf1[i * 2 + 1] = fetch1(s1, x2); + buf2[i * 2 + 0] = fetch1(s2, x1); + buf2[i * 2 + 1] = fetch1(s2, x2); + fx += fdx; + fy += fdy; + } + } +} + // blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled template static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *, @@ -2664,19 +2899,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper { const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; const QVector *clut = data->texture.colorTable; - if (bpp != QPixelLayout::BPPNone) // Like this to not ICE on GCC 5.3.1 - Q_ASSERT(layout->bpp == bpp); - // When templated 'fetch' should be inlined at compile time: - const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : FetchPixelsFunc(fetchPixels); - const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel); - - int image_width = data->texture.width; - int image_height = data->texture.height; - - int image_x1 = data->texture.x1; - int image_y1 = data->texture.y1; - int image_x2 = data->texture.x2 - 1; - int image_y2 = data->texture.y2 - 1; + Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp); const qreal cx = x + qreal(0.5); const qreal cy = y + qreal(0.5); @@ -2692,203 +2915,80 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper fx -= half_point; fy -= half_point; - if (fdy == 0) { //simple scale, no rotation - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); - + if (fdy == 0) { // simple scale, no rotation or shear if (fdx <= fixed_scale && fdx > 0) { // scale up on X - int disty = (fy & 0x0000ffff) >> 8; - int idisty = 256 - disty; - int x = fx >> 16; - - // The idea is first to do the interpolation between the row s1 and the row s2 - // into an intermediate buffer, then we interpolate between two pixel of this buffer. - // +1 for the last pixel to interpolate with, and +1 for rounding errors. - uint buf1[buffer_size + 2]; - uint buf2[buffer_size + 2]; - const uint *ptr1; - const uint *ptr2; - - int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2; - Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case - - if (blendType == BlendTransformedBilinearTiled) { - x %= image_width; - if (x < 0) - x += image_width; - int len1 = qMin(count, image_width - x); - int len2 = qMin(x, count - len1); - - ptr1 = fetch(buf1, s1, x, len1); - ptr1 = layout->convertToARGB32PM(buf1, ptr1, len1, clut, 0); - ptr2 = fetch(buf2, s2, x, len1); - ptr2 = layout->convertToARGB32PM(buf2, ptr2, len1, clut, 0); - for (int i = 0; i < len1; ++i) { - uint t = ptr1[i]; - uint b = ptr2[i]; - buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; - } - - if (len2) { - ptr1 = fetch(buf1 + len1, s1, 0, len2); - ptr1 = layout->convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0); - ptr2 = fetch(buf2 + len1, s2, 0, len2); - ptr2 = layout->convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0); - for (int i = 0; i < len2; ++i) { - uint t = ptr1[i]; - uint b = ptr2[i]; - buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; - } - } - for (int i = image_width; i < count; ++i) { - buf1[i] = buf1[i - image_width]; - buf2[i] = buf2[i - image_width]; - } - } else { - int start = qMax(x, image_x1); - int end = qMin(x + count, image_x2 + 1); - int len = qMax(1, end - start); - int leading = start - x; - - ptr1 = fetch(buf1 + leading, s1, start, len); - ptr1 = layout->convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0); - ptr2 = fetch(buf2 + leading, s2, start, len); - ptr2 = layout->convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0); - - for (int i = 0; i < len; ++i) { - uint t = ptr1[i]; - uint b = ptr2[i]; - buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; - buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff; - } - - for (int i = 0; i < leading; ++i) { - buf1[i] = buf1[leading]; - buf2[i] = buf2[leading]; - } - for (int i = leading + len; i < count; ++i) { - buf1[i] = buf1[i - 1]; - buf2[i] = buf2[i - 1]; - } - } - - // Now interpolate the values from the intermediate_buffer to get the final result. - fx &= fixed_scale - 1; - Q_ASSERT((fx >> 16) == 0); - for (int i = 0; i < length; ++i) { - int x1 = (fx >> 16); - int x2 = x1 + 1; - Q_ASSERT(x1 >= 0); - Q_ASSERT(x2 < count); - - int distx = (fx & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff; - int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00; - buffer[i] = rb | ag; - fx += fdx; - } + fetchTransformedBilinear_simple_upscale_helper(buffer, buffer + length, data->texture, fx, fy, fdx, fdy); } else { + const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher; + uint buf1[buffer_size]; uint buf2[buffer_size]; uint *b = buffer; while (length) { int len = qMin(length, buffer_size / 2); - int fracX = fx; - for (int i = 0; i < len; ++i) { - int x1 = (fx >> 16); - int x2; - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - buf1[i * 2 + 0] = fetch1(s1, x1); - buf1[i * 2 + 1] = fetch1(s1, x2); - buf2[i * 2 + 0] = fetch1(s2, x1); - buf2[i * 2 + 1] = fetch1(s2, x2); - fx += fdx; - } + fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0); layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x int disty = (fy & 0x0000ffff) >> 8; for (int i = 0; i < len; ++i) { - int distx = (fracX & 0x0000ffff) >> 8; + int distx = (fx & 0x0000ffff) >> 8; b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty); - fracX += fdx; + fx += fdx; } - } else { //scale down + } else { int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; for (int i = 0; i < len; ++i) { uint tl = buf1[i * 2 + 0]; uint tr = buf1[i * 2 + 1]; uint bl = buf2[i * 2 + 0]; uint br = buf2[i * 2 + 1]; - int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12; + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); - fracX += fdx; + fx += fdx; } } length -= len; b += len; } } - } else { //rotation + } else { // rotation or shear + const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher; + uint buf1[buffer_size]; uint buf2[buffer_size]; uint *b = buffer; - while (length) { int len = qMin(length, buffer_size / 2); - int fracX = fx; - int fracY = fy; - for (int i = 0; i < len; ++i) { - int x1 = (fx >> 16); - int x2; - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); - - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); - buf1[i * 2 + 0] = fetch1(s1, x1); - buf1[i * 2 + 1] = fetch1(s1, x2); - buf2[i * 2 + 0] = fetch1(s2, x1); - buf2[i * 2 + 1] = fetch1(s2, x2); - fx += fdx; - fy += fdy; - } + fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy); layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0); layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0); - if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) { - //if we are zooming more than 8 times, we use 8bit precision for the position. + if (qAbs(data->m11) < qreal(1./8.)|| qAbs(data->m22) < qreal(1./8.)) { + // If we are zooming more than 8 times, we use 8bit precision for the position. for (int i = 0; i < len; ++i) { - int distx = (fracX & 0x0000ffff) >> 8; - int disty = (fracY & 0x0000ffff) >> 8; + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty); - fracX += fdx; - fracY += fdy; + fx += fdx; + fy += fdy; } } else { - //we are zooming less than 8x, use 4bit precision + // We are zooming less than 8x, use 4bit precision for (int i = 0; i < len; ++i) { uint tl = buf1[i * 2 + 0]; uint tr = buf1[i * 2 + 1]; uint bl = buf2[i * 2 + 0]; uint br = buf2[i * 2 + 1]; - int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12; - int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12; + int distx = ((fx & 0x0000ffff) + 0x0800) >> 12; + int disty = ((fy & 0x0000ffff) + 0x0800) >> 12; b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); - fracX += fdx; - fracY += fdy; + fx += fdx; + fy += fdy; } } @@ -2897,6 +2997,11 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper } } } else { + // When templated 'fetch' should be inlined at compile time: + const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel; + + const QTextureData &image = data->texture; + const qreal fdx = data->m11; const qreal fdy = data->m12; const qreal fdw = data->m13; @@ -2927,8 +3032,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper distxs[i] = int((px - x1) * 256); distys[i] = int((py - y1) * 256); - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); const uchar *s1 = data->texture.scanLine(y1); const uchar *s2 = data->texture.scanLine(y2); @@ -2969,21 +3074,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; const QVector *clut = data->texture.colorTable; - int image_width = data->texture.width; - int image_height = data->texture.height; - - int image_x1 = data->texture.x1; - int image_y1 = data->texture.y1; - int image_x2 = data->texture.x2 - 1; - int image_y2 = data->texture.y2 - 1; - const qreal cx = x + qreal(0.5); const qreal cy = y + qreal(0.5); - const qreal fdx = data->m11; - const qreal fdy = data->m12; - const qreal fdw = data->m13; - if (data->fast_matrix) { // The increment pr x in the scanline int fdx = (int)(data->m11 * fixed_scale); @@ -2995,14 +3088,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co fx -= half_point; fy -= half_point; + const BilinearFastTransformFetcher fetcher = + (layout->bpp == QPixelLayout::BPP32) + ? fetchTransformedBilinear_fetcher + : fetchTransformedBilinear_fetcher; + if (fdy == 0) { //simple scale, no rotation - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); - FetchPixelFunc fetch = qFetchPixel[layout->bpp]; uint sbuf1[buffer_size]; uint sbuf2[buffer_size]; quint64 buf1[buffer_size]; @@ -3010,84 +3102,19 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co QRgba64 *b = buffer; while (length) { int len = qMin(length, buffer_size / 2); - int fracX = fx; - int i = 0; int disty = (fy & 0x0000ffff); #if defined(__SSE2__) const __m128i vdy = _mm_set1_epi16(disty); const __m128i vidy = _mm_set1_epi16(0x10000 - disty); - if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) { - for (; i < len; ++i) { - int x1 = (fx >> 16); - int x2; - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - if (x1 != x2) - break; - sbuf1[i * 2 + 0] = sbuf1[i * 2 + 1] = ((const uint*)s1)[x1]; - sbuf2[i * 2 + 0] = sbuf2[i * 2 + 1] = ((const uint*)s2)[x1]; - fx += fdx; - } - int fastLen; - if (fdx > 0) - fastLen = qMin(len, int((image_x2 - (fx >> 16)) / data->m11)); - else - fastLen = qMin(len, int((image_x1 - (fx >> 16)) / data->m11)); - fastLen -= 3; - - const __m128i v_fdx = _mm_set1_epi32(fdx*4); - __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); - for (; i < fastLen; i += 4) { - int offset = _mm_extract_epi16(v_fx, 1); - sbuf1[i * 2 + 0] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 0] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1]; - offset = _mm_extract_epi16(v_fx, 3); - sbuf1[i * 2 + 2] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 2] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1]; - offset = _mm_extract_epi16(v_fx, 5); - sbuf1[i * 2 + 4] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 4] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1]; - offset = _mm_extract_epi16(v_fx, 7); - sbuf1[i * 2 + 6] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 6] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1]; - v_fx = _mm_add_epi32(v_fx, v_fdx); - } - fx = _mm_cvtsi128_si32(v_fx); - } #endif - for (; i < len; ++i) { - int x1 = (fx >> 16); - int x2; - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - - if (layout->bpp == QPixelLayout::BPP32) { - sbuf1[i * 2 + 0] = ((const uint*)s1)[x1]; - sbuf1[i * 2 + 1] = ((const uint*)s1)[x2]; - sbuf2[i * 2 + 0] = ((const uint*)s2)[x1]; - sbuf2[i * 2 + 1] = ((const uint*)s2)[x2]; - - } else { - sbuf1[i * 2 + 0] = fetch(s1, x1); - sbuf1[i * 2 + 1] = fetch(s1, x2); - sbuf2[i * 2 + 0] = fetch(s2, x1); - sbuf2[i * 2 + 1] = fetch(s2, x2); - } + fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy); - fx += fdx; - } layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0); if (disty) layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0); for (int i = 0; i < len; ++i) { - int distx = (fracX & 0x0000ffff); + int distx = (fx & 0x0000ffff); #if defined(__SSE2__) const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0)); const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0)); @@ -3104,13 +3131,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co #else b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); #endif - fracX += fdx; + fx += fdx; } length -= len; b += len; } } else { //rotation - FetchPixelFunc fetch = qFetchPixel[layout->bpp]; uint sbuf1[buffer_size]; uint sbuf2[buffer_size]; quint64 buf1[buffer_size]; @@ -3120,117 +3146,18 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co while (b < end) { int len = qMin(length, buffer_size / 2); - int fracX = fx; - int fracY = fy; - int i = 0; -#if defined(__SSE2__) - if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) { - for (; i < len; ++i) { - int x1 = (fx >> 16); - int x2; - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); - if (x1 != x2 && y1 != y2) - break; - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); - sbuf1[i * 2 + 0] = ((const uint*)s1)[x1]; - sbuf1[i * 2 + 1] = ((const uint*)s1)[x2]; - sbuf2[i * 2 + 0] = ((const uint*)s2)[x1]; - sbuf2[i * 2 + 1] = ((const uint*)s2)[x2]; - fx += fdx; - fy += fdy; - } - int fastLen = len; - if (fdx > 0) - fastLen = qMin(fastLen, int((qint64(image_x2) * fixed_scale - fx) / fdx)); - else if (fdx < 0) - fastLen = qMin(fastLen, int((qint64(image_x1) * fixed_scale - fx) / fdx)); - if (fdy > 0) - fastLen = qMin(fastLen, int((qint64(image_y2) * fixed_scale - fy) / fdy)); - else if (fdy < 0) - fastLen = qMin(fastLen, int((qint64(image_y1) * fixed_scale - fy) / fdy)); - fastLen -= 3; - - const __m128i v_fdx = _mm_set1_epi32(fdx*4); - const __m128i v_fdy = _mm_set1_epi32(fdy*4); - __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx); - __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy); - const int bytesPerLine = data->texture.bytesPerLine; - const uchar *s1 = data->texture.imageData; - const uchar *s2 = s1 + bytesPerLine; - const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0)); - for (; i < fastLen; i += 4) { - const __m128i vy = _mm_packs_epi32(_mm_srai_epi32(v_fy, 16), _mm_setzero_si128()); - __m128i voffset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epu16(vy, vbpl)); - voffset = _mm_add_epi32(voffset, _mm_srli_epi32(v_fx, 16)); - - int offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4); - sbuf1[i * 2 + 0] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 0] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1]; - offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4); - sbuf1[i * 2 + 2] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 2] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1]; - offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4); - sbuf1[i * 2 + 4] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 4] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1]; - offset = _mm_cvtsi128_si32(voffset); - sbuf1[i * 2 + 6] = ((const uint*)s1)[offset]; - sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1]; - sbuf2[i * 2 + 6] = ((const uint*)s2)[offset]; - sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1]; - - v_fx = _mm_add_epi32(v_fx, v_fdx); - v_fy = _mm_add_epi32(v_fy, v_fdy); - } - fx = _mm_cvtsi128_si32(v_fx); - fy = _mm_cvtsi128_si32(v_fy); - } -#endif - for (; i < len; ++i) { - int x1 = (fx >> 16); - int x2; - int y1 = (fy >> 16); - int y2; - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); - - if (layout->bpp == QPixelLayout::BPP32) { - sbuf1[i * 2 + 0] = ((const uint*)s1)[x1]; - sbuf1[i * 2 + 1] = ((const uint*)s1)[x2]; - sbuf2[i * 2 + 0] = ((const uint*)s2)[x1]; - sbuf2[i * 2 + 1] = ((const uint*)s2)[x2]; - - } else { - sbuf1[i * 2 + 0] = fetch(s1, x1); - sbuf1[i * 2 + 1] = fetch(s1, x2); - sbuf2[i * 2 + 0] = fetch(s2, x1); - sbuf2[i * 2 + 1] = fetch(s2, x2); - } + fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy); - fx += fdx; - fy += fdy; - } layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0); layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0); for (int i = 0; i < len; ++i) { - int distx = (fracX & 0x0000ffff); - int disty = (fracY & 0x0000ffff); + int distx = (fx & 0x0000ffff); + int disty = (fy & 0x0000ffff); b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty); - fracX += fdx; - fracY += fdy; + fx += fdx; + fy += fdy; } length -= len; @@ -3238,6 +3165,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co } } } else { + const QTextureData &image = data->texture; + + const qreal fdx = data->m11; + const qreal fdy = data->m12; + const qreal fdw = data->m13; + qreal fx = data->m21 * cy + data->m11 * cx + data->dx; qreal fy = data->m22 * cy + data->m12 * cx + data->dy; qreal fw = data->m23 * cy + data->m13 * cx + data->m33; @@ -3267,8 +3200,8 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co distxs[i] = int((px - x1) * (1<<16)); distys[i] = int((py - y1) * (1<<16)); - fetchTransformedBilinear_pixelBounds(image_width, image_x1, image_x2, x1, x2); - fetchTransformedBilinear_pixelBounds(image_height, image_y1, image_y2, y1, y2); + fetchTransformedBilinear_pixelBounds(image.width, image.x1, image.x2 - 1, x1, x2); + fetchTransformedBilinear_pixelBounds(image.height, image.y1, image.y2 - 1, y1, y2); const uchar *s1 = data->texture.scanLine(y1); const uchar *s2 = data->texture.scanLine(y2); -- cgit v1.2.3