summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2016-11-29 12:20:04 +0100
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2017-03-19 11:56:26 +0000
commit715fd425efe1e10f88d20577d6fba680072b05d0 (patch)
tree8662e7f6a4936b8faea29708ce82737438d1b51d /src
parentf1a23a546720e4f1541404185ff8e765463e6bf6 (diff)
Split fetchTransformedBilinear and fetchTransformedBilinear64
Split out basic fetching to share it between the two. Change-Id: I6c27a7cea3a5c10b511232edc68bd32490514a27 Reviewed-by: Eirik Aavitsland <eirik.aavitsland@qt.io>
Diffstat (limited to 'src')
-rw-r--r--src/gui/painting/qdrawhelper.cpp645
1 files changed, 289 insertions, 356 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 5c38648fe7..8c9d1e1c49 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -2657,6 +2657,241 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
return buffer;
}
+template<TextureBlendType blendType, QPixelLayout::BPP bpp>
+static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
+{
+ const QPixelLayout *layout = &qPixelLayouts[image.format];
+ const QVector<QRgb> *clut = image.colorTable;
+ Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout->bpp);
+ // When templated 'fetch' should be inlined at compile time:
+ const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : fetchPixels<bpp>;
+ const ConvertFunc convertToARGB32PM = layout->convertToARGB32PM;
+
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ const uchar *s1 = image.scanLine(y1);
+ const uchar *s2 = image.scanLine(y2);
+
+ int disty = (fy & 0x0000ffff) >> 8;
+ int idisty = 256 - disty;
+ int x = fx >> 16;
+ int length = end - b;
+
+ // The idea is first to do the interpolation between the row s1 and the row s2
+ // into an intermediate buffer, then we interpolate between two pixel of this buffer.
+ // +1 for the last pixel to interpolate with, and +1 for rounding errors.
+ uint buf1[buffer_size + 2];
+ uint buf2[buffer_size + 2];
+ const uint *ptr1;
+ const uint *ptr2;
+
+ int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2;
+ Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
+
+ if (blendType == BlendTransformedBilinearTiled) {
+ x %= image.width;
+ if (x < 0)
+ x += image.width;
+ int len1 = qMin(count, image.width - x);
+ int len2 = qMin(x, count - len1);
+
+ ptr1 = fetch(buf1, s1, x, len1);
+ ptr1 = convertToARGB32PM(buf1, ptr1, len1, clut, 0);
+ ptr2 = fetch(buf2, s2, x, len1);
+ ptr2 = convertToARGB32PM(buf2, ptr2, len1, clut, 0);
+ for (int i = 0; i < len1; ++i) {
+ uint t = ptr1[i];
+ uint b = ptr2[i];
+ buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ }
+
+ if (len2) {
+ ptr1 = fetch(buf1 + len1, s1, 0, len2);
+ ptr1 = convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0);
+ ptr2 = fetch(buf2 + len1, s2, 0, len2);
+ ptr2 = convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0);
+ for (int i = 0; i < len2; ++i) {
+ uint t = ptr1[i];
+ uint b = ptr2[i];
+ buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ }
+ }
+ for (int i = image.width; i < count; ++i) {
+ buf1[i] = buf1[i - image.width];
+ buf2[i] = buf2[i - image.width];
+ }
+ } else {
+ int start = qMax(x, image.x1);
+ int end = qMin(x + count, image.x2);
+ int len = qMax(1, end - start);
+ int leading = start - x;
+
+ ptr1 = fetch(buf1 + leading, s1, start, len);
+ ptr1 = convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0);
+ ptr2 = fetch(buf2 + leading, s2, start, len);
+ ptr2 = convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0);
+
+ for (int i = 0; i < len; ++i) {
+ uint t = ptr1[i];
+ uint b = ptr2[i];
+ buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ }
+
+ for (int i = 0; i < leading; ++i) {
+ buf1[i] = buf1[leading];
+ buf2[i] = buf2[leading];
+ }
+ for (int i = leading + len; i < count; ++i) {
+ buf1[i] = buf1[i - 1];
+ buf2[i] = buf2[i - 1];
+ }
+ }
+
+ // Now interpolate the values from the intermediate_buffer to get the final result.
+ fx &= fixed_scale - 1;
+ Q_ASSERT((fx >> 16) == 0);
+ while (b < end) {
+ int x1 = (fx >> 16);
+ int x2 = x1 + 1;
+ Q_ASSERT(x1 >= 0);
+ Q_ASSERT(x2 < count);
+
+ int distx = (fx & 0x0000ffff) >> 8;
+ int idistx = 256 - distx;
+ int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff;
+ int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00;
+ *b++ = rb | ag;
+ fx += fdx;
+ }
+}
+
+
+typedef void (QT_FASTCALL *BilinearFastTransformFetcher)(uint *buf1, uint *buf2, const int len, const QTextureData &image,
+ int fx, int fy, const int fdx, const int fdy);
+
+template<TextureBlendType blendType, QPixelLayout::BPP bpp>
+static void QT_FASTCALL fetchTransformedBilinear_fetcher(uint *buf1, uint *buf2, const int len, const QTextureData &image,
+ int fx, int fy, const int fdx, const int fdy)
+{
+ const QPixelLayout &layout = qPixelLayouts[image.format];
+ Q_ASSERT(bpp == QPixelLayout::BPPNone || bpp == layout.bpp);
+ // When templated 'fetch1' should be inlined at compile time:
+ const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
+ if (fdy == 0) {
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ const uchar *s1 = image.scanLine(y1);
+ const uchar *s2 = image.scanLine(y2);
+
+ int i = 0;
+ if (blendType == BlendTransformedBilinear) {
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ if (x1 != x2)
+ break;
+ buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
+ buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
+ fx += fdx;
+ }
+ int fastLen = len;
+ if (fdx > 0)
+ fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
+ else if (fdx < 0)
+ fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
+
+ for (; i < fastLen; ++i) {
+ int x = (fx >> 16);
+ buf1[i * 2 + 0] = fetch1(s1, x);
+ buf1[i * 2 + 1] = fetch1(s1, x + 1);
+ buf2[i * 2 + 0] = fetch1(s2, x);
+ buf2[i * 2 + 1] = fetch1(s2, x + 1);
+ fx += fdx;
+ }
+ }
+
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ buf1[i * 2 + 0] = fetch1(s1, x1);
+ buf1[i * 2 + 1] = fetch1(s1, x2);
+ buf2[i * 2 + 0] = fetch1(s2, x1);
+ buf2[i * 2 + 1] = fetch1(s2, x2);
+ fx += fdx;
+ }
+ } else {
+ int i = 0;
+ if (blendType == BlendTransformedBilinear) {
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+ if (x1 != x2 && y1 != y2)
+ break;
+ const uchar *s1 = image.scanLine(y1);
+ const uchar *s2 = image.scanLine(y2);
+ buf1[i * 2 + 0] = fetch1(s1, x1);
+ buf1[i * 2 + 1] = fetch1(s1, x2);
+ buf2[i * 2 + 0] = fetch1(s2, x1);
+ buf2[i * 2 + 1] = fetch1(s2, x2);
+ fx += fdx;
+ fy += fdy;
+ }
+ int fastLen = len;
+ if (fdx > 0)
+ fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
+ else if (fdx < 0)
+ fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
+ if (fdy > 0)
+ fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
+ else if (fdy < 0)
+ fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));
+
+ for (; i < fastLen; ++i) {
+ int x = (fx >> 16);
+ int y = (fy >> 16);
+ const uchar *s1 = image.scanLine(y);
+ const uchar *s2 = s1 + image.bytesPerLine;
+ buf1[i * 2 + 0] = fetch1(s1, x);
+ buf1[i * 2 + 1] = fetch1(s1, x + 1);
+ buf2[i * 2 + 0] = fetch1(s2, x);
+ buf2[i * 2 + 1] = fetch1(s2, x + 1);
+ fx += fdx;
+ fy += fdy;
+ }
+ }
+
+ for (; i < len; ++i) {
+ int x1 = (fx >> 16);
+ int x2;
+ int y1 = (fy >> 16);
+ int y2;
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
+
+ const uchar *s1 = image.scanLine(y1);
+ const uchar *s2 = image.scanLine(y2);
+ buf1[i * 2 + 0] = fetch1(s1, x1);
+ buf1[i * 2 + 1] = fetch1(s1, x2);
+ buf2[i * 2 + 0] = fetch1(s2, x1);
+ buf2[i * 2 + 1] = fetch1(s2, x2);
+ fx += fdx;
+ fy += fdy;
+ }
+ }
+}
+
// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
template<TextureBlendType blendType, QPixelLayout::BPP bpp>
static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
@@ -2664,19 +2899,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
{
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
const QVector<QRgb> *clut = data->texture.colorTable;
- if (bpp != QPixelLayout::BPPNone) // Like this to not ICE on GCC 5.3.1
- Q_ASSERT(layout->bpp == bpp);
- // When templated 'fetch' should be inlined at compile time:
- const FetchPixelsFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixels[layout->bpp] : FetchPixelsFunc(fetchPixels<bpp>);
- const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);
-
- int image_width = data->texture.width;
- int image_height = data->texture.height;
-
- int image_x1 = data->texture.x1;
- int image_y1 = data->texture.y1;
- int image_x2 = data->texture.x2 - 1;
- int image_y2 = data->texture.y2 - 1;
+ Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
const qreal cx = x + qreal(0.5);
const qreal cy = y + qreal(0.5);
@@ -2692,203 +2915,80 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
fx -= half_point;
fy -= half_point;
- if (fdy == 0) { //simple scale, no rotation
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uchar *s1 = data->texture.scanLine(y1);
- const uchar *s2 = data->texture.scanLine(y2);
-
+ if (fdy == 0) { // simple scale, no rotation or shear
if (fdx <= fixed_scale && fdx > 0) { // scale up on X
- int disty = (fy & 0x0000ffff) >> 8;
- int idisty = 256 - disty;
- int x = fx >> 16;
-
- // The idea is first to do the interpolation between the row s1 and the row s2
- // into an intermediate buffer, then we interpolate between two pixel of this buffer.
- // +1 for the last pixel to interpolate with, and +1 for rounding errors.
- uint buf1[buffer_size + 2];
- uint buf2[buffer_size + 2];
- const uint *ptr1;
- const uint *ptr2;
-
- int count = (qint64(length) * fdx + fixed_scale - 1) / fixed_scale + 2;
- Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
-
- if (blendType == BlendTransformedBilinearTiled) {
- x %= image_width;
- if (x < 0)
- x += image_width;
- int len1 = qMin(count, image_width - x);
- int len2 = qMin(x, count - len1);
-
- ptr1 = fetch(buf1, s1, x, len1);
- ptr1 = layout->convertToARGB32PM(buf1, ptr1, len1, clut, 0);
- ptr2 = fetch(buf2, s2, x, len1);
- ptr2 = layout->convertToARGB32PM(buf2, ptr2, len1, clut, 0);
- for (int i = 0; i < len1; ++i) {
- uint t = ptr1[i];
- uint b = ptr2[i];
- buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
- }
-
- if (len2) {
- ptr1 = fetch(buf1 + len1, s1, 0, len2);
- ptr1 = layout->convertToARGB32PM(buf1 + len1, ptr1, len2, clut, 0);
- ptr2 = fetch(buf2 + len1, s2, 0, len2);
- ptr2 = layout->convertToARGB32PM(buf2 + len1, ptr2, len2, clut, 0);
- for (int i = 0; i < len2; ++i) {
- uint t = ptr1[i];
- uint b = ptr2[i];
- buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
- }
- }
- for (int i = image_width; i < count; ++i) {
- buf1[i] = buf1[i - image_width];
- buf2[i] = buf2[i - image_width];
- }
- } else {
- int start = qMax(x, image_x1);
- int end = qMin(x + count, image_x2 + 1);
- int len = qMax(1, end - start);
- int leading = start - x;
-
- ptr1 = fetch(buf1 + leading, s1, start, len);
- ptr1 = layout->convertToARGB32PM(buf1 + leading, ptr1, len, clut, 0);
- ptr2 = fetch(buf2 + leading, s2, start, len);
- ptr2 = layout->convertToARGB32PM(buf2 + leading, ptr2, len, clut, 0);
-
- for (int i = 0; i < len; ++i) {
- uint t = ptr1[i];
- uint b = ptr2[i];
- buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
- }
-
- for (int i = 0; i < leading; ++i) {
- buf1[i] = buf1[leading];
- buf2[i] = buf2[leading];
- }
- for (int i = leading + len; i < count; ++i) {
- buf1[i] = buf1[i - 1];
- buf2[i] = buf2[i - 1];
- }
- }
-
- // Now interpolate the values from the intermediate_buffer to get the final result.
- fx &= fixed_scale - 1;
- Q_ASSERT((fx >> 16) == 0);
- for (int i = 0; i < length; ++i) {
- int x1 = (fx >> 16);
- int x2 = x1 + 1;
- Q_ASSERT(x1 >= 0);
- Q_ASSERT(x2 < count);
-
- int distx = (fx & 0x0000ffff) >> 8;
- int idistx = 256 - distx;
- int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff;
- int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00;
- buffer[i] = rb | ag;
- fx += fdx;
- }
+ fetchTransformedBilinear_simple_upscale_helper<blendType, bpp>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
} else {
+ const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
+
uint buf1[buffer_size];
uint buf2[buffer_size];
uint *b = buffer;
while (length) {
int len = qMin(length, buffer_size / 2);
- int fracX = fx;
- for (int i = 0; i < len; ++i) {
- int x1 = (fx >> 16);
- int x2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- buf1[i * 2 + 0] = fetch1(s1, x1);
- buf1[i * 2 + 1] = fetch1(s1, x2);
- buf2[i * 2 + 0] = fetch1(s2, x1);
- buf2[i * 2 + 1] = fetch1(s2, x2);
- fx += fdx;
- }
+ fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
if ((fdx < 0 && fdx > -(fixed_scale / 8)) || qAbs(data->m22) < qreal(1./8.)) { // scale up more than 8x
int disty = (fy & 0x0000ffff) >> 8;
for (int i = 0; i < len; ++i) {
- int distx = (fracX & 0x0000ffff) >> 8;
+ int distx = (fx & 0x0000ffff) >> 8;
b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
- fracX += fdx;
+ fx += fdx;
}
- } else { //scale down
+ } else {
int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
for (int i = 0; i < len; ++i) {
uint tl = buf1[i * 2 + 0];
uint tr = buf1[i * 2 + 1];
uint bl = buf2[i * 2 + 0];
uint br = buf2[i * 2 + 1];
- int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
- fracX += fdx;
+ fx += fdx;
}
}
length -= len;
b += len;
}
}
- } else { //rotation
+ } else { // rotation or shear
+ const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
+
uint buf1[buffer_size];
uint buf2[buffer_size];
uint *b = buffer;
-
while (length) {
int len = qMin(length, buffer_size / 2);
- int fracX = fx;
- int fracY = fy;
- for (int i = 0; i < len; ++i) {
- int x1 = (fx >> 16);
- int x2;
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
-
- const uchar *s1 = data->texture.scanLine(y1);
- const uchar *s2 = data->texture.scanLine(y2);
- buf1[i * 2 + 0] = fetch1(s1, x1);
- buf1[i * 2 + 1] = fetch1(s1, x2);
- buf2[i * 2 + 0] = fetch1(s2, x1);
- buf2[i * 2 + 1] = fetch1(s2, x2);
- fx += fdx;
- fy += fdy;
- }
+ fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
- if (qAbs(data->m11) < qreal(1./8.) || qAbs(data->m22) < qreal(1./8.) ) {
- //if we are zooming more than 8 times, we use 8bit precision for the position.
+ if (qAbs(data->m11) < qreal(1./8.)|| qAbs(data->m22) < qreal(1./8.)) {
+ // If we are zooming more than 8 times, we use 8bit precision for the position.
for (int i = 0; i < len; ++i) {
- int distx = (fracX & 0x0000ffff) >> 8;
- int disty = (fracY & 0x0000ffff) >> 8;
+ int distx = (fx & 0x0000ffff) >> 8;
+ int disty = (fy & 0x0000ffff) >> 8;
b[i] = interpolate_4_pixels(buf1 + i * 2, buf2 + i * 2, distx, disty);
- fracX += fdx;
- fracY += fdy;
+ fx += fdx;
+ fy += fdy;
}
} else {
- //we are zooming less than 8x, use 4bit precision
+ // We are zooming less than 8x, use 4bit precision
for (int i = 0; i < len; ++i) {
uint tl = buf1[i * 2 + 0];
uint tr = buf1[i * 2 + 1];
uint bl = buf2[i * 2 + 0];
uint br = buf2[i * 2 + 1];
- int distx = ((fracX & 0x0000ffff) + 0x0800) >> 12;
- int disty = ((fracY & 0x0000ffff) + 0x0800) >> 12;
+ int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
+ int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
- fracX += fdx;
- fracY += fdy;
+ fx += fdx;
+ fy += fdy;
}
}
@@ -2897,6 +2997,11 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
}
}
} else {
+ // When templated 'fetch' should be inlined at compile time:
+ const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel<bpp>;
+
+ const QTextureData &image = data->texture;
+
const qreal fdx = data->m11;
const qreal fdy = data->m12;
const qreal fdw = data->m13;
@@ -2927,8 +3032,8 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
distxs[i] = int((px - x1) * 256);
distys[i] = int((py - y1) * 256);
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
const uchar *s1 = data->texture.scanLine(y1);
const uchar *s2 = data->texture.scanLine(y2);
@@ -2969,21 +3074,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
const QVector<QRgb> *clut = data->texture.colorTable;
- int image_width = data->texture.width;
- int image_height = data->texture.height;
-
- int image_x1 = data->texture.x1;
- int image_y1 = data->texture.y1;
- int image_x2 = data->texture.x2 - 1;
- int image_y2 = data->texture.y2 - 1;
-
const qreal cx = x + qreal(0.5);
const qreal cy = y + qreal(0.5);
- const qreal fdx = data->m11;
- const qreal fdy = data->m12;
- const qreal fdw = data->m13;
-
if (data->fast_matrix) {
// The increment pr x in the scanline
int fdx = (int)(data->m11 * fixed_scale);
@@ -2995,14 +3088,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
fx -= half_point;
fy -= half_point;
+ const BilinearFastTransformFetcher fetcher =
+ (layout->bpp == QPixelLayout::BPP32)
+ ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32>
+ : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone>;
+
if (fdy == 0) { //simple scale, no rotation
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uchar *s1 = data->texture.scanLine(y1);
- const uchar *s2 = data->texture.scanLine(y2);
- FetchPixelFunc fetch = qFetchPixel[layout->bpp];
uint sbuf1[buffer_size];
uint sbuf2[buffer_size];
quint64 buf1[buffer_size];
@@ -3010,84 +3102,19 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
QRgba64 *b = buffer;
while (length) {
int len = qMin(length, buffer_size / 2);
- int fracX = fx;
- int i = 0;
int disty = (fy & 0x0000ffff);
#if defined(__SSE2__)
const __m128i vdy = _mm_set1_epi16(disty);
const __m128i vidy = _mm_set1_epi16(0x10000 - disty);
- if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) {
- for (; i < len; ++i) {
- int x1 = (fx >> 16);
- int x2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- if (x1 != x2)
- break;
- sbuf1[i * 2 + 0] = sbuf1[i * 2 + 1] = ((const uint*)s1)[x1];
- sbuf2[i * 2 + 0] = sbuf2[i * 2 + 1] = ((const uint*)s2)[x1];
- fx += fdx;
- }
- int fastLen;
- if (fdx > 0)
- fastLen = qMin(len, int((image_x2 - (fx >> 16)) / data->m11));
- else
- fastLen = qMin(len, int((image_x1 - (fx >> 16)) / data->m11));
- fastLen -= 3;
-
- const __m128i v_fdx = _mm_set1_epi32(fdx*4);
- __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
- for (; i < fastLen; i += 4) {
- int offset = _mm_extract_epi16(v_fx, 1);
- sbuf1[i * 2 + 0] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 0] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1];
- offset = _mm_extract_epi16(v_fx, 3);
- sbuf1[i * 2 + 2] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 2] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1];
- offset = _mm_extract_epi16(v_fx, 5);
- sbuf1[i * 2 + 4] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 4] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1];
- offset = _mm_extract_epi16(v_fx, 7);
- sbuf1[i * 2 + 6] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 6] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1];
- v_fx = _mm_add_epi32(v_fx, v_fdx);
- }
- fx = _mm_cvtsi128_si32(v_fx);
- }
#endif
- for (; i < len; ++i) {
- int x1 = (fx >> 16);
- int x2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
-
- if (layout->bpp == QPixelLayout::BPP32) {
- sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
- sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
- sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
- sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
-
- } else {
- sbuf1[i * 2 + 0] = fetch(s1, x1);
- sbuf1[i * 2 + 1] = fetch(s1, x2);
- sbuf2[i * 2 + 0] = fetch(s2, x1);
- sbuf2[i * 2 + 1] = fetch(s2, x2);
- }
+ fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
- fx += fdx;
- }
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
if (disty)
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
- int distx = (fracX & 0x0000ffff);
+ int distx = (fx & 0x0000ffff);
#if defined(__SSE2__)
const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
@@ -3104,13 +3131,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
#else
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
#endif
- fracX += fdx;
+ fx += fdx;
}
length -= len;
b += len;
}
} else { //rotation
- FetchPixelFunc fetch = qFetchPixel[layout->bpp];
uint sbuf1[buffer_size];
uint sbuf2[buffer_size];
quint64 buf1[buffer_size];
@@ -3120,117 +3146,18 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
while (b < end) {
int len = qMin(length, buffer_size / 2);
- int fracX = fx;
- int fracY = fy;
- int i = 0;
-#if defined(__SSE2__)
- if (blendType != BlendTransformedBilinearTiled && layout->bpp == QPixelLayout::BPP32) {
- for (; i < len; ++i) {
- int x1 = (fx >> 16);
- int x2;
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- if (x1 != x2 && y1 != y2)
- break;
- const uchar *s1 = data->texture.scanLine(y1);
- const uchar *s2 = data->texture.scanLine(y2);
- sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
- sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
- sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
- sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
- fx += fdx;
- fy += fdy;
- }
- int fastLen = len;
- if (fdx > 0)
- fastLen = qMin(fastLen, int((qint64(image_x2) * fixed_scale - fx) / fdx));
- else if (fdx < 0)
- fastLen = qMin(fastLen, int((qint64(image_x1) * fixed_scale - fx) / fdx));
- if (fdy > 0)
- fastLen = qMin(fastLen, int((qint64(image_y2) * fixed_scale - fy) / fdy));
- else if (fdy < 0)
- fastLen = qMin(fastLen, int((qint64(image_y1) * fixed_scale - fy) / fdy));
- fastLen -= 3;
-
- const __m128i v_fdx = _mm_set1_epi32(fdx*4);
- const __m128i v_fdy = _mm_set1_epi32(fdy*4);
- __m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
- __m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
- const int bytesPerLine = data->texture.bytesPerLine;
- const uchar *s1 = data->texture.imageData;
- const uchar *s2 = s1 + bytesPerLine;
- const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
- for (; i < fastLen; i += 4) {
- const __m128i vy = _mm_packs_epi32(_mm_srai_epi32(v_fy, 16), _mm_setzero_si128());
- __m128i voffset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epu16(vy, vbpl));
- voffset = _mm_add_epi32(voffset, _mm_srli_epi32(v_fx, 16));
-
- int offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
- sbuf1[i * 2 + 0] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 1] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 0] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 1] = ((const uint*)s2)[offset + 1];
- offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
- sbuf1[i * 2 + 2] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 3] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 2] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 3] = ((const uint*)s2)[offset + 1];
- offset = _mm_cvtsi128_si32(voffset); voffset = _mm_srli_si128(voffset, 4);
- sbuf1[i * 2 + 4] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 5] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 4] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 5] = ((const uint*)s2)[offset + 1];
- offset = _mm_cvtsi128_si32(voffset);
- sbuf1[i * 2 + 6] = ((const uint*)s1)[offset];
- sbuf1[i * 2 + 7] = ((const uint*)s1)[offset + 1];
- sbuf2[i * 2 + 6] = ((const uint*)s2)[offset];
- sbuf2[i * 2 + 7] = ((const uint*)s2)[offset + 1];
-
- v_fx = _mm_add_epi32(v_fx, v_fdx);
- v_fy = _mm_add_epi32(v_fy, v_fdy);
- }
- fx = _mm_cvtsi128_si32(v_fx);
- fy = _mm_cvtsi128_si32(v_fy);
- }
-#endif
- for (; i < len; ++i) {
- int x1 = (fx >> 16);
- int x2;
- int y1 = (fy >> 16);
- int y2;
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
- const uchar *s1 = data->texture.scanLine(y1);
- const uchar *s2 = data->texture.scanLine(y2);
-
- if (layout->bpp == QPixelLayout::BPP32) {
- sbuf1[i * 2 + 0] = ((const uint*)s1)[x1];
- sbuf1[i * 2 + 1] = ((const uint*)s1)[x2];
- sbuf2[i * 2 + 0] = ((const uint*)s2)[x1];
- sbuf2[i * 2 + 1] = ((const uint*)s2)[x2];
-
- } else {
- sbuf1[i * 2 + 0] = fetch(s1, x1);
- sbuf1[i * 2 + 1] = fetch(s1, x2);
- sbuf2[i * 2 + 0] = fetch(s2, x1);
- sbuf2[i * 2 + 1] = fetch(s2, x2);
- }
+ fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
- fx += fdx;
- fy += fdy;
- }
layout->convertToARGB64PM((QRgba64 *)buf1, sbuf1, len * 2, clut, 0);
layout->convertToARGB64PM((QRgba64 *)buf2, sbuf2, len * 2, clut, 0);
for (int i = 0; i < len; ++i) {
- int distx = (fracX & 0x0000ffff);
- int disty = (fracY & 0x0000ffff);
+ int distx = (fx & 0x0000ffff);
+ int disty = (fy & 0x0000ffff);
b[i] = interpolate_4_pixels_rgb64((QRgba64 *)buf1 + i*2, (QRgba64 *)buf2 + i*2, distx, disty);
- fracX += fdx;
- fracY += fdy;
+ fx += fdx;
+ fy += fdy;
}
length -= len;
@@ -3238,6 +3165,12 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
}
}
} else {
+ const QTextureData &image = data->texture;
+
+ const qreal fdx = data->m11;
+ const qreal fdy = data->m12;
+ const qreal fdw = data->m13;
+
qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
@@ -3267,8 +3200,8 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
distxs[i] = int((px - x1) * (1<<16));
distys[i] = int((py - y1) * (1<<16));
- fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2);
- fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
+ fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
const uchar *s1 = data->texture.scanLine(y1);
const uchar *s2 = data->texture.scanLine(y2);