summaryrefslogtreecommitdiffstats
path: root/src/gui
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui')
-rw-r--r--src/gui/image/qimage.cpp10
-rw-r--r--src/gui/image/qimage_conversions.cpp10
-rw-r--r--src/gui/painting/qdrawhelper.cpp321
-rw-r--r--src/gui/painting/qdrawhelper_avx2.cpp56
-rw-r--r--src/gui/painting/qdrawhelper_p.h16
5 files changed, 214 insertions, 199 deletions
diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 5ef65dd0cc..469ae8b97e 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -2582,15 +2582,14 @@ bool QImage::allGray() const
break;
}
- const int buffer_size = 2048;
- uint buffer[buffer_size];
+ uint buffer[BufferSize];
const QPixelLayout *layout = &qPixelLayouts[d->format];
FetchPixelsFunc fetch = qFetchPixels[layout->bpp];
for (int j = 0; j < d->height; ++j) {
const uchar *b = constScanLine(j);
int x = 0;
while (x < d->width) {
- int l = qMin(d->width - x, buffer_size);
+ int l = qMin(d->width - x, BufferSize);
const uint *ptr = fetch(buffer, b, x, l);
ptr = layout->convertToARGB32PM(buffer, ptr, l, 0, 0);
for (int i = 0; i < l; ++i) {
@@ -3218,14 +3217,13 @@ inline void rgbSwapped_generic(int width, int height, const QImage *src, QImage
const uint alphaGreenMask = (((1 << layout->alphaWidth) - 1) << layout->alphaShift)
| (((1 << layout->greenWidth) - 1) << layout->greenShift);
- const int buffer_size = 2048;
- uint buffer[buffer_size];
+ uint buffer[BufferSize];
for (int i = 0; i < height; ++i) {
uchar *q = dst->scanLine(i);
const uchar *p = src->constScanLine(i);
int x = 0;
while (x < width) {
- int l = qMin(width - x, buffer_size);
+ int l = qMin(width - x, BufferSize);
const uint *ptr = fetch(buffer, p, x, l);
for (int j = 0; j < l; ++j) {
uint red = (ptr[j] >> layout->redShift) & redBlueMask;
diff --git a/src/gui/image/qimage_conversions.cpp b/src/gui/image/qimage_conversions.cpp
index 4eef617336..9a8cbf43c9 100644
--- a/src/gui/image/qimage_conversions.cpp
+++ b/src/gui/image/qimage_conversions.cpp
@@ -145,8 +145,7 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio
// Cannot be used with indexed formats.
Q_ASSERT(dest->format > QImage::Format_Indexed8);
Q_ASSERT(src->format > QImage::Format_Indexed8);
- const int buffer_size = 2048;
- uint buf[buffer_size];
+ uint buf[BufferSize];
uint *buffer = buf;
const QPixelLayout *srcLayout = &qPixelLayouts[src->format];
const QPixelLayout *destLayout = &qPixelLayouts[dest->format];
@@ -196,7 +195,7 @@ void convert_generic(QImageData *dest, const QImageData *src, Qt::ImageConversio
if (destLayout->bpp == QPixelLayout::BPP32)
buffer = reinterpret_cast<uint *>(destData) + x;
else
- l = qMin(l, buffer_size);
+ l = qMin(l, BufferSize);
const uint *ptr = fetch(buffer, srcData, x, l);
ptr = convertToARGB32PM(buffer, ptr, l, 0, ditherPtr);
ptr = convertFromARGB32PM(buffer, ptr, l, 0, ditherPtr);
@@ -217,8 +216,7 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im
if (data->depth != qt_depthForFormat(dst_format))
return false;
- const int buffer_size = 2048;
- uint buffer[buffer_size];
+ uint buffer[BufferSize];
const QPixelLayout *srcLayout = &qPixelLayouts[data->format];
const QPixelLayout *destLayout = &qPixelLayouts[dst_format];
uchar *srcData = data->data;
@@ -261,7 +259,7 @@ bool convert_generic_inplace(QImageData *data, QImage::Format dst_format, Qt::Im
int x = 0;
while (x < data->width) {
dither.x = x;
- int l = qMin(data->width - x, buffer_size);
+ int l = qMin(data->width - x, BufferSize);
const uint *ptr = fetch(buffer, srcData, x, l);
ptr = convertToARGB32PM(buffer, ptr, l, 0, ditherPtr);
ptr = convertFromARGB32PM(buffer, ptr, l, 0, ditherPtr);
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 23c8e42ded..5ecb2d44a2 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -73,9 +73,6 @@ enum {
half_point = 1 << 15
};
-// must be multiple of 4 for easier SIMD implementations
-static const int buffer_size = 2048;
-
template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth();
template<QImage::Format> Q_DECL_CONSTEXPR uint redShift();
template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth();
@@ -1131,7 +1128,7 @@ static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, in
static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
{
const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
- uint buffer32[buffer_size];
+ uint buffer32[BufferSize];
const uint *ptr = qFetchPixels[layout->bpp](buffer32, rasterBuffer->scanLine(y), x, length);
return const_cast<QRgba64 *>(layout->convertToARGB64PM(buffer, ptr, length, 0, 0));
}
@@ -1304,12 +1301,12 @@ static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y
static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
{
- uint buf[buffer_size];
+ uint buf[BufferSize];
const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
StorePixelsFunc store = qStorePixels[layout->bpp];
uchar *dest = rasterBuffer->scanLine(y);
while (length) {
- int l = qMin(length, buffer_size);
+ int l = qMin(length, BufferSize);
const uint *ptr = 0;
if (!layout->premultiplied && !layout->alphaWidth)
ptr = layout->convertFromRGB32(buf, buffer, l, 0, 0);
@@ -1331,12 +1328,12 @@ static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int len
static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
{
- uint buf[buffer_size];
+ uint buf[BufferSize];
const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
StorePixelsFunc store = qStorePixels[layout->bpp];
uchar *dest = rasterBuffer->scanLine(y);
while (length) {
- int l = qMin(length, buffer_size);
+ int l = qMin(length, BufferSize);
const uint *ptr = 0;
convertFromRgb64(buf, buffer, l);
if (!layout->premultiplied && !layout->alphaWidth)
@@ -1554,7 +1551,7 @@ static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Op
{
const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
if (layout->bpp != QPixelLayout::BPP32) {
- uint buffer32[buffer_size];
+ uint buffer32[BufferSize];
const uint *ptr = qFetchPixels[layout->bpp](buffer32, data->texture.scanLine(y), x, length);
return layout->convertToARGB64PM(buffer, ptr, length, data->texture.colorTable, 0);
} else {
@@ -1673,7 +1670,7 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
const QVector<QRgb> *clut = data->texture.colorTable;
- uint buffer32[buffer_size];
+ uint buffer32[BufferSize];
QRgba64 *b = buffer;
if (data->fast_matrix) {
// The increment pr x in the scanline
@@ -1687,9 +1684,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
int i = 0, j = 0;
while (i < length) {
- if (j == buffer_size) {
- layout->convertToARGB64PM(b, buffer32, buffer_size, clut, 0);
- b += buffer_size;
+ if (j == BufferSize) {
+ layout->convertToARGB64PM(b, buffer32, BufferSize, clut, 0);
+ b += BufferSize;
j = 0;
}
int px = fx >> 16;
@@ -1725,9 +1722,9 @@ static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Oper
int i = 0, j = 0;
while (i < length) {
- if (j == buffer_size) {
- layout->convertToARGB64PM(b, buffer32, buffer_size, clut, 0);
- b += buffer_size;
+ if (j == BufferSize) {
+ layout->convertToARGB64PM(b, buffer32, BufferSize, clut, 0);
+ b += BufferSize;
j = 0;
}
const qreal iw = fw == 0 ? 1 : 1 / fw;
@@ -1921,7 +1918,7 @@ inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int,
}
enum FastTransformTypes {
- SimpleUpscaleTransform,
+ SimpleScaleTransform,
UpscaleTransform,
DownscaleTransform,
RotateTransform,
@@ -1929,11 +1926,38 @@ enum FastTransformTypes {
NFastTransformTypes
};
+// Completes the partial interpolation stored in IntermediateBuffer.
+// by performing the x-axis interpolation and joining the RB and AG buffers.
+static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
+{
+#if defined(QT_COMPILER_SUPPORTS_AVX2)
+ extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
+ if (qCpuHasFeature(AVX2))
+ return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
+#endif
+
+ // Switch to intermediate buffer coordinates
+ fx -= offset * fixed_scale;
+
+ while (b < end) {
+ const int x = (fx >> 16);
+
+ const uint distx = (fx & 0x0000ffff) >> 8;
+ const uint idistx = 256 - distx;
+ const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
+ const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
+ *b = (rb >> 8) | ag;
+ b++;
+ fx += fdx;
+ }
+ fx += offset * fixed_scale;
+}
+
typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy);
template<TextureBlendType blendType>
-static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(uint *b, uint *end, const QTextureData &image,
- int &fx, int &fy, int fdx, int /*fdy*/)
+static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
{
int y1 = (fy >> 16);
int y2;
@@ -1950,16 +1974,12 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u
const int offset = (fx + adjust) >> 16;
int x = offset;
- // The idea is first to do the interpolation between the row s1 and the row s2
- // into an intermediate buffer, then we interpolate between two pixel of this buffer.
-
- // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB
- // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG
- // +1 for the last pixel to interpolate with, and +1 for rounding errors.
- quint32 intermediate_buffer[2][buffer_size + 2];
- // count is the size used in the intermediate_buffer.
+ IntermediateBuffer intermediate;
+ // count is the size used in the intermediate.buffer.
int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2;
- Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
+ // length is supposed to be <= BufferSize either because data->m11 < 1 or
+ // data->m11 < 2, and any larger buffers split
+ Q_ASSERT(count <= BufferSize + 2);
int f = 0;
int lim = count;
if (blendType == BlendTransformedBilinearTiled) {
@@ -1974,8 +1994,8 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u
quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
do {
- intermediate_buffer[0][f] = rb;
- intermediate_buffer[1][f] = ag;
+ intermediate.buffer_rb[f] = rb;
+ intermediate.buffer_ag[f] = ag;
f++;
x++;
} while (x < image.x1 && f < lim);
@@ -2008,10 +2028,10 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u
// Add the values, and shift to only keep 8 significant bits per colors
__m128i rAG =_mm_add_epi16(topAG, bottomAG);
rAG = _mm_srli_epi16(rAG, 8);
- _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG);
+ _mm_storeu_si128((__m128i*)(&intermediate.buffer_ag[f]), rAG);
__m128i rRB =_mm_add_epi16(topRB, bottomRB);
rRB = _mm_srli_epi16(rRB, 8);
- _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB);
+ _mm_storeu_si128((__m128i*)(&intermediate.buffer_rb[f]), rRB);
}
#elif defined(__ARM_NEON__)
const int16x8_t disty_ = vdupq_n_s16(disty);
@@ -2038,10 +2058,10 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u
// Add the values, and shift to only keep 8 significant bits per colors
int16x8_t rAG = vaddq_s16(topAG, bottomAG);
rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
- vst1q_s16((int16_t*)(&intermediate_buffer[1][f]), rAG);
+ vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
int16x8_t rRB = vaddq_s16(topRB, bottomRB);
rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
- vst1q_s16((int16_t*)(&intermediate_buffer[0][f]), rRB);
+ vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
}
#endif
}
@@ -2055,28 +2075,13 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper(u
uint t = s1[x];
uint b = s2[x];
- intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
x++;
}
- // Now interpolate the values from the intermediate_buffer to get the final result.
- fx -= offset * fixed_scale; // Switch to intermediate buffer coordinates
-
- while (b < end) {
- int x1 = (fx >> 16);
- int x2 = x1 + 1;
- Q_ASSERT(x1 >= 0);
- Q_ASSERT(x2 < count);
-
- int distx = (fx & 0x0000ffff) >> 8;
- int idistx = 256 - distx;
- int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff;
- int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00;
- *b = rb | ag;
- b++;
- fx += fdx;
- }
+ // Now interpolate the values from the intermediate.buffer to get the final result.
+ intermediate_adder(b, end, intermediate, offset, fx, fdx);
}
template<TextureBlendType blendType>
@@ -2552,14 +2557,14 @@ static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint
static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
{
- fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinear>,
+ fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
},
{
- fetchTransformedBilinearARGB32PM_simple_upscale_helper<BlendTransformedBilinearTiled>,
+ fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
@@ -2594,7 +2599,13 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
if (fdy == 0) { // simple scale, no rotation or shear
if (qAbs(fdx) <= fixed_scale) {
// simple scale up on X
- bilinearFastTransformHelperARGB32PM[tiled][SimpleUpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
+ bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
+ } else if (qAbs(fdx) <= 2 * fixed_scale) {
+ // simple scale down on X, less than 2x
+ const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
+ bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
+ if (mid != length)
+ bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
} else if (qAbs(data->m22) < qreal(1./8.)) {
// scale up more than 8x (on Y)
bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
@@ -2663,8 +2674,8 @@ static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, c
}
template<TextureBlendType blendType, QPixelLayout::BPP bpp>
-static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b, uint *end, const QTextureData &image,
- int &fx, int &fy, int fdx, int /*fdy*/)
+static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
{
const QPixelLayout *layout = &qPixelLayouts[image.format];
const QVector<QRgb> *clut = image.colorTable;
@@ -2688,16 +2699,14 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b,
const int offset = (fx + adjust) >> 16;
int x = offset;
- // The idea is first to do the interpolation between the row s1 and the row s2
- // into an intermediate buffer, then we interpolate between two pixel of this buffer.
- // +1 for the last pixel to interpolate with, and +1 for rounding errors.
- uint buf1[buffer_size + 2];
- uint buf2[buffer_size + 2];
+ IntermediateBuffer intermediate;
+ uint *buf1 = intermediate.buffer_rb;
+ uint *buf2 = intermediate.buffer_ag;
const uint *ptr1;
const uint *ptr2;
int count = (qint64(length) * qAbs(fdx) + fixed_scale - 1) / fixed_scale + 2;
- Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
+ Q_ASSERT(count <= BufferSize + 2);
if (blendType == BlendTransformedBilinearTiled) {
x %= image.width;
@@ -2729,6 +2738,7 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b,
buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
}
}
+ // Generate the rest by repeatedly repeating the previous set of pixels
for (int i = image.width; i < count; ++i) {
buf1[i] = buf1[i - image.width];
buf2[i] = buf2[i - image.width];
@@ -2761,22 +2771,8 @@ static void QT_FASTCALL fetchTransformedBilinear_simple_upscale_helper(uint *b,
}
}
- // Now interpolate the values from the intermediate_buffer to get the final result.
- fx -= offset * fixed_scale; // Switch to intermediate buffer coordinates
-
- while (b < end) {
- int x1 = (fx >> 16);
- int x2 = x1 + 1;
- Q_ASSERT(x1 >= 0);
- Q_ASSERT(x2 < count);
-
- int distx = (fx & 0x0000ffff) >> 8;
- int idistx = 256 - distx;
- int rb = ((buf1[x1] * idistx + buf1[x2] * distx) >> 8) & 0xff00ff;
- int ag = (buf2[x1] * idistx + buf2[x2] * distx) & 0xff00ff00;
- *b++ = rb | ag;
- fx += fdx;
- }
+ // Now interpolate the values from the intermediate.buffer to get the final result.
+ intermediate_adder(b, end, intermediate, offset, fx, fdx);
}
@@ -2926,15 +2922,20 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
if (fdy == 0) { // simple scale, no rotation or shear
if (qAbs(fdx) <= fixed_scale) { // scale up on X
- fetchTransformedBilinear_simple_upscale_helper<blendType, bpp>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
+ fetchTransformedBilinear_simple_scale_helper<blendType, bpp>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
+ } else if (qAbs(fdx) <= 2 * fixed_scale) { // scale down on X less than 2x
+ const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
+ fetchTransformedBilinear_simple_scale_helper<blendType, bpp>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
+ if (mid != length)
+ fetchTransformedBilinear_simple_scale_helper<blendType, bpp>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
} else {
const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
- uint buf1[buffer_size];
- uint buf2[buffer_size];
+ uint buf1[BufferSize];
+ uint buf2[BufferSize];
uint *b = buffer;
while (length) {
- int len = qMin(length, buffer_size / 2);
+ int len = qMin(length, BufferSize / 2);
fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
@@ -2965,11 +2966,11 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
} else { // rotation or shear
const BilinearFastTransformFetcher fetcher = fetchTransformedBilinear_fetcher<blendType,bpp>;
- uint buf1[buffer_size];
- uint buf2[buffer_size];
+ uint buf1[BufferSize];
+ uint buf2[BufferSize];
uint *b = buffer;
while (length) {
- int len = qMin(length, buffer_size / 2);
+ int len = qMin(length, BufferSize / 2);
fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
layout->convertToARGB32PM(buf1, buf1, len * 2, clut, 0);
layout->convertToARGB32PM(buf2, buf2, len * 2, clut, 0);
@@ -3019,15 +3020,15 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
- uint buf1[buffer_size];
- uint buf2[buffer_size];
+ uint buf1[BufferSize];
+ uint buf2[BufferSize];
uint *b = buffer;
- int distxs[buffer_size / 2];
- int distys[buffer_size / 2];
+ int distxs[BufferSize / 2];
+ int distys[BufferSize / 2];
while (length) {
- int len = qMin(length, buffer_size / 2);
+ int len = qMin(length, BufferSize / 2);
for (int i = 0; i < len; ++i) {
const qreal iw = fw == 0 ? 1 : 1 / fw;
const qreal px = fx * iw - qreal(0.5);
@@ -3104,13 +3105,13 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
if (fdy == 0) { //simple scale, no rotation
- uint sbuf1[buffer_size];
- uint sbuf2[buffer_size];
- quint64 buf1[buffer_size];
- quint64 buf2[buffer_size];
+ uint sbuf1[BufferSize];
+ uint sbuf2[BufferSize];
+ quint64 buf1[BufferSize];
+ quint64 buf2[BufferSize];
QRgba64 *b = buffer;
while (length) {
- int len = qMin(length, buffer_size / 2);
+ int len = qMin(length, BufferSize / 2);
int disty = (fy & 0x0000ffff);
#if defined(__SSE2__)
const __m128i vdy = _mm_set1_epi16(disty);
@@ -3148,15 +3149,15 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
b += len;
}
} else { //rotation
- uint sbuf1[buffer_size];
- uint sbuf2[buffer_size];
- quint64 buf1[buffer_size];
- quint64 buf2[buffer_size];
+ uint sbuf1[BufferSize];
+ uint sbuf2[BufferSize];
+ quint64 buf1[BufferSize];
+ quint64 buf2[BufferSize];
QRgba64 *end = buffer + length;
QRgba64 *b = buffer;
while (b < end) {
- int len = qMin(length, buffer_size / 2);
+ int len = qMin(length, BufferSize / 2);
fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
@@ -3187,17 +3188,17 @@ static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, co
qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
FetchPixelFunc fetch = qFetchPixel[layout->bpp];
- uint sbuf1[buffer_size];
- uint sbuf2[buffer_size];
- quint64 buf1[buffer_size];
- quint64 buf2[buffer_size];
+ uint sbuf1[BufferSize];
+ uint sbuf2[BufferSize];
+ quint64 buf1[BufferSize];
+ quint64 buf2[BufferSize];
QRgba64 *b = buffer;
- int distxs[buffer_size / 2];
- int distys[buffer_size / 2];
+ int distxs[BufferSize / 2];
+ int distys[BufferSize / 2];
while (length) {
- int len = qMin(length, buffer_size / 2);
+ int len = qMin(length, BufferSize / 2);
for (int i = 0; i < len; ++i) {
const qreal iw = fw == 0 ? 1 : 1 / fw;
const qreal px = fx * iw - qreal(0.5);
@@ -3727,7 +3728,7 @@ static
void blend_color_generic(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
- uint buffer[buffer_size];
+ uint buffer[BufferSize];
Operator op = getOperator(data, spans, count);
const uint color = data->solid.color.toArgb32();
@@ -3735,7 +3736,7 @@ void blend_color_generic(int count, const QSpan *spans, void *userData)
int x = spans->x;
int length = spans->len;
while (length) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
op.funcSolid(dest, l, color, spans->coverage);
if (op.destStore)
@@ -3787,14 +3788,14 @@ void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
return blend_color_generic(count, spans, userData);
}
- quint64 buffer[buffer_size];
+ quint64 buffer[BufferSize];
const QRgba64 color = data->solid.color;
while (count--) {
int x = spans->x;
int length = spans->len;
while (length) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.funcSolid64(dest, l, color, spans->coverage);
op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
@@ -3899,7 +3900,7 @@ void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handle
int length = right - x;
while (length) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
length -= l;
int process_length = l;
@@ -3946,8 +3947,8 @@ struct QBlendBase
BlendType *dest;
- BlendType buffer[buffer_size];
- BlendType src_buffer[buffer_size];
+ BlendType buffer[BufferSize];
+ BlendType src_buffer[BufferSize];
};
class BlendSrcGeneric : public QBlendBase<uint>
@@ -4031,8 +4032,8 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
- uint buffer[buffer_size];
- uint src_buffer[buffer_size];
+ uint buffer[BufferSize];
+ uint src_buffer[BufferSize];
Operator op = getOperator(data, spans, count);
const int image_width = data->texture.width;
@@ -4056,7 +4057,7 @@ static void blend_untransformed_generic(int count, const QSpan *spans, void *use
if (length > 0) {
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
op.func(dest, src, l, coverage);
@@ -4081,8 +4082,8 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
return blend_untransformed_generic(count, spans, userData);
}
- quint64 buffer[buffer_size];
- quint64 src_buffer[buffer_size];
+ quint64 buffer[BufferSize];
+ quint64 src_buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@@ -4105,7 +4106,7 @@ static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, voi
if (length > 0) {
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.func64(dest, src, l, coverage);
@@ -4269,8 +4270,8 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
{
QSpanData *data = reinterpret_cast<QSpanData *>(userData);
- uint buffer[buffer_size];
- uint src_buffer[buffer_size];
+ uint buffer[BufferSize];
+ uint src_buffer[BufferSize];
Operator op = getOperator(data, spans, count);
const int image_width = data->texture.width;
@@ -4296,8 +4297,8 @@ static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
- if (buffer_size < l)
- l = buffer_size;
+ if (BufferSize < l)
+ l = BufferSize;
const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
uint *dest = op.destFetch ? op.destFetch(buffer, data->rasterBuffer, x, spans->y, l) : buffer;
op.func(dest, src, l, coverage);
@@ -4322,8 +4323,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
return blend_tiled_generic(count, spans, userData);
}
- quint64 buffer[buffer_size];
- quint64 src_buffer[buffer_size];
+ quint64 buffer[BufferSize];
+ quint64 src_buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@@ -4348,8 +4349,8 @@ static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userD
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
- if (buffer_size < l)
- l = buffer_size;
+ if (BufferSize < l)
+ l = BufferSize;
const QRgba64 *src = op.srcFetch64((QRgba64 *)src_buffer, &op, data, sy, sx, l);
QRgba64 *dest = op.destFetch64((QRgba64 *)buffer, data->rasterBuffer, x, spans->y, l);
op.func64(dest, src, l, coverage);
@@ -4398,8 +4399,8 @@ static void blend_tiled_argb(int count, const QSpan *spans, void *userData)
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
int l = qMin(image_width - sx, length);
- if (buffer_size < l)
- l = buffer_size;
+ if (BufferSize < l)
+ l = BufferSize;
const uint *src = (const uint *)data->texture.scanLine(sy) + sx;
uint *dest = ((uint *)data->rasterBuffer->scanLine(spans->y)) + x;
op.func(dest, src, l, coverage);
@@ -4458,8 +4459,8 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
int tx = x;
while (length) {
int l = qMin(image_width - sx, length);
- if (buffer_size < l)
- l = buffer_size;
+ if (BufferSize < l)
+ l = BufferSize;
quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + tx;
const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
memcpy(dest, src, l * sizeof(quint16));
@@ -4494,8 +4495,8 @@ static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
if (alpha > 0) {
while (length) {
int l = qMin(image_width - sx, length);
- if (buffer_size < l)
- l = buffer_size;
+ if (BufferSize < l)
+ l = BufferSize;
quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(spans->y)) + x;
const quint16 *src = (const quint16 *)data->texture.scanLine(sy) + sx;
blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha);
@@ -4524,7 +4525,7 @@ static void blend_transformed_bilinear_rgb565(int count, const QSpan *spans, voi
return;
}
- quint16 buffer[buffer_size];
+ quint16 buffer[BufferSize];
const int src_minx = data->texture.x1;
const int src_miny = data->texture.y1;
@@ -4561,7 +4562,7 @@ static void blend_transformed_bilinear_rgb565(int count, const QSpan *spans, voi
l = length;
b = dest;
} else {
- l = qMin(length, buffer_size);
+ l = qMin(length, BufferSize);
b = buffer;
}
const quint16 *end = b + l;
@@ -4644,7 +4645,7 @@ static void blend_transformed_bilinear_rgb565(int count, const QSpan *spans, voi
l = length;
b = dest;
} else {
- l = qMin(length, buffer_size);
+ l = qMin(length, BufferSize);
b = buffer;
}
const quint16 *end = b + l;
@@ -4714,7 +4715,7 @@ static void blend_transformed_argb(int count, const QSpan *spans, void *userData
}
CompositionFunction func = functionForMode[data->rasterBuffer->compositionMode];
- uint buffer[buffer_size];
+ uint buffer[BufferSize];
quint32 mask = (data->texture.format == QImage::Format_RGB32) ? 0xff000000 : 0;
const int image_x1 = data->texture.x1;
@@ -4743,7 +4744,7 @@ static void blend_transformed_argb(int count, const QSpan *spans, void *userData
int length = spans->len;
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
- int l = qMin(length, buffer_size);
+ int l = qMin(length, BufferSize);
const uint *end = buffer + l;
uint *b = buffer;
while (b < end) {
@@ -4780,7 +4781,7 @@ static void blend_transformed_argb(int count, const QSpan *spans, void *userData
int length = spans->len;
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
while (length) {
- int l = qMin(length, buffer_size);
+ int l = qMin(length, BufferSize);
const uint *end = buffer + l;
uint *b = buffer;
while (b < end) {
@@ -4819,7 +4820,7 @@ static void blend_transformed_rgb565(int count, const QSpan *spans, void *userDa
return;
}
- quint16 buffer[buffer_size];
+ quint16 buffer[BufferSize];
const int image_x1 = data->texture.x1;
const int image_y1 = data->texture.y1;
const int image_x2 = data->texture.x2 - 1;
@@ -4855,7 +4856,7 @@ static void blend_transformed_rgb565(int count, const QSpan *spans, void *userDa
l = length;
b = dest;
} else {
- l = qMin(length, buffer_size);
+ l = qMin(length, BufferSize);
b = buffer;
}
const quint16 *end = b + l;
@@ -4910,7 +4911,7 @@ static void blend_transformed_rgb565(int count, const QSpan *spans, void *userDa
l = length;
b = dest;
} else {
- l = qMin(length, buffer_size);
+ l = qMin(length, BufferSize);
b = buffer;
}
const quint16 *end = b + l;
@@ -4952,7 +4953,7 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us
}
CompositionFunction func = functionForMode[data->rasterBuffer->compositionMode];
- uint buffer[buffer_size];
+ uint buffer[BufferSize];
int image_width = data->texture.width;
int image_height = data->texture.height;
@@ -4980,7 +4981,7 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
int length = spans->len;
while (length) {
- int l = qMin(length, buffer_size);
+ int l = qMin(length, BufferSize);
const uint *end = buffer + l;
uint *b = buffer;
int px16 = x % (image_width << 16);
@@ -5034,7 +5035,7 @@ static void blend_transformed_tiled_argb(int count, const QSpan *spans, void *us
const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
int length = spans->len;
while (length) {
- int l = qMin(length, buffer_size);
+ int l = qMin(length, BufferSize);
const uint *end = buffer + l;
uint *b = buffer;
while (b < end) {
@@ -5085,7 +5086,7 @@ static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void *
return;
}
- quint16 buffer[buffer_size];
+ quint16 buffer[BufferSize];
const int image_width = data->texture.width;
const int image_height = data->texture.height;
@@ -5119,7 +5120,7 @@ static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void *
l = length;
b = dest;
} else {
- l = qMin(length, buffer_size);
+ l = qMin(length, BufferSize);
b = buffer;
}
const quint16 *end = b + l;
@@ -5179,7 +5180,7 @@ static void blend_transformed_tiled_rgb565(int count, const QSpan *spans, void *
l = length;
b = dest;
} else {
- l = qMin(length, buffer_size);
+ l = qMin(length, BufferSize);
b = buffer;
}
const quint16 *end = b + l;
@@ -5533,7 +5534,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied();
}
- quint64 buffer[buffer_size];
+ quint64 buffer[BufferSize];
const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
@@ -5542,7 +5543,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
int i = x;
int length = mapWidth;
while (length > 0) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l);
for (int j=0; j < l; ++j) {
const int coverage = map[j + (i - x)];
@@ -5571,7 +5572,7 @@ static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
int end = qMin<int>(x + mapWidth, clip.x + clip.len);
if (end <= start)
continue;
- Q_ASSERT(end - start <= buffer_size);
+ Q_ASSERT(end - start <= BufferSize);
QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, end - start);
for (int xp=start; xp<end; ++xp) {
@@ -5808,7 +5809,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
srcColor = colorProfile->toLinear(srcColor.unpremultiplied()).premultiplied();
}
- quint64 buffer[buffer_size];
+ quint64 buffer[BufferSize];
const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
@@ -5817,7 +5818,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
int i = x;
int length = mapWidth;
while (length > 0) {
- int l = qMin(buffer_size, length);
+ int l = qMin(BufferSize, length);
QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, i, y + ly, l);
for (int j=0; j < l; ++j) {
const uint coverage = src[j + (i - x)];
@@ -5846,7 +5847,7 @@ static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
int end = qMin<int>(x + mapWidth, clip.x + clip.len);
if (end <= start)
continue;
- Q_ASSERT(end - start <= buffer_size);
+ Q_ASSERT(end - start <= BufferSize);
QRgba64 *dest = destFetch64((QRgba64*)buffer, rasterBuffer, start, clip.y, end - start);
for (int xp=start; xp<end; ++xp) {
@@ -6409,14 +6410,14 @@ static void qInitDrawhelperFunctions()
qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
- extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
- int &fx, int &fy, int fdx, int /*fdy*/);
+ extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/);
extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
int &fx, int &fy, int fdx, int /*fdy*/);
extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image,
int &fx, int &fy, int fdx, int fdy);
- bilinearFastTransformHelperARGB32PM[0][SimpleUpscaleTransform] = fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2;
+ bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
}
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp
index 3a70524a9d..1c225b4568 100644
--- a/src/gui/painting/qdrawhelper_avx2.cpp
+++ b/src/gui/painting/qdrawhelper_avx2.cpp
@@ -45,8 +45,6 @@
QT_BEGIN_NAMESPACE
-static Q_CONSTEXPR int BufferSize = 2048;
-
enum {
FixedScale = 1 << 16,
HalfPoint = 1 << 15
@@ -576,8 +574,10 @@ inline void fetchTransformedBilinear_pixelBounds(int, int l1, int l2, int &v1, i
Q_ASSERT(v2 >= l1 && v2 <= l2);
}
-void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
- int &fx, int &fy, int fdx, int /*fdy*/)
+void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
+
+void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
+ int &fx, int &fy, int fdx, int /*fdy*/)
{
int y1 = (fy >> 16);
int y2;
@@ -594,16 +594,12 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uin
const int offset = (fx + adjust) >> 16;
int x = offset;
- // The idea is first to do the interpolation between the row s1 and the row s2
- // into an intermediate buffer, then we interpolate between two pixel of this buffer.
-
- // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB
- // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG
- // +1 for the last pixel to interpolate with, and +1 for rounding errors.
- quint32 intermediate_buffer[2][BufferSize + 2];
+ IntermediateBuffer intermediate;
// count is the size used in the intermediate_buffer.
int count = (qint64(length) * qAbs(fdx) + FixedScale - 1) / FixedScale + 2;
- Q_ASSERT(count <= BufferSize + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case
+ // length is supposed to be <= BufferSize either because data->m11 < 1 or
+ // data->m11 < 2, and any larger buffers split
+ Q_ASSERT(count <= BufferSize + 2);
int f = 0;
int lim = qMin(count, image.x2 - x);
if (x < image.x1) {
@@ -613,8 +609,8 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uin
quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
do {
- intermediate_buffer[0][f] = rb;
- intermediate_buffer[1][f] = ag;
+ intermediate.buffer_rb[f] = rb;
+ intermediate.buffer_ag[f] = ag;
f++;
x++;
} while (x < image.x1 && f < lim);
@@ -644,10 +640,10 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uin
// Add the values, and shift to only keep 8 significant bits per colors
__m256i rAG =_mm256_add_epi16(topAG, bottomAG);
rAG = _mm256_srli_epi16(rAG, 8);
- _mm256_storeu_si256((__m256i*)(&intermediate_buffer[1][f]), rAG);
+ _mm256_storeu_si256((__m256i*)(&intermediate.buffer_ag[f]), rAG);
__m256i rRB =_mm256_add_epi16(topRB, bottomRB);
rRB = _mm256_srli_epi16(rRB, 8);
- _mm256_storeu_si256((__m256i*)(&intermediate_buffer[0][f]), rRB);
+ _mm256_storeu_si256((__m256i*)(&intermediate.buffer_rb[f]), rRB);
}
for (; f < count; f++) { // Same as above but without simd
@@ -656,11 +652,17 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uin
uint t = s1[x];
uint b = s2[x];
- intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
- intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
+ intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
x++;
}
+
// Now interpolate the values from the intermediate_buffer to get the final result.
+ intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
+}
+
+void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
+{
fx -= offset * FixedScale;
const __m128i v_fdx = _mm_set1_epi32(fdx * 4);
@@ -669,8 +671,8 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uin
while (b < end - 3) {
const __m128i offset = _mm_srli_epi32(v_fx, 16);
- __m256i vrb = _mm256_i32gather_epi64((const long long *)intermediate_buffer[0], offset, 4);
- __m256i vag = _mm256_i32gather_epi64((const long long *)intermediate_buffer[1], offset, 4);
+ __m256i vrb = _mm256_i32gather_epi64((const long long *)intermediate.buffer_rb, offset, 4);
+ __m256i vag = _mm256_i32gather_epi64((const long long *)intermediate.buffer_ag, offset, 4);
__m128i vdx = _mm_and_si128(v_fx, _mm_set1_epi32(0x0000ffff));
vdx = _mm_srli_epi16(vdx, 8);
@@ -695,17 +697,17 @@ void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_upscale_helper_avx2(uin
v_fx = _mm_add_epi32(v_fx, v_fdx);
}
while (b < end) {
- int x = (fx >> 16);
-
- uint distx = (fx & 0x0000ffff) >> 8;
- uint idistx = 256 - distx;
+ const int x = (fx >> 16);
- uint rb = ((intermediate_buffer[0][x] * idistx + intermediate_buffer[0][x + 1] * distx) >> 8) & 0xff00ff;
- uint ag = (intermediate_buffer[1][x] * idistx + intermediate_buffer[1][x + 1] * distx) & 0xff00ff00;
- *b = rb | ag;
+ const uint distx = (fx & 0x0000ffff) >> 8;
+ const uint idistx = 256 - distx;
+ const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
+ const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
+ *b = (rb >> 8) | ag;
b++;
fx += fdx;
}
+ fx += offset * FixedScale;
}
void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index ebf215a3eb..bf395ca595 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -1191,6 +1191,22 @@ inline uint comp_func_Plus_one_pixel(uint d, const uint s)
#undef MIX
#undef AMIX
+// must be multiple of 4 for easier SIMD implementations
+static Q_CONSTEXPR int BufferSize = 2048;
+
+// A buffer of intermediate results used by simple bilinear scaling.
+struct IntermediateBuffer
+{
+ // The idea is first to do the interpolation between the row s1 and the row s2
+ // into this intermediate buffer, then later interpolate between two pixel of this buffer.
+ //
+ // buffer_rb is a buffer of red-blue component of the pixel, in the form 0x00RR00BB
+ // buffer_ag is the alpha-green component of the pixel, in the form 0x00AA00GG
+ // +1 for the last pixel to interpolate with, and +1 for rounding errors.
+ quint32 buffer_rb[BufferSize+2];
+ quint32 buffer_ag[BufferSize+2];
+};
+
struct QDitherInfo {
int x;
int y;