summary | refs | log | tree | commit | diff | stats
path: root/src/gui/painting/qdrawhelper_sse2.cpp
diff options
context:
space:
mode:
author Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2016-10-07 17:03:48 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io> 2016-10-11 07:38:29 +0000
commit a6dc28486910bde21d6854c1a64caadb0f663e1c (patch)
tree df4e2b450a9280842c8c39103cef62d5d76b54f1 /src/gui/painting/qdrawhelper_sse2.cpp
parent 17ac3b2c146d1f48b88dbdc09927ddc3dd3aef81 (diff)
Avoid auto-vectorization of epilogues of manual vectorization
Defines a structure that tells the compiler in no uncertain terms the maximum number of times a loop can be run. This reduces the size of qdrawhelper_avx2.o from 22kbytes to 11kbytes. Change-Id: Ie3d6281b04b4be3332497c15f3dfe9f185e20507 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/gui/painting/qdrawhelper_sse2.cpp')
-rw-r--r-- src/gui/painting/qdrawhelper_sse2.cpp 13
1 files changed, 6 insertions, 7 deletions
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index 03abeed440..5ff08e8153 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -126,9 +126,8 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
_mm_store_si128((__m128i *)&dst[x], result);
}
}
- for (; x<w; ++x) {
+ SIMD_EPILOGUE(x, w, 3)
dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
- }
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
@@ -177,7 +176,7 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin
}
// 3) Epilogue:
- for (; x < length; ++x)
+ SIMD_EPILOGUE(x, length, 3)
dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
} else {
const int one_minus_const_alpha = 255 - const_alpha;
@@ -201,7 +200,7 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin
}
// 3) Epilogue:
- for (; x < length; ++x)
+ SIMD_EPILOGUE(x, length, 3)
dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
}
}
@@ -232,7 +231,7 @@ void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, u
}
// 3) Epilogue
- for (; x < length; ++x)
+ SIMD_EPILOGUE(x, length, 3)
dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
}
}
@@ -313,7 +312,7 @@ void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, u
dstVector = _mm_add_epi8(colorVector, dstVector);
_mm_store_si128((__m128i *)&dst[x], dstVector);
}
- for (;x < length; ++x)
+ SIMD_EPILOGUE(x, length, 3)
destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
}
}
@@ -592,7 +591,7 @@ void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
BLEND_SOURCE_OVER_ARGB32_SSE2_helper(dst, srcVector, nullVector, half, one, colorMask, alphaMask);
}
- for (; x<w; x++) {
+ SIMD_EPILOGUE(x, w, 3) {
uint s = src[(basex + x*ix) >> 16];
dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
}