diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2016-10-07 17:03:48 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-10-11 07:38:29 +0000 |
commit | a6dc28486910bde21d6854c1a64caadb0f663e1c (patch) | |
tree | df4e2b450a9280842c8c39103cef62d5d76b54f1 /src/gui/painting/qdrawhelper_avx2.cpp | |
parent | 17ac3b2c146d1f48b88dbdc09927ddc3dd3aef81 (diff) |
Avoid auto-vectorization of epilogues of manual vectorization
Defines a structure that tells the compiler in no uncertain terms the
maximum number of times a loop can be run.
This reduces the size of qdrawhelper_avx2.o from 22 kbytes to 11 kbytes.
Change-Id: Ie3d6281b04b4be3332497c15f3dfe9f185e20507
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/gui/painting/qdrawhelper_avx2.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper_avx2.cpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/gui/painting/qdrawhelper_avx2.cpp b/src/gui/painting/qdrawhelper_avx2.cpp index b3fa380dc0..acc9bc7ba1 100644 --- a/src/gui/painting/qdrawhelper_avx2.cpp +++ b/src/gui/painting/qdrawhelper_avx2.cpp @@ -201,7 +201,7 @@ inline static void BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_AVX2(quint32 *dst, _mm256_store_si256((__m256i *)&dst[x], dstVector); } } - for (; x < length; ++x) + SIMD_EPILOGUE(x, length, 7) blend_pixel(dst[x], src[x], const_alpha); } @@ -275,7 +275,7 @@ void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl, } // 3) Epilogue - for (; x < w; ++x) + SIMD_EPILOGUE(x, w, 7) dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); srcPixels += sbpl; @@ -322,7 +322,7 @@ void QT_FASTCALL comp_func_Source_avx2(uint *dst, const uint *src, int length, u } // 3) Epilogue - for (; x < length; ++x) + SIMD_EPILOGUE(x, length, 7) dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); } } @@ -353,7 +353,7 @@ void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, u dstVector = _mm256_add_epi8(colorVector, dstVector); _mm256_store_si256((__m256i *)&dst[x], dstVector); } - for (; x < length; ++x) + SIMD_EPILOGUE(x, length, 7) destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); } } |