diff options
Diffstat (limited to 'src/gui/painting/qcolortransform.cpp')
-rw-r--r-- | src/gui/painting/qcolortransform.cpp | 1275 |
1 files changed, 945 insertions, 330 deletions
diff --git a/src/gui/painting/qcolortransform.cpp b/src/gui/painting/qcolortransform.cpp index 579d99d09c..52fbebfa5d 100644 --- a/src/gui/painting/qcolortransform.cpp +++ b/src/gui/painting/qcolortransform.cpp @@ -1,9 +1,11 @@ -// Copyright (C) 2022 The Qt Company Ltd. +// Copyright (C) 2024 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qcolortransform.h" #include "qcolortransform_p.h" +#include "qcmyk_p.h" +#include "qcolorclut_p.h" #include "qcolormatrix_p.h" #include "qcolorspace_p.h" #include "qcolortrc_p.h" @@ -20,16 +22,6 @@ QT_BEGIN_NAMESPACE -std::shared_ptr<QColorTrcLut> lutFromTrc(const QColorTrc &trc) -{ - if (trc.m_type == QColorTrc::Type::Table) - return QColorTrcLut::fromTransferTable(trc.m_table); - if (trc.m_type == QColorTrc::Type::Function) - return QColorTrcLut::fromTransferFunction(trc.m_fun); - qWarning() << "TRC uninitialized"; - return nullptr; -} - void QColorTransformPrivate::updateLutsIn() const { if (colorSpaceIn->lut.generated.loadAcquire()) @@ -44,12 +36,12 @@ void QColorTransformPrivate::updateLutsIn() const } if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) { - colorSpaceIn->lut[0] = lutFromTrc(colorSpaceIn->trc[0]); + colorSpaceIn->lut[0] = QColorTrcLut::fromTrc(colorSpaceIn->trc[0]); colorSpaceIn->lut[1] = colorSpaceIn->lut[0]; colorSpaceIn->lut[2] = colorSpaceIn->lut[0]; } else { for (int i = 0; i < 3; ++i) - colorSpaceIn->lut[i] = lutFromTrc(colorSpaceIn->trc[i]); + colorSpaceIn->lut[i] = QColorTrcLut::fromTrc(colorSpaceIn->trc[i]); } colorSpaceIn->lut.generated.storeRelease(1); @@ -68,12 +60,12 @@ void QColorTransformPrivate::updateLutsOut() const } if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) { - colorSpaceOut->lut[0] = lutFromTrc(colorSpaceOut->trc[0]); + colorSpaceOut->lut[0] = QColorTrcLut::fromTrc(colorSpaceOut->trc[0]); colorSpaceOut->lut[1] = colorSpaceOut->lut[0]; colorSpaceOut->lut[2] = colorSpaceOut->lut[0]; } else { for (int i = 0; i < 3; ++i) - colorSpaceOut->lut[i] = lutFromTrc(colorSpaceOut->trc[i]); + colorSpaceOut->lut[i] = QColorTrcLut::fromTrc(colorSpaceOut->trc[i]); } colorSpaceOut->lut.generated.storeRelease(1); @@ -139,11 +131,31 @@ bool QColorTransform::compare(const QColorTransform &other) const return false; if (bool(d->colorSpaceOut) != bool(other.d->colorSpaceOut)) return false; - for (int i = 0; i < 3; ++i) { - if (d->colorSpaceIn && d->colorSpaceIn->trc[i] != other.d->colorSpaceIn->trc[i]) + if (d->colorSpaceIn) { + if (d->colorSpaceIn->transformModel != other.d->colorSpaceIn->transformModel) return false; - if (d->colorSpaceOut && d->colorSpaceOut->trc[i] != other.d->colorSpaceOut->trc[i]) + if (d->colorSpaceIn->isThreeComponentMatrix()) { + for (int i = 0; i < 3; ++i) { + if (d->colorSpaceIn && d->colorSpaceIn->trc[i] != other.d->colorSpaceIn->trc[i]) + return false; + } + } else { + if (!d->colorSpaceIn->equals(other.d->colorSpaceIn.constData())) + return false; + } + } + if (d->colorSpaceOut) { + if (d->colorSpaceOut->transformModel != other.d->colorSpaceOut->transformModel) return false; + if (d->colorSpaceOut->isThreeComponentMatrix()) { + for (int i = 0; i < 3; ++i) { + if (d->colorSpaceOut && d->colorSpaceOut->trc[i] != other.d->colorSpaceOut->trc[i]) + return false; + } + } else { + if (!d->colorSpaceOut->equals(other.d->colorSpaceOut.constData())) + return false; + } } return true; } @@ -159,29 +171,7 @@ QRgb QColorTransform::map(QRgb argb) const return argb; constexpr float f = 1.0f / 255.0f; QColorVector c = { qRed(argb) * f, qGreen(argb) * f, qBlue(argb) * f }; - if (d->colorSpaceIn->lut.generated.loadAcquire()) { - c.x = d->colorSpaceIn->lut[0]->toLinear(c.x); - c.y = d->colorSpaceIn->lut[1]->toLinear(c.y); - c.z = d->colorSpaceIn->lut[2]->toLinear(c.z); - } else { - c.x = d->colorSpaceIn->trc[0].apply(c.x); - c.y = d->colorSpaceIn->trc[1].apply(c.y); - c.z = d->colorSpaceIn->trc[2].apply(c.z); - } - c = d->colorMatrix.map(c); - c.x = std::max(0.0f, std::min(1.0f, c.x)); - c.y = std::max(0.0f, std::min(1.0f, c.y)); - c.z = std::max(0.0f, std::min(1.0f, c.z)); - if (d->colorSpaceOut->lut.generated.loadAcquire()) { - c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x); - c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y); - c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z); - } else { - c.x = d->colorSpaceOut->trc[0].applyInverse(c.x); - c.y = d->colorSpaceOut->trc[1].applyInverse(c.y); - c.z = d->colorSpaceOut->trc[2].applyInverse(c.z); - } - + c = d->map(c); return qRgba(c.x * 255 + 0.5f, c.y * 255 + 0.5f, c.z * 255 + 0.5f, qAlpha(argb)); } @@ -196,29 +186,7 @@ QRgba64 QColorTransform::map(QRgba64 rgba64) const return rgba64; constexpr float f = 1.0f / 65535.0f; QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f }; - if (d->colorSpaceIn->lut.generated.loadAcquire()) { - c.x = d->colorSpaceIn->lut[0]->toLinear(c.x); - c.y = d->colorSpaceIn->lut[1]->toLinear(c.y); - c.z = d->colorSpaceIn->lut[2]->toLinear(c.z); - } else { - c.x = d->colorSpaceIn->trc[0].apply(c.x); - c.y = d->colorSpaceIn->trc[1].apply(c.y); - c.z = d->colorSpaceIn->trc[2].apply(c.z); - } - c = d->colorMatrix.map(c); - c.x = std::max(0.0f, std::min(1.0f, c.x)); - c.y = std::max(0.0f, std::min(1.0f, c.y)); - c.z = std::max(0.0f, std::min(1.0f, c.z)); - if (d->colorSpaceOut->lut.generated.loadAcquire()) { - c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x); - c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y); - c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z); - } else { - c.x = d->colorSpaceOut->trc[0].applyInverse(c.x); - c.y = d->colorSpaceOut->trc[1].applyInverse(c.y); - c.z = d->colorSpaceOut->trc[2].applyInverse(c.z); - } - + c = d->map(c); return QRgba64::fromRgba64(c.x * 65535.f + 0.5f, c.y * 65535.f + 0.5f, c.z * 65535.f + 0.5f, rgba64.alpha()); } @@ -232,14 +200,11 @@ QRgbaFloat16 QColorTransform::map(QRgbaFloat16 rgbafp16) const { if (!d) return rgbafp16; - QColorVector c; - c.x = d->colorSpaceIn->trc[0].applyExtended(rgbafp16.r); - c.y = d->colorSpaceIn->trc[1].applyExtended(rgbafp16.g); - c.z = d->colorSpaceIn->trc[2].applyExtended(rgbafp16.b); - c = d->colorMatrix.map(c); - rgbafp16.r = qfloat16(d->colorSpaceOut->trc[0].applyInverseExtended(c.x)); - rgbafp16.g = qfloat16(d->colorSpaceOut->trc[1].applyInverseExtended(c.y)); - rgbafp16.b = qfloat16(d->colorSpaceOut->trc[2].applyInverseExtended(c.z)); + QColorVector c(rgbafp16.r, rgbafp16.g, rgbafp16.b); + c = d->mapExtended(c); + rgbafp16.r = qfloat16(c.x); + rgbafp16.g = qfloat16(c.y); + rgbafp16.b = qfloat16(c.z); return rgbafp16; } @@ -253,14 +218,11 @@ QRgbaFloat32 QColorTransform::map(QRgbaFloat32 rgbafp32) const { if (!d) return rgbafp32; - QColorVector c; - c.x = d->colorSpaceIn->trc[0].applyExtended(rgbafp32.r); - c.y = d->colorSpaceIn->trc[1].applyExtended(rgbafp32.g); - c.z = d->colorSpaceIn->trc[2].applyExtended(rgbafp32.b); - c = d->colorMatrix.map(c); - rgbafp32.r = d->colorSpaceOut->trc[0].applyInverseExtended(c.x); - rgbafp32.g = d->colorSpaceOut->trc[1].applyInverseExtended(c.y); - rgbafp32.b = d->colorSpaceOut->trc[2].applyInverseExtended(c.z); + QColorVector c(rgbafp32.r, rgbafp32.g, rgbafp32.b); + c = d->mapExtended(c); + rgbafp32.r = c.x; + rgbafp32.g = c.y; + rgbafp32.b = c.z; return rgbafp32; } @@ -273,44 +235,42 @@ QColor QColorTransform::map(const QColor &color) const if (!d) return color; QColor clr = color; - if (color.spec() != QColor::ExtendedRgb || color.spec() != QColor::Rgb) - clr = clr.toRgb(); - - QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() }; - if (clr.spec() == QColor::ExtendedRgb) { - c.x = d->colorSpaceIn->trc[0].applyExtended(c.x); - c.y = d->colorSpaceIn->trc[1].applyExtended(c.y); - c.z = d->colorSpaceIn->trc[2].applyExtended(c.z); - } else { - c.x = d->colorSpaceIn->trc[0].apply(c.x); - c.y = d->colorSpaceIn->trc[1].apply(c.y); - c.z = d->colorSpaceIn->trc[2].apply(c.z); - } - c = d->colorMatrix.map(c); - bool inGamut = c.x >= 0.0f && c.x <= 1.0f && c.y >= 0.0f && c.y <= 1.0f && c.z >= 0.0f && c.z <= 1.0f; - if (inGamut) { - if (d->colorSpaceOut->lut.generated.loadAcquire()) { - c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x); - c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y); - c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z); - } else { - c.x = d->colorSpaceOut->trc[0].applyInverse(c.x); - c.y = d->colorSpaceOut->trc[1].applyInverse(c.y); - c.z = d->colorSpaceOut->trc[2].applyInverse(c.z); - } - } else { - c.x = d->colorSpaceOut->trc[0].applyInverseExtended(c.x); - c.y = d->colorSpaceOut->trc[1].applyInverseExtended(c.y); - c.z = d->colorSpaceOut->trc[2].applyInverseExtended(c.z); + if (d->colorSpaceIn->colorModel == QColorSpace::ColorModel::Rgb) { + if (color.spec() != QColor::ExtendedRgb && color.spec() != QColor::Rgb) + clr = clr.toRgb(); + } else if (d->colorSpaceIn->colorModel == QColorSpace::ColorModel::Cmyk) { + if (color.spec() != QColor::Cmyk) + clr = clr.toCmyk(); } + + QColorVector c = + (clr.spec() == QColor::Cmyk) + ? QColorVector(clr.cyanF(), clr.magentaF(), clr.yellowF(), clr.blackF()) + : QColorVector(clr.redF(), clr.greenF(), clr.blueF()); + + c = d->mapExtended(c); + QColor out; - out.setRgbF(c.x, c.y, c.z, color.alphaF()); + if (d->colorSpaceOut->colorModel == QColorSpace::ColorModel::Cmyk) { + c.x = std::clamp(c.x, 0.f, 1.f); + c.y = std::clamp(c.y, 0.f, 1.f); + c.z = std::clamp(c.z, 0.f, 1.f); + c.w = std::clamp(c.w, 0.f, 1.f); + out.setCmykF(c.x, c.y, c.z, c.w, color.alphaF()); + } else { + out.setRgbF(c.x, c.y, c.z, color.alphaF()); + } return out; } // Optimized sub-routines for fast block based conversion: -template<bool DoClamp = true> +enum ApplyMatrixForm { + DoNotClamp = 0, + DoClamp = 1 +}; + +template<ApplyMatrixForm doClamp = DoClamp> static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix) { #if defined(__SSE2__) @@ -330,7 +290,7 @@ static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorM cx = _mm_add_ps(cx, cy); cx = _mm_add_ps(cx, cz); // Clamp: - if (DoClamp) { + if (doClamp) { cx = _mm_min_ps(cx, maxV); cx = _mm_max_ps(cx, minV); } @@ -350,19 +310,19 @@ static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorM cx = vaddq_f32(cx, cy); cx = vaddq_f32(cx, cz); // Clamp: - if (DoClamp) { + if (doClamp) { cx = vminq_f32(cx, maxV); cx = vmaxq_f32(cx, minV); } vst1q_f32(&buffer[j].x, cx); } #else - for (int j = 0; j < len; ++j) { + for (qsizetype j = 0; j < len; ++j) { const QColorVector cv = colorMatrix.map(buffer[j]); - if (DoClamp) { - buffer[j].x = std::max(0.0f, std::min(1.0f, cv.x)); - buffer[j].y = std::max(0.0f, std::min(1.0f, cv.y)); - buffer[j].z = std::max(0.0f, std::min(1.0f, cv.z)); + if (doClamp) { + buffer[j].x = std::clamp(cv.x, 0.f, 1.f); + buffer[j].y = std::clamp(cv.y, 0.f, 1.f); + buffer[j].z = std::clamp(cv.z, 0.f, 1.f); } else { buffer[j] = cv; } @@ -370,6 +330,39 @@ static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorM #endif } +template<ApplyMatrixForm doClamp = DoClamp> +static void clampIfNeeded(QColorVector *buffer, const qsizetype len) +{ + if constexpr (doClamp != DoClamp) + return; +#if defined(__SSE2__) + const __m128 minV = _mm_set1_ps(0.0f); + const __m128 maxV = _mm_set1_ps(1.0f); + for (qsizetype j = 0; j < len; ++j) { + __m128 c = _mm_loadu_ps(&buffer[j].x); + c = _mm_min_ps(c, maxV); + c = _mm_max_ps(c, minV); + _mm_storeu_ps(&buffer[j].x, c); + } +#elif defined(__ARM_NEON__) + const float32x4_t minV = vdupq_n_f32(0.0f); + const float32x4_t maxV = vdupq_n_f32(1.0f); + for (qsizetype j = 0; j < len; ++j) { + float32x4_t c = vld1q_f32(&buffer[j].x); + c = vminq_f32(c, maxV); + c = vmaxq_f32(c, minV); + vst1q_f32(&buffer[j].x, c); + } +#else + for (qsizetype j = 0; j < len; ++j) { + const QColorVector cv = buffer[j]; + buffer[j].x = std::clamp(cv.x, 0.f, 1.f); + buffer[j].y = std::clamp(cv.y, 0.f, 1.f); + buffer[j].z = std::clamp(cv.z, 0.f, 1.f); + } +#endif +} + #if defined(__SSE2__) || defined(__ARM_NEON__) template<typename T> static constexpr inline bool isArgb(); @@ -386,9 +379,37 @@ inline int getAlpha<QRgb>(const QRgb &p) template<> inline int getAlpha<QRgba64>(const QRgba64 &p) { return p.alpha(); } + +template<typename T> +static inline constexpr int getFactor(); +template<> +inline constexpr int getFactor<QRgb>() +{ return 255; } +template<> +inline constexpr int getFactor<QRgba64>() +{ return 65535; } #endif template<typename T> +static float getAlphaF(const T &); +template<> float getAlphaF(const QRgb &r) +{ + return qAlpha(r) * (1.f / 255.f); +} +template<> float getAlphaF(const QCmyk32 &) +{ + return 1.f; +} +template<> float getAlphaF(const QRgba64 &r) +{ + return r.alpha() * (1.f / 65535.f); +} +template<> float getAlphaF(const QRgbaFloat32 &r) +{ + return r.a; +} + +template<typename T> static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); template<typename T> static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); @@ -424,9 +445,15 @@ inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v) template<typename T> static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); constexpr bool isARGB = isArgb<T>(); + const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution); for (qsizetype i = 0; i < len; ++i) { __m128i v; loadP<T>(src[i], v); @@ -443,26 +470,37 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp vf = _mm_andnot_ps(vAlphaMask, vf); // LUT - v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); - vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); + if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) { + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); + vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); - _mm_storeu_ps(&buffer[i].x, vf); + _mm_storeu_ps(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } template<> void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256)); const __m128 vZero = _mm_set1_ps(0.0f); - const __m128 vOne = _mm_set1_ps(1.0f); + const float factor = 1.f / float(QColorTrcLut::Resolution); + const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor, + INFINITY); for (qsizetype i = 0; i < len; ++i) { __m128 vf = _mm_loadu_ps(&src[i].r); // Approximate 1/a: @@ -478,10 +516,10 @@ void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *s // LUT const __m128 under = _mm_cmplt_ps(vf, vZero); - const __m128 over = _mm_cmpgt_ps(vf, vOne); + const __m128 over = _mm_cmpgt_ps(vf, vRangeMax); if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within gamut - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -500,7 +538,7 @@ void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *s } } -// Load to [0-4080] in 4x32 SIMD +// Load to [0->TrcResolution] in 4x32 SIMD template<typename T> static inline void loadPU(const T &p, __m128i &v); @@ -514,7 +552,7 @@ inline void loadPU<QRgb>(const QRgb &p, __m128i &v) v = _mm_unpacklo_epi8(v, _mm_setzero_si128()); v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); #endif - v = _mm_slli_epi32(v, 4); + v = _mm_slli_epi32(v, QColorTrcLut::ShiftUp); } template<> @@ -527,7 +565,7 @@ inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v) #else v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); #endif - v = _mm_srli_epi32(v, 4); + v = _mm_srli_epi32(v, QColorTrcLut::ShiftDown); } template<typename T> @@ -535,34 +573,51 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len { constexpr bool isARGB = isArgb<T>(); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); + const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution); for (qsizetype i = 0; i < len; ++i) { __m128i v; loadPU<T>(src[i], v); const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); - v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); - __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); - _mm_storeu_ps(&buffer[i].x, vf); + if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) { + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); + v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); + __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); + _mm_storeu_ps(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } template<> void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); const __m128 vZero = _mm_set1_ps(0.0f); - const __m128 vOne = _mm_set1_ps(1.0f); + const float factor = 1.f / float(QColorTrcLut::Resolution); + const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor, + d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor, + INFINITY); for (qsizetype i = 0; i < len; ++i) { __m128 vf = _mm_loadu_ps(&src[i].r); const __m128 under = _mm_cmplt_ps(vf, vZero); - const __m128 over = _mm_cmpgt_ps(vf, vOne); + const __m128 over = _mm_cmpgt_ps(vf, vRangeMax); if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within gamut - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -597,11 +652,28 @@ inline void loadP<QRgba64>(const QRgba64 &p, uint32x4_t &v) v = vmovl_u16(vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p)))); } +static inline bool test_all_zero(uint32x4_t p) +{ +#if defined(Q_PROCESSOR_ARM_64) + return vaddvq_u32(p) == 0; +#else + const uint32x2_t tmp = vpadd_u32(vget_low_u32(p), vget_high_u32(p)); + return vget_lane_u32(vpadd_u32(tmp, tmp), 0) == 0; +#endif +} + template<typename T> static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) { constexpr bool isARGB = isArgb<T>(); const float iFF00 = 1.0f / (255 * 256); + const uint32x4_t vRangeMax = { + isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution }; for (qsizetype i = 0; i < len; ++i) { uint32x4_t v; loadP<T>(src[i], v); @@ -623,20 +695,27 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp vf = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf), vAlphaMask)); // LUT - v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f))); + v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))); const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0); const int gidx = vgetq_lane_u32(v, 1); const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); - vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); + if (test_all_zero(vcgtq_u32(v, vRangeMax))) { + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); + vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); - vst1q_f32(&buffer[i].x, vf); + vst1q_f32(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } -// Load to [0-4080] in 4x32 SIMD +// Load to [0->TrcResultion] in 4x32 SIMD template<typename T> static inline void loadPU(const T &p, uint32x4_t &v); @@ -644,7 +723,7 @@ template<> inline void loadPU<QRgb>(const QRgb &p, uint32x4_t &v) { v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p))))); - v = vshlq_n_u32(v, 4); + v = vshlq_n_u32(v, QColorTrcLut::ShiftUp); } template<> @@ -653,7 +732,7 @@ inline void loadPU<QRgba64>(const QRgba64 &p, uint32x4_t &v) uint16x4_t v16 = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p))); v16 = vsub_u16(v16, vshr_n_u16(v16, 8)); v = vmovl_u16(v16); - v = vshrq_n_u32(v, 4); + v = vshrq_n_u32(v, QColorTrcLut::ShiftDown); } template<typename T> @@ -661,17 +740,31 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len { constexpr bool isARGB = isArgb<T>(); const float iFF00 = 1.0f / (255 * 256); + const uint32x4_t vRangeMax = { + isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear, + d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear, + isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear + : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear, + QColorTrcLut::Resolution }; for (qsizetype i = 0; i < len; ++i) { uint32x4_t v; loadPU<T>(src[i], v); const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0); const int gidx = vgetq_lane_u32(v, 1); const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); - v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); - float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); - vst1q_f32(&buffer[i].x, vf); + if (test_all_zero(vcgtq_u32(v, vRangeMax))) { + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1); + v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2); + float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00); + vst1q_f32(&buffer[i].x, vf); + } else { + constexpr float f = 1.f / QColorTrcLut::Resolution; + buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f); + buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f); + buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f); + } } } #else @@ -682,7 +775,7 @@ void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizet const uint p = src[i]; const int a = qAlpha(p); if (a) { - const float ia = 4080.0f / a; + const float ia = float(QColorTrcLut::Resolution) / a; const int ridx = int(qRed(p) * ia + 0.5f); const int gidx = int(qGreen(p) * ia + 0.5f); const int bidx = int(qBlue(p) * ia + 0.5f); @@ -702,7 +795,7 @@ void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const const QRgba64 &p = src[i]; const int a = p.alpha(); if (a) { - const float ia = 4080.0f / a; + const float ia = float(QColorTrcLut::Resolution) / a; const int ridx = int(p.red() * ia + 0.5f); const int gidx = int(p.green() * ia + 0.5f); const int bidx = int(p.blue() * ia + 0.5f); @@ -792,17 +885,19 @@ inline void storeP<QRgba64>(QRgba64 &p, __m128i &v, int a) #endif } -template<typename T> -static void storePremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, +template<typename D, typename S, + typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>> +static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); - constexpr bool isARGB = isArgb<T>(); + constexpr bool isARGB = isArgb<D>(); + static_assert(getFactor<D>() >= getFactor<S>()); for (qsizetype i = 0; i < len; ++i) { - const int a = getAlpha<T>(src[i]); + const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>()); __m128 vf = _mm_loadu_ps(&buffer[i].x); - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); __m128 va = _mm_mul_ps(_mm_set1_ps(a), iFF00); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); @@ -813,21 +908,21 @@ static void storePremultiplied(T *dst, const T *src, const QColorVector *buffer, vf = _mm_cvtepi32_ps(v); vf = _mm_mul_ps(vf, va); v = _mm_cvtps_epi32(vf); - storeP<T>(dst[i], v, a); + storeP<D>(dst[i], v, a); } } -template<> -void storePremultiplied<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, - const QColorVector *buffer, const qsizetype len, - const QColorTransformPrivate *d_ptr) +template<typename S> +static void storePremultiplied(QRgbaFloat32 *dst, const S *src, + const QColorVector *buffer, const qsizetype len, + const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 vZero = _mm_set1_ps(0.0f); const __m128 vOne = _mm_set1_ps(1.0f); const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256)); for (qsizetype i = 0; i < len; ++i) { - const float a = src[i].a; + const float a = getAlphaF<S>(src[i]); __m128 va = _mm_set1_ps(a); __m128 vf = _mm_loadu_ps(&buffer[i].x); const __m128 under = _mm_cmplt_ps(vf, vZero); @@ -835,7 +930,7 @@ void storePremultiplied<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within gamut va = _mm_mul_ps(va, viFF00); - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -874,16 +969,18 @@ inline void storePU<QRgba64>(QRgba64 &p, __m128i &v, int a) _mm_storel_epi64((__m128i *)&p, v); } -template<typename T> -static void storeUnpremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, +template<typename D, typename S, + typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>> +static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); - constexpr bool isARGB = isArgb<T>(); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); + constexpr bool isARGB = isArgb<D>(); + static_assert(getFactor<D>() >= getFactor<S>()); for (qsizetype i = 0; i < len; ++i) { - const int a = getAlpha<T>(src[i]); + const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>()); __m128 vf = _mm_loadu_ps(&buffer[i].x); - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -891,27 +988,27 @@ static void storeUnpremultiplied(T *dst, const T *src, const QColorVector *buffe v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0); v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2); - storePU<T>(dst[i], v, a); + storePU<D>(dst[i], v, a); } } -template<> -void storeUnpremultiplied<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, - const QColorVector *buffer, const qsizetype len, - const QColorTransformPrivate *d_ptr) +template<typename S> +void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src, + const QColorVector *buffer, const qsizetype len, + const QColorTransformPrivate *d_ptr) { - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 vZero = _mm_set1_ps(0.0f); const __m128 vOne = _mm_set1_ps(1.0f); const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256)); for (qsizetype i = 0; i < len; ++i) { - const float a = src[i].a; + const float a = getAlphaF<S>(src[i]); __m128 vf = _mm_loadu_ps(&buffer[i].x); const __m128 under = _mm_cmplt_ps(vf, vZero); const __m128 over = _mm_cmpgt_ps(vf, vOne); if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within gamut - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -931,15 +1028,14 @@ void storeUnpremultiplied<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *s } template<typename T> -static void storeOpaque(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, +static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); constexpr bool isARGB = isArgb<T>(); for (qsizetype i = 0; i < len; ++i) { __m128 vf = _mm_loadu_ps(&buffer[i].x); - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -952,12 +1048,10 @@ static void storeOpaque(T *dst, const T *src, const QColorVector *buffer, const } template<> -void storeOpaque<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, - const QColorVector *buffer, const qsizetype len, - const QColorTransformPrivate *d_ptr) +void storeOpaque(QRgbaFloat32 *dst, const QColorVector *buffer, const qsizetype len, + const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); - const __m128 v4080 = _mm_set1_ps(4080.f); + const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution)); const __m128 vZero = _mm_set1_ps(0.0f); const __m128 vOne = _mm_set1_ps(1.0f); const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256)); @@ -967,7 +1061,7 @@ void storeOpaque<QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const __m128 over = _mm_cmpgt_ps(vf, vOne); if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) { // Within gamut - __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); + __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes)); const int ridx = _mm_extract_epi16(v, 0); const int gidx = _mm_extract_epi16(v, 2); const int bidx = _mm_extract_epi16(v, 4); @@ -1000,16 +1094,18 @@ inline void storeP<QRgba64>(QRgba64 &p, const uint16x4_t &v) vst1_u16((uint16_t *)&p, v); } -template<typename T> -static void storePremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, +template<typename D, typename S, + typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>> +static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { const float iFF00 = 1.0f / (255 * 256); - constexpr bool isARGB = isArgb<T>(); + constexpr bool isARGB = isArgb<D>(); + static_assert(getFactor<D>() >= getFactor<S>()); for (qsizetype i = 0; i < len; ++i) { - const int a = getAlpha<T>(src[i]); + const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>()); float32x4_t vf = vld1q_f32(&buffer[i].x); - uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f))); + uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))); const int ridx = vgetq_lane_u32(v, 0); const int gidx = vgetq_lane_u32(v, 1); const int bidx = vgetq_lane_u32(v, 2); @@ -1022,7 +1118,7 @@ static void storePremultiplied(T *dst, const T *src, const QColorVector *buffer, v = vcvtq_u32_f32(vf); uint16x4_t v16 = vmovn_u32(v); v16 = vset_lane_u16(a, v16, 3); - storeP<T>(dst[i], v16); + storeP<D>(dst[i], v16); } } @@ -1044,34 +1140,35 @@ inline void storePU<QRgba64>(QRgba64 &p, uint16x4_t &v, int a) vst1_u16((uint16_t *)&p, v); } -template<typename T> -static void storeUnpremultiplied(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, +template<typename D, typename S, + typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>> +static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - constexpr bool isARGB = isArgb<T>(); + constexpr bool isARGB = isArgb<D>(); + static_assert(getFactor<D>() >= getFactor<S>()); for (qsizetype i = 0; i < len; ++i) { - const int a = getAlpha<T>(src[i]); + const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>()); float32x4_t vf = vld1q_f32(&buffer[i].x); - uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f)))); + uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)))); const int ridx = vget_lane_u16(v, 0); const int gidx = vget_lane_u16(v, 1); const int bidx = vget_lane_u16(v, 2); v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0); v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1); v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2); - storePU<T>(dst[i], v, a); + storePU<D>(dst[i], v, a); } } template<typename T> -static void storeOpaque(T *dst, const T *src, const QColorVector *buffer, const qsizetype len, +static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); constexpr bool isARGB = isArgb<T>(); for (qsizetype i = 0; i < len; ++i) { float32x4_t vf = vld1q_f32(&buffer[i].x); - uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f)))); + uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)))); const int ridx = vget_lane_u16(v, 0); const int gidx = vget_lane_u16(v, 1); const int bidx = vget_lane_u16(v, 2); @@ -1088,9 +1185,9 @@ static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *b for (qsizetype i = 0; i < len; ++i) { const int a = qAlpha(src[i]); const float fa = a / (255.0f * 256.0f); - const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; - const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; - const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; + const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)]; + const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)]; + const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)]; dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); } } @@ -1106,10 +1203,9 @@ static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector } } -static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, +static void storeOpaque(QRgb *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); for (qsizetype i = 0; i < len; ++i) { const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); @@ -1118,34 +1214,36 @@ static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, } } -static void storePremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, +template<typename S> +static void storePremultiplied(QRgba64 *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { for (qsizetype i = 0; i < len; ++i) { - const int a = src[i].alpha(); + const int a = getAlphaF(src[i]) * 65535.f; const float fa = a / (255.0f * 256.0f); - const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; - const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; - const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; + const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)]; + const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)]; + const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)]; dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); } } -static void storeUnpremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, +template<typename S> +static void storeUnpremultiplied(QRgba64 *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { for (qsizetype i = 0; i < len; ++i) { + const int a = getAlphaF(src[i]) * 65535.f; const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x); const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z); - dst[i] = qRgba64(r, g, b, src[i].alpha()); + dst[i] = qRgba64(r, g, b, a); } } -static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, +static void storeOpaque(QRgba64 *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); for (qsizetype i = 0; i < len; ++i) { const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x); const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); @@ -1155,11 +1253,12 @@ static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *bu } #endif #if !defined(__SSE2__) -static void storePremultiplied(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColorVector *buffer, +template<typename S> +static void storePremultiplied(QRgbaFloat32 *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { for (qsizetype i = 0; i < len; ++i) { - const float a = src[i].a; + const float a = getAlphaF(src[i]); dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x) * a; dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y) * a; dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z) * a; @@ -1167,11 +1266,12 @@ static void storePremultiplied(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const } } -static void storeUnpremultiplied(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColorVector *buffer, +template<typename S> +static void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { for (qsizetype i = 0; i < len; ++i) { - const float a = src[i].a; + const float a = getAlphaF(src[i]); dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x); dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y); dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z); @@ -1179,10 +1279,9 @@ static void storeUnpremultiplied(QRgbaFloat32 *dst, const QRgbaFloat32 *src, con } } -static void storeOpaque(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColorVector *buffer, const qsizetype len, +static void storeOpaque(QRgbaFloat32 *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); for (qsizetype i = 0; i < len; ++i) { dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x); dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y); @@ -1191,20 +1290,59 @@ static void storeOpaque(QRgbaFloat32 *dst, const QRgbaFloat32 *src, const QColor } } #endif -static void storeGray(quint8 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, + +static void loadGray(QColorVector *buffer, const quint8 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) +{ + if (d_ptr->colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray || + (d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[1] && + d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[2])) { + for (qsizetype i = 0; i < len; ++i) { + const float y = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(src[i]); + buffer[i] = d_ptr->colorSpaceIn->whitePoint * y; + } + } else { + for (qsizetype i = 0; i < len; ++i) { + QColorVector v; + v.x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(src[i]); + v.y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(src[i]); + v.z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(src[i]); + buffer[i] = d_ptr->colorSpaceIn->toXyz.map(v); + } + } +} + +static void loadGray(QColorVector *buffer, const quint16 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) +{ + if (d_ptr->colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray || + (d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[1] && + d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[2])) { + for (qsizetype i = 0; i < len; ++i) { + const float y = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(src[i]); + buffer[i] = d_ptr->colorSpaceIn->whitePoint * y; + } + } else { + for (qsizetype i = 0; i < len; ++i) { + QColorVector v; + v.x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(src[i]); + v.y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(src[i]); + v.z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(src[i]); + buffer[i] = d_ptr->colorSpaceIn->toXyz.map(v); + } + } +} + +static void storeOpaque(quint8 *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); for (qsizetype i = 0; i < len; ++i) - dst[i] = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); + dst[i] = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].y); } -static void storeGray(quint16 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, +static void storeOpaque(quint16 *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr) { - Q_UNUSED(src); for (qsizetype i = 0; i < len; ++i) - dst[i] = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); + dst[i] = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].y); } static constexpr qsizetype WorkBlockSize = 256; @@ -1218,175 +1356,648 @@ private: alignas(T) char data[sizeof(T) * Count]; }; +void loadUnpremultipliedLUT(QColorVector *buffer, const uchar *src, const qsizetype len) +{ + const float f = 1.0f / 255.f; + for (qsizetype i = 0; i < len; ++i) { + const float p = src[i] * f; + buffer[i].x = p; + buffer[i].y = p; + buffer[i].z = p; + } +} + +void loadUnpremultipliedLUT(QColorVector *buffer, const quint16 *src, const qsizetype len) +{ + const float f = 1.0f / 65535.f; + for (qsizetype i = 0; i < len; ++i) { + const float p = src[i] * f; + buffer[i].x = p; + buffer[i].y = p; + buffer[i].z = p; + } +} + +void loadUnpremultipliedLUT(QColorVector *buffer, const QRgb *src, const qsizetype len) +{ + const float f = 1.0f / 255.f; + for (qsizetype i = 0; i < len; ++i) { + const uint p = src[i]; + buffer[i].x = qRed(p) * f; + buffer[i].y = qGreen(p) * f; + buffer[i].z = qBlue(p) * f; + } +} + +void loadUnpremultipliedLUT(QColorVector *buffer, const QCmyk32 *src, const qsizetype len) +{ + const float f = 1.0f / 255.f; + for (qsizetype i = 0; i < len; ++i) { + const QCmyk32 p = src[i]; + buffer[i].x = (p.cyan() * f); + buffer[i].y = (p.magenta() * f); + buffer[i].z = (p.yellow() * f); + buffer[i].w = (p.black() * f); + } +} + +void loadUnpremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len) +{ + const float f = 1.0f / 65535.f; + for (qsizetype i = 0; i < len; ++i) { + buffer[i].x = src[i].red() * f; + buffer[i].y = src[i].green() * f; + buffer[i].z = src[i].blue() * f; + } +} + +void loadUnpremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + buffer[i].x = src[i].r; + buffer[i].y = src[i].g; + buffer[i].z = src[i].b; + } +} + +void loadPremultipliedLUT(QColorVector *, const uchar *, const qsizetype) +{ + Q_UNREACHABLE(); +} + +void loadPremultipliedLUT(QColorVector *, const quint16 *, const qsizetype) +{ + Q_UNREACHABLE(); +} + +void loadPremultipliedLUT(QColorVector *buffer, const QRgb *src, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const uint p = src[i]; + const float f = 1.0f / qAlpha(p); + buffer[i].x = (qRed(p) * f); + buffer[i].y = (qGreen(p) * f); + buffer[i].z = (qBlue(p) * f); + } +} + +void loadPremultipliedLUT(QColorVector *, const QCmyk32 *, const qsizetype) +{ + Q_UNREACHABLE(); +} + +void loadPremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const float f = 1.0f / src[i].alpha(); + buffer[i].x = (src[i].red() * f); + buffer[i].y = (src[i].green() * f); + buffer[i].z = (src[i].blue() * f); + } +} + +void loadPremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const float f = 1.0f / src[i].a; + buffer[i].x = src[i].r * f; + buffer[i].y = src[i].g * f; + buffer[i].z = src[i].b * f; + } +} template<typename T> -void QColorTransformPrivate::apply(T *dst, const T *src, qsizetype count, TransformFlags flags) const +static void storeUnpremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len) { - if (!colorMatrix.isValid()) - return; + for (qsizetype i = 0; i < len; ++i) { + const int r = buffer[i].x * 255.f; + const int g = buffer[i].y * 255.f; + const int b = buffer[i].z * 255.f; + dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0); + } +} - updateLutsIn(); - updateLutsOut(); +template<> +void storeUnpremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int r = buffer[i].x * 255.f; + const int g = buffer[i].y * 255.f; + const int b = buffer[i].z * 255.f; + dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0); + } +} - bool doApplyMatrix = !colorMatrix.isIdentity(); - constexpr bool DoClip = !std::is_same_v<T, QRgbaFloat16> && !std::is_same_v<T, QRgbaFloat32>; - QUninitialized<QColorVector, WorkBlockSize> buffer; +template<typename T> +void storeUnpremultipliedLUT(QCmyk32 *dst, const T *, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int c = buffer[i].x * 255.f; + const int m = buffer[i].y * 255.f; + const int y = buffer[i].z * 255.f; + const int k = buffer[i].w * 255.f; + dst[i] = QCmyk32(c, m, y, k); + } +} - qsizetype i = 0; - while (i < count) { - const qsizetype len = qMin(count - i, WorkBlockSize); - if (flags & InputPremultiplied) - loadPremultiplied(buffer, src + i, len, this); - else - loadUnpremultiplied(buffer, src + i, len, this); +template<typename T> +static void storeUnpremultipliedLUT(QRgba64 *dst, const T *, + const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int r = buffer[i].x * 65535.f; + const int g = buffer[i].y * 65535.f; + const int b = buffer[i].z * 65535.f; + dst[i] = qRgba64(r, g, b, 65535); + } +} - if (doApplyMatrix) - applyMatrix<DoClip>(buffer, len, colorMatrix); +template<> +void storeUnpremultipliedLUT(QRgba64 *dst, const QRgb *src, + const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int a = qAlpha(src[i]) * 257; + const int r = buffer[i].x * 65535.f; + const int g = buffer[i].y * 65535.f; + const int b = buffer[i].z * 65535.f; + dst[i] = qRgba64(r, g, b, a); + } +} - if (flags & InputOpaque) - storeOpaque(dst + i, src + i, buffer, len, this); - else if (flags & OutputPremultiplied) - storePremultiplied(dst + i, src + i, buffer, len, this); - else - storeUnpremultiplied(dst + i, src + i, buffer, len, this); +template<> +void storeUnpremultipliedLUT(QRgba64 *dst, const QRgba64 *src, + const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int r = buffer[i].x * 65535.f; + const int g = buffer[i].y * 65535.f; + const int b = buffer[i].z * 65535.f; + dst[i] = qRgba64(r, g, b, src[i].alpha()); + } +} - i += len; +template<typename T> +static void storeUnpremultipliedLUT(QRgbaFloat32 *dst, const T *src, + const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const float r = buffer[i].x; + const float g = buffer[i].y; + const float b = buffer[i].z; + dst[i] = QRgbaFloat32{r, g, b, getAlphaF(src[i])}; } } -template<typename D, typename S> -void QColorTransformPrivate::applyReturnGray(D *dst, const S *src, qsizetype count, TransformFlags flags) const +template<typename T> +static void storePremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len) { - if (!colorMatrix.isValid()) - return; + for (qsizetype i = 0; i < len; ++i) { + const int r = buffer[i].x * 255.f; + const int g = buffer[i].y * 255.f; + const int b = buffer[i].z * 255.f; + dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0); + } +} - updateLutsIn(); - updateLutsOut(); +template<> +void storePremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int a = qAlpha(src[i]); + const int r = buffer[i].x * a; + const int g = buffer[i].y * a; + const int b = buffer[i].z * a; + dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0); + } +} - QUninitialized<QColorVector, WorkBlockSize> buffer; +template<typename T> +static void storePremultipliedLUT(QCmyk32 *dst, const T *src, const QColorVector *buffer, const qsizetype len) +{ + storeUnpremultipliedLUT(dst, src, buffer, len); +} - qsizetype i = 0; - while (i < count) { - const qsizetype len = qMin(count - i, WorkBlockSize); - if (flags & InputPremultiplied) - loadPremultiplied(buffer, src + i, len, this); - else - loadUnpremultiplied(buffer, src + i, len, this); +template<typename T> +static void storePremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int r = buffer[i].x * 65535.f; + const int g = buffer[i].y * 65535.f; + const int b = buffer[i].z * 65535.f; + dst[i] = qRgba64(r, g, b, 65535); + } +} - applyMatrix(buffer, len, colorMatrix); +template<> +void storePremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int a = qAlpha(src[i]) * 257; + const int r = buffer[i].x * a; + const int g = buffer[i].y * a; + const int b = buffer[i].z * a; + dst[i] = qRgba64(r, g, b, a); + } +} - storeGray(dst + i, src + i, buffer, len, this); +template<> +void storePremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const int a = src[i].alpha(); + const int r = buffer[i].x * a; + const int g = buffer[i].y * a; + const int b = buffer[i].z * a; + dst[i] = qRgba64(r, g, b, a); + } +} - i += len; +template<typename T> +static void storePremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) { + const float a = getAlphaF(src[i]); + const float r = buffer[i].x * a; + const float g = buffer[i].y * a; + const float b = buffer[i].z * a; + dst[i] = QRgbaFloat32{r, g, b, a}; } } -/*! - \internal - \enum QColorTransformPrivate::TransformFlag +static void visitElement(const QColorSpacePrivate::TransferElement &element, QColorVector *buffer, const qsizetype len) +{ + const bool doW = element.trc[3].isValid(); + for (qsizetype i = 0; i < len; ++i) { + buffer[i].x = element.trc[0].apply(buffer[i].x); + buffer[i].y = element.trc[1].apply(buffer[i].y); + buffer[i].z = element.trc[2].apply(buffer[i].z); + if (doW) + buffer[i].w = element.trc[3].apply(buffer[i].w); + } +} - Defines how the transform is to be applied. +static void visitElement(const QColorMatrix &element, QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) + buffer[i] = element.map(buffer[i]); +} - \value Unpremultiplied The input and output should both be unpremultiplied. - \value InputOpaque The input is guaranteed to be opaque. - \value InputPremultiplied The input is premultiplied. - \value OutputPremultiplied The output should be premultiplied. - \value Premultiplied Both input and output should both be premultiplied. -*/ +static void visitElement(const QColorVector &offset, QColorVector *buffer, const qsizetype len) +{ + for (qsizetype i = 0; i < len; ++i) + buffer[i] += offset; +} + +static void visitElement(const QColorCLUT &element, QColorVector *buffer, const qsizetype len) +{ + if (element.isEmpty()) + return; + for (qsizetype i = 0; i < len; ++i) + buffer[i] = element.apply(buffer[i]); +} /*! \internal - Prepares a color transformation for fast application. You do not need to - call this explicitly as it will be called implicitly on the first transforms, but - if you want predictable performance on the first transforms, you can perform it - in advance. - - \sa QColorTransform::map(), apply() */ -void QColorTransformPrivate::prepare() +QColorVector QColorTransformPrivate::map(QColorVector c) const { - updateLutsIn(); - updateLutsOut(); + if (colorSpaceIn->isThreeComponentMatrix()) { + if (colorSpaceIn->lut.generated.loadAcquire()) { + c.x = colorSpaceIn->lut[0]->toLinear(c.x); + c.y = colorSpaceIn->lut[1]->toLinear(c.y); + c.z = colorSpaceIn->lut[2]->toLinear(c.z); + } else { + c.x = colorSpaceIn->trc[0].apply(c.x); + c.y = colorSpaceIn->trc[1].apply(c.y); + c.z = colorSpaceIn->trc[2].apply(c.z); + } + c = colorMatrix.map(c); + } else { + // Do element based conversion + for (auto &&element : colorSpaceIn->mAB) + std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element); + } + c.x = std::clamp(c.x, 0.0f, 1.0f); + c.y = std::clamp(c.y, 0.0f, 1.0f); + c.z = std::clamp(c.z, 0.0f, 1.0f); + + // Match Profile Connection Spaces (PCS): + if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab) + c = c.xyzToLab(); + else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab) + c = c.labToXyz(); + + if (colorSpaceOut->isThreeComponentMatrix()) { + if (!colorSpaceIn->isThreeComponentMatrix()) { + c = colorMatrix.map(c); + c.x = std::clamp(c.x, 0.0f, 1.0f); + c.y = std::clamp(c.y, 0.0f, 1.0f); + c.z = std::clamp(c.z, 0.0f, 1.0f); + } + if (colorSpaceOut->lut.generated.loadAcquire()) { + c.x = colorSpaceOut->lut[0]->fromLinear(c.x); + c.y = colorSpaceOut->lut[1]->fromLinear(c.y); + c.z = colorSpaceOut->lut[2]->fromLinear(c.z); + } else { + c.x = colorSpaceOut->trc[0].applyInverse(c.x); + c.y = colorSpaceOut->trc[1].applyInverse(c.y); + c.z = colorSpaceOut->trc[2].applyInverse(c.z); + } + } else { + // Do element based conversion + for (auto &&element : colorSpaceOut->mBA) + std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element); + c.x = std::clamp(c.x, 0.0f, 1.0f); + c.y = std::clamp(c.y, 0.0f, 1.0f); + c.z = std::clamp(c.z, 0.0f, 1.0f); + } + return c; } /*! \internal - Applies the color transformation on \a count QRgb pixels starting from - \a src and stores the result in \a dst. +*/ +QColorVector QColorTransformPrivate::mapExtended(QColorVector c) const +{ + if (colorSpaceIn->isThreeComponentMatrix()) { + c.x = colorSpaceIn->trc[0].applyExtended(c.x); + c.y = colorSpaceIn->trc[1].applyExtended(c.y); + c.z = colorSpaceIn->trc[2].applyExtended(c.z); + c = colorMatrix.map(c); + } else { + // Do element based conversion + for (auto &&element : colorSpaceIn->mAB) + std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element); + } + + // Match Profile Connection Spaces (PCS): + if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab) + c = c.xyzToLab(); + else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab) + c = c.labToXyz(); + + if (colorSpaceOut->isThreeComponentMatrix()) { + if (!colorSpaceIn->isThreeComponentMatrix()) + c = colorMatrix.map(c); + c.x = colorSpaceOut->trc[0].applyInverseExtended(c.x); + c.y = colorSpaceOut->trc[1].applyInverseExtended(c.y); + c.z = colorSpaceOut->trc[2].applyInverseExtended(c.z); + } else { + // Do element based conversion + for (auto &&element : colorSpaceOut->mBA) + std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element); + } + return c; +} - Thread-safe if prepare() has been called first. +template<typename T> +constexpr bool IsGrayscale = std::is_same_v<T, uchar> || std::is_same_v<T, quint16>; +template<typename T> +constexpr bool IsAlwaysOpaque = std::is_same_v<T, QCmyk32> || IsGrayscale<T>; +template<typename T> +constexpr bool CanUseThreeComponent = !std::is_same_v<T, QCmyk32>; +template<typename T> +constexpr bool UnclampedValues = std::is_same_v<T, QRgbaFloat16> || std::is_same_v<T, QRgbaFloat32>; + +// Possible combos for data and color spaces: +// DataCM ColorSpaceCM ColorSpacePM Notes +// Gray Gray ThreeMatrix +// Gray Rgb ThreeMatrix Invalid colorMatrix +// Rgb Rgb ThreeMatrix +// Rgb Rgb ElementProc +// Gray Rgb ElementProc Only possible for input data +// Cmyk Cmyk ElementProc +// +// Gray data can be uchar, quint16, and is always Opaque +// Rgb data can be QRgb, QRgba64, or QRgbaFloat32, and is Unpremultiplied, Premultiplied, or Opaque +// Cmyk data can be Cmyk32, and is always Opaque +// +// colorMatrix as setup for Gray on Gray or Rgb on Rgb, but not Gray data on Rgb colorspace. + +template<typename S> +void QColorTransformPrivate::applyConvertIn(const S *src, QColorVector *buffer, qsizetype len, TransformFlags flags) const +{ + if constexpr (IsGrayscale<S>) { + if (colorSpaceIn->isThreeComponentMatrix()) { + loadGray(buffer, src, len, this); + if (!colorSpaceOut->isThreeComponentMatrix() || colorSpaceIn->colorModel != QColorSpace::ColorModel::Gray) { + if (!colorSpaceIn->chad.isNull()) + applyMatrix<DoClamp>(buffer, len, colorSpaceIn->chad); + } + return; + } + } else if constexpr (CanUseThreeComponent<S>) { + if (colorSpaceIn->isThreeComponentMatrix()) { + if (flags & InputPremultiplied) + loadPremultiplied(buffer, src, len, this); + else + loadUnpremultiplied(buffer, src, len, this); + + if (!colorSpaceOut->isThreeComponentMatrix()) + applyMatrix<DoClamp>(buffer, len, colorMatrix); + return; + } + } + Q_ASSERT(!colorSpaceIn->isThreeComponentMatrix()); - Assumes unpremultiplied data by default. Set \a flags to change defaults. + if (flags & InputPremultiplied) + loadPremultipliedLUT(buffer, src, len); + else + loadUnpremultipliedLUT(buffer, src, len); - \sa prepare() -*/ -void QColorTransformPrivate::apply(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const -{ - apply<QRgb>(dst, src, count, flags); + // Do element based conversion + for (auto &&element : colorSpaceIn->mAB) + std::visit([&buffer, len](auto &&elm) { visitElement(elm, buffer, len); }, element); } -/*! - \internal - Applies the color transformation on \a count QRgba64 pixels starting from - \a src and stores the result in \a dst. +template<typename D, typename S> +void QColorTransformPrivate::applyConvertOut(D *dst, const S *src, QColorVector *buffer, qsizetype len, TransformFlags flags) const +{ + constexpr ApplyMatrixForm doClamp = UnclampedValues<D> ? DoNotClamp : DoClamp; + if constexpr (IsGrayscale<D>) { + Q_UNUSED(src); // dealing with buggy warnings in gcc 9 + Q_UNUSED(flags); + // Calculate the matrix for grayscale conversion + QColorMatrix grayMatrix; + if (colorSpaceIn == colorSpaceOut || + (colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray && + colorSpaceOut->colorModel == QColorSpace::ColorModel::Gray)) { + // colorMatrix already has the right form + grayMatrix = colorMatrix; + } else { + if constexpr (IsGrayscale<S>) { + if (colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray) + grayMatrix = colorSpaceIn->chad; + else + grayMatrix = QColorMatrix::identity(); // Otherwise already handled in applyConvertIn + } else { + if (colorSpaceIn->isThreeComponentMatrix()) + grayMatrix = colorSpaceIn->toXyz; + else + grayMatrix = QColorMatrix::identity(); + } + if (!colorSpaceOut->chad.isNull()) + grayMatrix = colorSpaceOut->chad.inverted() * grayMatrix; + } - Thread-safe if prepare() has been called first. + applyMatrix<doClamp>(buffer, len, grayMatrix); + storeOpaque(dst, buffer, len, this); + return; + } else if constexpr (CanUseThreeComponent<D>) { + if (colorSpaceOut->isThreeComponentMatrix()) { + if (IsGrayscale<S> && colorSpaceIn->colorModel != QColorSpace::ColorModel::Gray) + applyMatrix<doClamp>(buffer, len, colorSpaceOut->toXyz.inverted()); // colorMatrix wasnt prepared for gray input + else + applyMatrix<doClamp>(buffer, len, colorMatrix); + + if constexpr (IsAlwaysOpaque<S>) { + storeOpaque(dst, buffer, len, this); + } else { + if (flags & InputOpaque) + storeOpaque(dst, buffer, len, this); + else if (flags & OutputPremultiplied) + storePremultiplied(dst, src, buffer, len, this); + else + storeUnpremultiplied(dst, src, buffer, len, this); + } + return; + } + } + if constexpr (!IsGrayscale<D>) { + Q_ASSERT(!colorSpaceOut->isThreeComponentMatrix()); - Assumes unpremultiplied data by default. Set \a flags to change defaults. + // Do element based conversion + for (auto &&element : colorSpaceOut->mBA) + std::visit([&buffer, len](auto &&elm) { visitElement(elm, buffer, len); }, element); - \sa prepare() -*/ -void QColorTransformPrivate::apply(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const -{ - apply<QRgba64>(dst, src, count, flags); + clampIfNeeded<doClamp>(buffer, len); + + if (flags & OutputPremultiplied) + storePremultipliedLUT(dst, src, buffer, len); + else + storeUnpremultipliedLUT(dst, src, buffer, len); + } else { + Q_UNREACHABLE(); + } } /*! \internal - Applies the color transformation on \a count QRgbaFloat32 pixels starting from - \a src and stores the result in \a dst. + Adapt Profile Connection Spaces. +*/ +void QColorTransformPrivate::pcsAdapt(QColorVector *buffer, qsizetype count) const +{ + // Match Profile Connection Spaces (PCS): + if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab) { + for (qsizetype j = 0; j < count; ++j) + buffer[j] = buffer[j].xyzToLab(); + } else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab) { + for (qsizetype j = 0; j < count; ++j) + buffer[j] = buffer[j].labToXyz(); + } +} - Thread-safe if prepare() has been called first. +/*! + \internal + Applies the color transformation on \a count S pixels starting from + \a src and stores the result in \a dst as D pixels . Assumes unpremultiplied data by default. Set \a flags to change defaults. \sa prepare() */ -void QColorTransformPrivate::apply(QRgbaFloat32 *dst, const QRgbaFloat32 *src, qsizetype count, - TransformFlags flags) const +template<typename D, typename S> +void QColorTransformPrivate::apply(D *dst, const S *src, qsizetype count, TransformFlags flags) const { - apply<QRgbaFloat32>(dst, src, count, flags); + if (colorSpaceIn->isThreeComponentMatrix()) + updateLutsIn(); + if (colorSpaceOut->isThreeComponentMatrix()) + updateLutsOut(); + + QUninitialized<QColorVector, WorkBlockSize> buffer; + qsizetype i = 0; + while (i < count) { + const qsizetype len = qMin(count - i, WorkBlockSize); + + applyConvertIn(src + i, buffer, len, flags); + + pcsAdapt(buffer, len); + + applyConvertOut(dst + i, src + i, buffer, len, flags); + + i += len; + } } /*! \internal - Is to be called on a color-transform to XYZ, returns only luminance values. + \enum QColorTransformPrivate::TransformFlag + Defines how the transform should handle alpha values. + + \value Unpremultiplied The input and output should both be unpremultiplied. + \value InputOpaque The input is guaranteed to be opaque. + \value InputPremultiplied The input is premultiplied. + \value OutputPremultiplied The output should be premultiplied. + \value Premultiplied Both input and output should both be premultiplied. */ -void QColorTransformPrivate::apply(quint8 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const -{ - applyReturnGray<quint8, QRgb>(dst, src, count, flags); -} /*! \internal - Is to be called on a color-transform to XYZ, returns only luminance values. + Prepares a color transformation for fast application. You do not need to + call this explicitly as it will be called implicitly on the first transforms, but + if you want predictable performance on the first transforms, you can perform it + in advance. + \sa QColorTransform::map(), apply() */ -void QColorTransformPrivate::apply(quint16 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const +void QColorTransformPrivate::prepare() { - applyReturnGray<quint16, QRgba64>(dst, src, count, flags); + updateLutsIn(); + updateLutsOut(); } +// Only some versions increasing precision 14/36 combos +template void QColorTransformPrivate::apply<quint8, quint8>(quint8 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<quint8, QRgb>(quint8 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<quint8, QCmyk32>(quint8 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<quint16, quint8>(quint16 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<quint16, quint16>(quint16 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<quint16, QCmyk32>(quint16 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<quint16, QRgba64>(quint16 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgb, quint8>(QRgb *dst, const quint8 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgb, QRgb>(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgb, QCmyk32>(QRgb *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QCmyk32, quint8>(QCmyk32 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QCmyk32, quint16>(QCmyk32 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QCmyk32, QRgb>(QCmyk32 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QCmyk32, QCmyk32>(QCmyk32 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QCmyk32, QRgba64>(QCmyk32 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QCmyk32, QRgbaFloat32>(QCmyk32 *dst, const QRgbaFloat32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgba64, quint16>(QRgba64 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgba64, QRgb>(QRgba64 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgba64, QCmyk32>(QRgba64 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgba64, QRgba64>(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgbaFloat32, QRgb>(QRgbaFloat32 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgbaFloat32, QCmyk32>(QRgbaFloat32 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgbaFloat32, QRgba64>(QRgbaFloat32 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const; +template void QColorTransformPrivate::apply<QRgbaFloat32, QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, qsizetype count, TransformFlags flags) const; /*! \internal */ bool QColorTransformPrivate::isIdentity() const { + if (colorSpaceIn == colorSpaceOut) + return true; if (!colorMatrix.isIdentity()) return false; if (colorSpaceIn && colorSpaceOut) { + if (colorSpaceIn->equals(colorSpaceOut.constData())) + return true; + if (!colorSpaceIn->isThreeComponentMatrix() || !colorSpaceOut->isThreeComponentMatrix()) + return false; if (colorSpaceIn->transferFunction != colorSpaceOut->transferFunction) return false; if (colorSpaceIn->transferFunction == QColorSpace::TransferFunction::Custom) { @@ -1395,6 +2006,10 @@ bool QColorTransformPrivate::isIdentity() const && colorSpaceIn->trc[2] == colorSpaceOut->trc[2]; } } else { + if (colorSpaceIn && !colorSpaceIn->isThreeComponentMatrix()) + return false; + if (colorSpaceOut && !colorSpaceOut->isThreeComponentMatrix()) + return false; if (colorSpaceIn && colorSpaceIn->transferFunction != QColorSpace::TransferFunction::Linear) return false; if (colorSpaceOut && colorSpaceOut->transferFunction != QColorSpace::TransferFunction::Linear) |