Make QColorTrcLut more flexible

Make it possible to generate one-way QColorTrcLut tables, and make it easier to test out different table sizes. Change-Id: I953c68d772699de87fdddbf15ce196e6ba8b9898 Reviewed-by: Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
author: Allan Sandfeld Jensen <allan.jensen@qt.io> 2024-03-06 11:56:15 +0100
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2024-04-05 18:40:47 +0200
commit: 04e5b86f9e695e2ca4516179a214d2eff6a2157e (patch)
tree: 198dd7bf3897d213f05786e4ef6905edd581adaf /src/gui/painting
parent: 05b84673045a5f4432a6caa9bea08d8fba1e1a03 (diff)
3 files changed, 142 insertions, 92 deletions
diff --git a/src/gui/painting/qcolortransform.cpp b/src/gui/painting/qcolortransform.cpp
index 884e338304..a5ed529a15 100644
--- a/src/gui/painting/qcolortransform.cpp
+++ b/src/gui/painting/qcolortransform.cpp
@@ -449,7 +449,7 @@ inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v)
 template<typename T>
 static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
     constexpr bool isARGB = isArgb<T>();
     for (qsizetype i = 0; i < len; ++i) {
@@ -468,7 +468,7 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp
         vf = _mm_andnot_ps(vAlphaMask, vf);
 
         // LUT
-        v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+        v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
         const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0);
         const int gidx = _mm_extract_epi16(v, 2);
         const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4);
@@ -484,7 +484,7 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp
 template<>
 void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
     const __m128 vZero = _mm_set1_ps(0.0f);
     const __m128 vOne  = _mm_set1_ps(1.0f);
@@ -506,7 +506,7 @@ void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *s
         const __m128 over = _mm_cmpgt_ps(vf, vOne);
         if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
             // Within gamut
-            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
             const int ridx = _mm_extract_epi16(v, 0);
             const int gidx = _mm_extract_epi16(v, 2);
             const int bidx = _mm_extract_epi16(v, 4);
@@ -525,7 +525,7 @@ void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *s
     }
 }
 
-// Load to [0-4080] in 4x32 SIMD
+// Load to [0->TrcResolution] in 4x32 SIMD
 template<typename T>
 static inline void loadPU(const T &p, __m128i &v);
 
@@ -539,7 +539,7 @@ inline void loadPU<QRgb>(const QRgb &p, __m128i &v)
     v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
     v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
 #endif
-    v = _mm_slli_epi32(v, 4);
+    v = _mm_slli_epi32(v, QColorTrcLut::ShiftUp);
 }
 
 template<>
@@ -552,7 +552,7 @@ inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v)
 #else
     v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
 #endif
-    v = _mm_srli_epi32(v, 4);
+    v = _mm_srli_epi32(v, QColorTrcLut::ShiftDown);
 }
 
 template<typename T>
@@ -577,7 +577,7 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len
 template<>
 void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
     const __m128 vZero = _mm_set1_ps(0.0f);
     const __m128 vOne  = _mm_set1_ps(1.0f);
@@ -587,7 +587,7 @@ void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32
         const __m128 over = _mm_cmpgt_ps(vf, vOne);
         if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
             // Within gamut
-            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
             const int ridx = _mm_extract_epi16(v, 0);
             const int gidx = _mm_extract_epi16(v, 2);
             const int bidx = _mm_extract_epi16(v, 4);
@@ -648,7 +648,7 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp
         vf = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf), vAlphaMask));
 
         // LUT
-        v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f)));
+        v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)));
         const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0);
         const int gidx = vgetq_lane_u32(v, 1);
         const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2);
@@ -661,7 +661,7 @@ static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetyp
     }
 }
 
-// Load to [0-4080] in 4x32 SIMD
+// Load to [0->TrcResolution] in 4x32 SIMD
 template<typename T>
 static inline void loadPU(const T &p, uint32x4_t &v);
 
@@ -669,7 +669,7 @@ template<>
 inline void loadPU<QRgb>(const QRgb &p, uint32x4_t &v)
 {
     v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
-    v = vshlq_n_u32(v, 4);
+    v = vshlq_n_u32(v, QColorTrcLut::ShiftUp);
 }
 
 template<>
@@ -678,7 +678,7 @@ inline void loadPU<QRgba64>(const QRgba64 &p, uint32x4_t &v)
     uint16x4_t v16 = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p)));
     v16 = vsub_u16(v16, vshr_n_u16(v16, 8));
     v = vmovl_u16(v16);
-    v = vshrq_n_u32(v, 4);
+    v = vshrq_n_u32(v, QColorTrcLut::ShiftDown);
 }
 
 template<typename T>
@@ -707,7 +707,7 @@ void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizet
         const uint p = src[i];
         const int a = qAlpha(p);
         if (a) {
-            const float ia = 4080.0f / a;
+            const float ia = float(QColorTrcLut::Resolution) / a;
             const int ridx = int(qRed(p)   * ia + 0.5f);
             const int gidx = int(qGreen(p) * ia + 0.5f);
             const int bidx = int(qBlue(p)  * ia + 0.5f);
@@ -727,7 +727,7 @@ void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const
         const QRgba64 &p = src[i];
         const int a = p.alpha();
         if (a) {
-            const float ia = 4080.0f / a;
+            const float ia = float(QColorTrcLut::Resolution) / a;
             const int ridx = int(p.red()   * ia + 0.5f);
             const int gidx = int(p.green() * ia + 0.5f);
             const int bidx = int(p.blue()  * ia + 0.5f);
@@ -822,13 +822,13 @@ template<typename D, typename S,
 static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
                                const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
     constexpr bool isARGB = isArgb<D>();
     for (qsizetype i = 0; i < len; ++i) {
         const int a = getAlpha<S>(src[i]);
         __m128 vf = _mm_loadu_ps(&buffer[i].x);
-        __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+        __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
         __m128 va = _mm_mul_ps(_mm_set1_ps(a), iFF00);
         const int ridx = _mm_extract_epi16(v, 0);
         const int gidx = _mm_extract_epi16(v, 2);
@@ -848,7 +848,7 @@ static void storePremultiplied(QRgbaFloat32 *dst, const S *src,
                                const QColorVector *buffer, const qsizetype len,
                                const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 vZero = _mm_set1_ps(0.0f);
     const __m128 vOne  = _mm_set1_ps(1.0f);
     const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
@@ -861,7 +861,7 @@ static void storePremultiplied(QRgbaFloat32 *dst, const S *src,
         if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
             // Within gamut
             va = _mm_mul_ps(va, viFF00);
-            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
             const int ridx = _mm_extract_epi16(v, 0);
             const int gidx = _mm_extract_epi16(v, 2);
             const int bidx = _mm_extract_epi16(v, 4);
@@ -905,12 +905,12 @@ template<typename D, typename S,
 static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
                                  const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     constexpr bool isARGB = isArgb<D>();
     for (qsizetype i = 0; i < len; ++i) {
         const int a = getAlpha<S>(src[i]);
         __m128 vf = _mm_loadu_ps(&buffer[i].x);
-        __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+        __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
         const int ridx = _mm_extract_epi16(v, 0);
         const int gidx = _mm_extract_epi16(v, 2);
         const int bidx = _mm_extract_epi16(v, 4);
@@ -927,7 +927,7 @@ void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src,
                           const QColorVector *buffer, const qsizetype len,
                           const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 vZero = _mm_set1_ps(0.0f);
     const __m128 vOne  = _mm_set1_ps(1.0f);
     const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
@@ -938,7 +938,7 @@ void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src,
         const __m128 over = _mm_cmpgt_ps(vf, vOne);
         if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
             // Within gamut
-            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
             const int ridx = _mm_extract_epi16(v, 0);
             const int gidx = _mm_extract_epi16(v, 2);
             const int bidx = _mm_extract_epi16(v, 4);
@@ -961,11 +961,11 @@ template<typename T>
 static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len,
                         const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     constexpr bool isARGB = isArgb<T>();
     for (qsizetype i = 0; i < len; ++i) {
         __m128 vf = _mm_loadu_ps(&buffer[i].x);
-        __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+        __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
         const int ridx = _mm_extract_epi16(v, 0);
         const int gidx = _mm_extract_epi16(v, 2);
         const int bidx = _mm_extract_epi16(v, 4);
@@ -982,7 +982,7 @@ void storeOpaque<QRgbaFloat32>(QRgbaFloat32 *dst,
                                const QColorVector *buffer, const qsizetype len,
                                const QColorTransformPrivate *d_ptr)
 {
-    const __m128 v4080 = _mm_set1_ps(4080.f);
+    const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
     const __m128 vZero = _mm_set1_ps(0.0f);
     const __m128 vOne  = _mm_set1_ps(1.0f);
     const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
@@ -992,7 +992,7 @@ void storeOpaque<QRgbaFloat32>(QRgbaFloat32 *dst,
         const __m128 over = _mm_cmpgt_ps(vf, vOne);
         if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
             // Within gamut
-            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
+            __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
             const int ridx = _mm_extract_epi16(v, 0);
             const int gidx = _mm_extract_epi16(v, 2);
             const int bidx = _mm_extract_epi16(v, 4);
@@ -1035,7 +1035,7 @@ static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer,
     for (qsizetype i = 0; i < len; ++i) {
         const int a = getAlpha<S>(src[i]);
         float32x4_t vf = vld1q_f32(&buffer[i].x);
-        uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f)));
+        uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)));
         const int ridx = vgetq_lane_u32(v, 0);
         const int gidx = vgetq_lane_u32(v, 1);
         const int bidx = vgetq_lane_u32(v, 2);
@@ -1079,7 +1079,7 @@ static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffe
     for (qsizetype i = 0; i < len; ++i) {
         const int a = getAlpha<S>(src[i]);
         float32x4_t vf = vld1q_f32(&buffer[i].x);
-        uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f))));
+        uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))));
         const int ridx = vget_lane_u16(v, 0);
         const int gidx = vget_lane_u16(v, 1);
         const int bidx = vget_lane_u16(v, 2);
@@ -1097,7 +1097,7 @@ static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len,
     constexpr bool isARGB = isArgb<T>();
     for (qsizetype i = 0; i < len; ++i) {
         float32x4_t vf = vld1q_f32(&buffer[i].x);
-        uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, 4080.f), vdupq_n_f32(0.5f))));
+        uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))));
         const int ridx = vget_lane_u16(v, 0);
         const int gidx = vget_lane_u16(v, 1);
         const int bidx = vget_lane_u16(v, 2);
@@ -1114,9 +1114,9 @@ static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *b
     for (qsizetype i = 0; i < len; ++i) {
         const int a = qAlpha(src[i]);
         const float fa = a / (255.0f * 256.0f);
-        const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)];
-        const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)];
-        const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)];
+        const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)];
+        const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)];
+        const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)];
         dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
     }
 }
@@ -1150,9 +1150,9 @@ static void storePremultiplied(QRgba64 *dst, const S *src, const QColorVector *b
     for (qsizetype i = 0; i < len; ++i) {
         const int a = getAlphaF(src[i]) * 65535.f;
         const float fa = a / (255.0f * 256.0f);
-        const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)];
-        const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)];
-        const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)];
+        const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)];
+        const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)];
+        const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)];
         dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
     }
 }
diff --git a/src/gui/painting/qcolortrclut.cpp b/src/gui/painting/qcolortrclut.cpp
index 6f1cacea75..8a7673bc00 100644
--- a/src/gui/painting/qcolortrclut.cpp
+++ b/src/gui/painting/qcolortrclut.cpp
@@ -13,43 +13,80 @@ std::shared_ptr<QColorTrcLut> QColorTrcLut::create()
     return std::make_shared<Access>();
 }
 
-std::shared_ptr<QColorTrcLut> QColorTrcLut::fromGamma(qreal gamma)
+std::shared_ptr<QColorTrcLut> QColorTrcLut::fromGamma(qreal gamma, Direction dir)
 {
     auto cp = create();
+    cp->setFromGamma(gamma, dir);
+    return cp;
+}
 
-    for (int i = 0; i <= (255 * 16); ++i) {
-        cp->m_toLinear[i] = ushort(qRound(qPow(i / qreal(255 * 16), gamma) * (255 * 256)));
-        cp->m_fromLinear[i] = ushort(qRound(qPow(i / qreal(255 * 16), qreal(1) / gamma) * (255 * 256)));
-    }
-
+std::shared_ptr<QColorTrcLut> QColorTrcLut::fromTransferFunction(const QColorTransferFunction &fun, Direction dir)
+{
+    auto cp = create();
+    cp->setFromTransferFunction(fun, dir);
     return cp;
 }
 
-std::shared_ptr<QColorTrcLut> QColorTrcLut::fromTransferFunction(const QColorTransferFunction &fun)
+std::shared_ptr<QColorTrcLut> QColorTrcLut::fromTransferTable(const QColorTransferTable &table, Direction dir)
 {
     auto cp = create();
-    QColorTransferFunction inv = fun.inverted();
+    cp->setFromTransferTable(table, dir);
+    return cp;
+}
 
-    for (int i = 0; i <= (255 * 16); ++i) {
-        cp->m_toLinear[i] = ushort(qRound(fun.apply(i / qreal(255 * 16)) * (255 * 256)));
-        cp->m_fromLinear[i] = ushort(qRound(inv.apply(i / qreal(255 * 16)) * (255 * 256)));
+void QColorTrcLut::setFromGamma(qreal gamma, Direction dir)
+{
+    if (dir & ToLinear) {
+        if (!m_toLinear)
+            m_toLinear.reset(new ushort[Resolution + 1]);
+        for (int i = 0; i <= Resolution; ++i)
+            m_toLinear[i] = ushort(qRound(qPow(i / qreal(Resolution), gamma) * (255 * 256)));
     }
 
-    return cp;
+    if (dir & FromLinear) {
+        if (!m_fromLinear)
+            m_fromLinear.reset(new ushort[Resolution + 1]);
+        for (int i = 0; i <= Resolution; ++i)
+            m_fromLinear[i] = ushort(qRound(qPow(i / qreal(Resolution), qreal(1) / gamma) * (255 * 256)));
+    }
 }
 
-std::shared_ptr<QColorTrcLut> QColorTrcLut::fromTransferTable(const QColorTransferTable &table)
+void QColorTrcLut::setFromTransferFunction(const QColorTransferFunction &fun, Direction dir)
 {
-    auto cp = create();
+    if (dir & ToLinear) {
+        if (!m_toLinear)
+            m_toLinear.reset(new ushort[Resolution + 1]);
+        for (int i = 0; i <= Resolution; ++i)
+            m_toLinear[i] = ushort(qRound(fun.apply(i / qreal(Resolution)) * (255 * 256)));
+    }
 
-    float minInverse = 0.0f;
-    for (int i = 0; i <= (255 * 16); ++i) {
-        cp->m_toLinear[i] = ushort(qBound(0, qRound(table.apply(i / qreal(255 * 16)) * (255 * 256)), 65280));
-        minInverse = table.applyInverse(i / qreal(255 * 16), minInverse);
-        cp->m_fromLinear[i] = ushort(qBound(0, qRound(minInverse * (255 * 256)), 65280));
+    if (dir & FromLinear) {
+        if (!m_fromLinear)
+            m_fromLinear.reset(new ushort[Resolution + 1]);
+        QColorTransferFunction inv = fun.inverted();
+        for (int i = 0; i <= Resolution; ++i)
+            m_fromLinear[i] = ushort(qRound(inv.apply(i / qreal(Resolution)) * (255 * 256)));
     }
+}
 
-    return cp;
+void QColorTrcLut::setFromTransferTable(const QColorTransferTable &table, Direction dir)
+{
+    if (dir & ToLinear) {
+        if (!m_toLinear)
+            m_toLinear.reset(new ushort[Resolution + 1]);
+        for (int i = 0; i <= Resolution; ++i)
+            m_toLinear[i] = ushort(qBound(0, qRound(table.apply(i / qreal(Resolution)) * (255 * 256)), 65280));
+    }
+
+    if (dir & FromLinear) {
+        if (!m_fromLinear)
+            m_fromLinear.reset(new ushort[Resolution + 1]);
+        float minInverse = 0.0f;
+        for (int i = 0; i <= Resolution; ++i) {
+            minInverse = table.applyInverse(i / qreal(Resolution), minInverse);
+            m_fromLinear[i] = ushort(qBound(0, qRound(minInverse * (255 * 256)), 65280));
+        }
+    }
 }
 
 QT_END_NAMESPACE
diff --git a/src/gui/painting/qcolortrclut_p.h b/src/gui/painting/qcolortrclut_p.h
index c6b73d9f69..3ebab42809 100644
--- a/src/gui/painting/qcolortrclut_p.h
+++ b/src/gui/painting/qcolortrclut_p.h
@@ -36,9 +36,22 @@ class QColorTransferTable;
 class Q_GUI_EXPORT QColorTrcLut
 {
 public:
-    static std::shared_ptr<QColorTrcLut> fromGamma(qreal gamma);
-    static std::shared_ptr<QColorTrcLut> fromTransferFunction(const QColorTransferFunction &transfn);
-    static std::shared_ptr<QColorTrcLut> fromTransferTable(const QColorTransferTable &transTable);
+    static constexpr uint32_t ShiftUp = 4;                         // Amount to shift up from 1->255
+    static constexpr uint32_t ShiftDown = (8 - ShiftUp);           // Amount to shift down from 1->65280
+    static constexpr qsizetype Resolution = (1 << ShiftUp) * 255;  // Highest table index; tables hold Resolution + 1 entries
+
+    enum Direction {
+        ToLinear = 1,
+        FromLinear = 2,
+        BiLinear = ToLinear | FromLinear
+    };
+
+    static std::shared_ptr<QColorTrcLut> fromGamma(qreal gamma, Direction dir = BiLinear);
+    static std::shared_ptr<QColorTrcLut> fromTransferFunction(const QColorTransferFunction &transFn, Direction dir = BiLinear);
+    static std::shared_ptr<QColorTrcLut> fromTransferTable(const QColorTransferTable &transTable, Direction dir = BiLinear);
+    void setFromGamma(qreal gamma, Direction dir = BiLinear);
+    void setFromTransferFunction(const QColorTransferFunction &transFn, Direction dir = BiLinear);
+    void setFromTransferTable(const QColorTransferTable &transTable, Direction dir = BiLinear);
 
     // The following methods all convert opaque or unpremultiplied colors:
 
@@ -47,7 +60,7 @@ public:
 #if defined(__SSE2__)
         __m128i v = _mm_cvtsi32_si128(rgb32);
         v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
-        const __m128i vidx = _mm_slli_epi16(v, 4);
+        const __m128i vidx = _mm_slli_epi16(v, ShiftUp);
         const int ridx = _mm_extract_epi16(vidx, 2);
         const int gidx = _mm_extract_epi16(vidx, 1);
         const int bidx = _mm_extract_epi16(vidx, 0);
@@ -62,7 +75,7 @@ public:
 #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
         uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32));
         uint16x4_t v16 = vget_low_u16(vmovl_u8(v8));
-        const uint16x4_t vidx = vshl_n_u16(v16, 4);
+        const uint16x4_t vidx = vshl_n_u16(v16, ShiftUp);
         const int ridx = vget_lane_u16(vidx, 2);
         const int gidx = vget_lane_u16(vidx, 1);
         const int bidx = vget_lane_u16(vidx, 0);
@@ -73,9 +86,9 @@ public:
         v16 = vadd_u16(v16, vshr_n_u16(v16, 8));
         return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0));
 #else
-        uint r = m_toLinear[qRed(rgb32) << 4];
-        uint g = m_toLinear[qGreen(rgb32) << 4];
-        uint b = m_toLinear[qBlue(rgb32) << 4];
+        uint r = m_toLinear[qRed(rgb32) << ShiftUp];
+        uint g = m_toLinear[qGreen(rgb32) << ShiftUp];
+        uint b = m_toLinear[qBlue(rgb32) << ShiftUp];
         r = r + (r >> 8);
         g = g + (g >> 8);
         b = b + (b >> 8);
@@ -86,30 +99,30 @@ public:
 
     QRgb toLinear(QRgb rgb32) const
     {
-        return convertWithTable(rgb32, m_toLinear);
+        return convertWithTable(rgb32, m_toLinear.get());
     }
 
     QRgba64 toLinear(QRgba64 rgb64) const
     {
-        return convertWithTable(rgb64, m_toLinear);
+        return convertWithTable(rgb64, m_toLinear.get());
     }
 
     float u8ToLinearF32(int c) const
     {
-        ushort v = m_toLinear[c << 4];
+        ushort v = m_toLinear[c << ShiftUp];
         return v * (1.0f / (255*256));
     }
 
     float u16ToLinearF32(int c) const
     {
         c -= (c >> 8);
-        ushort v = m_toLinear[c >> 4];
+        ushort v = m_toLinear[c >> ShiftDown];
         return v * (1.0f / (255*256));
     }
 
     float toLinear(float f) const
     {
-        ushort v = m_toLinear[(int)(f * (255 * 16) + 0.5f)];
+        ushort v = m_toLinear[(int)(f * Resolution + 0.5f)];
         return v * (1.0f / (255*256));
     }
 
@@ -118,7 +131,7 @@ public:
 #if defined(__SSE2__)
         __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgb64));
         v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
-        const __m128i vidx = _mm_srli_epi16(v, 4);
+        const __m128i vidx = _mm_srli_epi16(v, ShiftDown);
         const int ridx = _mm_extract_epi16(vidx, 0);
         const int gidx = _mm_extract_epi16(vidx, 1);
         const int bidx = _mm_extract_epi16(vidx, 2);
@@ -132,7 +145,7 @@ public:
 #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
         uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
         v = vsub_u16(v, vshr_n_u16(v, 8));
-        const uint16x4_t vidx = vshr_n_u16(v, 4);
+        const uint16x4_t vidx = vshr_n_u16(v, ShiftDown);
         const int ridx = vget_lane_u16(vidx, 0);
         const int gidx = vget_lane_u16(vidx, 1);
         const int bidx = vget_lane_u16(vidx, 2);
@@ -151,56 +164,56 @@ public:
         g = g - (g >> 8);
         b = b - (b >> 8);
         a = (a + 0x80) >> 8;
-        r = (m_fromLinear[r >> 4] + 0x80) >> 8;
-        g = (m_fromLinear[g >> 4] + 0x80) >> 8;
-        b = (m_fromLinear[b >> 4] + 0x80) >> 8;
+        r = (m_fromLinear[r >> ShiftDown] + 0x80) >> 8;
+        g = (m_fromLinear[g >> ShiftDown] + 0x80) >> 8;
+        b = (m_fromLinear[b >> ShiftDown] + 0x80) >> 8;
         return (a << 24) | (r << 16) | (g << 8) | b;
 #endif
     }
 
     QRgb fromLinear(QRgb rgb32) const
     {
-        return convertWithTable(rgb32, m_fromLinear);
+        return convertWithTable(rgb32, m_fromLinear.get());
     }
 
     QRgba64 fromLinear(QRgba64 rgb64) const
     {
-        return convertWithTable(rgb64, m_fromLinear);
+        return convertWithTable(rgb64, m_fromLinear.get());
     }
 
     int u8FromLinearF32(float f) const
     {
-        ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
+        ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)];
         return (v + 0x80) >> 8;
     }
     int u16FromLinearF32(float f) const
     {
-        ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
+        ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)];
         return v + (v >> 8);
     }
     float fromLinear(float f) const
     {
-        ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
+        ushort v = m_fromLinear[(int)(f * Resolution + 0.5f)];
         return v * (1.0f / (255*256));
     }
 
     // We translate to 0-65280 (255*256) instead to 0-65535 to make simple
     // shifting an accurate conversion.
-    // We translate from 0-4080 (255*16) for the same speed up, and to keep
-    // the tables small enough to fit in most inner caches.
-    ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
-    ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
+    // We translate from 0->Resolution (4080 = 255*16) for the same speed up,
+    // and to keep the tables small enough to fit in most inner caches.
+    std::unique_ptr<ushort[]> m_toLinear; // [0->Resolution] -> [0-65280]
+    std::unique_ptr<ushort[]> m_fromLinear; // [0->Resolution] -> [0-65280]
 
 private:
-    QColorTrcLut() { } // force uninitialized members
+    QColorTrcLut() = default;
 
     static std::shared_ptr<QColorTrcLut> create();
 
     Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table)
     {
-        const int r = (table[qRed(rgb32) << 4] + 0x80) >> 8;
-        const int g = (table[qGreen(rgb32) << 4] + 0x80) >> 8;
-        const int b = (table[qBlue(rgb32) << 4] + 0x80) >> 8;
+        const int r = (table[qRed(rgb32) << ShiftUp] + 0x80) >> 8;
+        const int g = (table[qGreen(rgb32) << ShiftUp] + 0x80) >> 8;
+        const int b = (table[qBlue(rgb32) << ShiftUp] + 0x80) >> 8;
         return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b;
     }
     Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table)
@@ -208,7 +221,7 @@ private:
 #if defined(__SSE2__)
         __m128i v = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(&rgb64));
         v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
-        const __m128i vidx = _mm_srli_epi16(v, 4);
+        const __m128i vidx = _mm_srli_epi16(v, ShiftDown);
         const int ridx = _mm_extract_epi16(vidx, 2);
         const int gidx = _mm_extract_epi16(vidx, 1);
         const int bidx = _mm_extract_epi16(vidx, 0);
@@ -222,7 +235,7 @@ private:
 #elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
         uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
         v = vsub_u16(v, vshr_n_u16(v, 8));
-        const uint16x4_t vidx = vshr_n_u16(v, 4);
+        const uint16x4_t vidx = vshr_n_u16(v, ShiftDown);
         const int ridx = vget_lane_u16(vidx, 2);
         const int gidx = vget_lane_u16(vidx, 1);
         const int bidx = vget_lane_u16(vidx, 0);
@@ -238,9 +251,9 @@ private:
         r = r - (r >> 8);
         g = g - (g >> 8);
         b = b - (b >> 8);
-        r = table[r >> 4];
-        g = table[g >> 4];
-        b = table[b >> 4];
+        r = table[r >> ShiftDown];
+        g = table[g >> ShiftDown];
+        b = table[b >> ShiftDown];
         r = r + (r >> 8);
         g = g + (g >> 8);
         b = b + (b >> 8);
author	Allan Sandfeld Jensen <allan.jensen@qt.io>	2024-03-06 11:56:15 +0100
committer	Allan Sandfeld Jensen <allan.jensen@qt.io>	2024-04-05 18:40:47 +0200
commit	04e5b86f9e695e2ca4516179a214d2eff6a2157e (patch)
tree	198dd7bf3897d213f05786e4ef6905edd581adaf /src/gui/painting
parent	05b84673045a5f4432a6caa9bea08d8fba1e1a03 (diff)