summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArtem Dyomin <artem.dyomin@qt.io>2023-07-13 14:32:37 +0200
committerQt Cherry-pick Bot <cherrypick_bot@qt-project.org>2023-08-07 16:28:34 +0000
commit4faa14cb00dd879e734fbd75d1966548accbc4b7 (patch)
treedd1d0de90b58f827d1ea66ff27973ff4c1505a63
parenteb57d3fecfb0ee2c0755c00a4af9cd22244e0b93 (diff)
Fix x11 surface capture alpha channel
We declare a 0xFF alpha channel for the formats XRGB, BGRX, etc., but on some Linux systems it is 0x00 in the xImage. The patch fixes the problem and adds SIMD optimizations for Intel platforms, which make a difference with 4K displays. With the optimizations, the overhead is small compared with memcpy. We have a test that covers the case; it failed on some machines. Change-Id: I78927ea58159ae67bbf993f30b8dec45f617edc1 Reviewed-by: Lars Knoll <lars@knoll.priv.no> (cherry picked from commit 3ac997f0b34a28af2e754fccdb8671d89ebe22d9) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
-rw-r--r--src/multimedia/video/qvideoframeconversionhelper.cpp75
-rw-r--r--src/multimedia/video/qvideoframeconversionhelper_avx2.cpp37
-rw-r--r--src/multimedia/video/qvideoframeconversionhelper_p.h11
-rw-r--r--src/multimedia/video/qvideoframeconversionhelper_sse2.cpp38
-rw-r--r--src/plugins/multimedia/ffmpeg/qx11surfacecapture.cpp13
5 files changed, 169 insertions, 5 deletions
diff --git a/src/multimedia/video/qvideoframeconversionhelper.cpp b/src/multimedia/video/qvideoframeconversionhelper.cpp
index 30ca146e2..1b570b74f 100644
--- a/src/multimedia/video/qvideoframeconversionhelper.cpp
+++ b/src/multimedia/video/qvideoframeconversionhelper.cpp
@@ -462,6 +462,14 @@ static void QT_FASTCALL qt_convert_Y_to_ARGB32(const QVideoFrame &frame, uchar *
MERGE_LOOPS(width, height, stride, 1)
}
+// Generic (non-SIMD) masked copy: stores src[i] | mask into dst[i] for each of
+// the `size` pixels. Used as the initial value of qPixelsCopyFunc.
+template<typename Pixel>
+static void QT_FASTCALL qt_copy_pixels_with_mask(Pixel *dst, const Pixel *src, size_t size,
+                                                 Pixel mask)
+{
+    const Pixel *const srcEnd = src + size;
+    while (src != srcEnd)
+        *dst++ = *src++ | mask;
+}
+
static VideoFrameConvertFunc qConvertFuncs[QVideoFrameFormat::NPixelFormats] = {
/* Format_Invalid */ nullptr, // Not needed
/* Format_ARGB8888 */ qt_convert_to_ARGB32<ARGB8888>,
@@ -494,13 +502,19 @@ static VideoFrameConvertFunc qConvertFuncs[QVideoFrameFormat::NPixelFormats] = {
/* Format_Jpeg */ nullptr, // Not needed
};
-static void qInitConvertFuncsAsm()
+static PixelsCopyFunc qPixelsCopyFunc = qt_copy_pixels_with_mask<uint32_t>;
+
+static std::once_flag InitFuncsAsmFlag;
+
+static void qInitFuncsAsm()
{
#ifdef QT_COMPILER_SUPPORTS_SSE2
extern void QT_FASTCALL qt_convert_ARGB8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output);
extern void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output);
extern void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output);
extern void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_sse2(const QVideoFrame &frame, uchar *output);
+ extern void QT_FASTCALL qt_copy_pixels_with_mask_sse2(uint32_t * dst, const uint32_t *src, size_t size, uint32_t mask);
+
if (qCpuHasFeature(SSE2)){
qConvertFuncs[QVideoFrameFormat::Format_ARGB8888] = qt_convert_ARGB8888_to_ARGB32_sse2;
qConvertFuncs[QVideoFrameFormat::Format_ARGB8888_Premultiplied] = qt_convert_ARGB8888_to_ARGB32_sse2;
@@ -512,6 +526,8 @@ static void qInitConvertFuncsAsm()
qConvertFuncs[QVideoFrameFormat::Format_XBGR8888] = qt_convert_ABGR8888_to_ARGB32_sse2;
qConvertFuncs[QVideoFrameFormat::Format_RGBA8888] = qt_convert_RGBA8888_to_ARGB32_sse2;
qConvertFuncs[QVideoFrameFormat::Format_RGBX8888] = qt_convert_RGBA8888_to_ARGB32_sse2;
+
+ qPixelsCopyFunc = qt_copy_pixels_with_mask_sse2;
}
#endif
#ifdef QT_COMPILER_SUPPORTS_SSSE3
@@ -537,6 +553,7 @@ static void qInitConvertFuncsAsm()
extern void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output);
extern void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output);
extern void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_avx2(const QVideoFrame &frame, uchar *output);
+ extern void QT_FASTCALL qt_copy_pixels_with_mask_avx2(uint32_t * dst, const uint32_t *src, size_t size, uint32_t mask);
if (qCpuHasFeature(AVX2)){
qConvertFuncs[QVideoFrameFormat::Format_ARGB8888] = qt_convert_ARGB8888_to_ARGB32_avx2;
qConvertFuncs[QVideoFrameFormat::Format_ARGB8888_Premultiplied] = qt_convert_ARGB8888_to_ARGB32_avx2;
@@ -548,18 +565,70 @@ static void qInitConvertFuncsAsm()
qConvertFuncs[QVideoFrameFormat::Format_XBGR8888] = qt_convert_ABGR8888_to_ARGB32_avx2;
qConvertFuncs[QVideoFrameFormat::Format_RGBA8888] = qt_convert_RGBA8888_to_ARGB32_avx2;
qConvertFuncs[QVideoFrameFormat::Format_RGBX8888] = qt_convert_RGBA8888_to_ARGB32_avx2;
+
+ qPixelsCopyFunc = qt_copy_pixels_with_mask_avx2;
}
#endif
}
VideoFrameConvertFunc qConverterForFormat(QVideoFrameFormat::PixelFormat format)
{
- static std::once_flag once;
- std::call_once(once, &qInitConvertFuncsAsm);
+ std::call_once(InitFuncsAsmFlag, &qInitFuncsAsm);
VideoFrameConvertFunc convert = qConvertFuncs[format];
return convert;
}
+// Copies pixCount 32-bit pixels from src to dst so that the bits returned by
+// qAlphaMask(format) end up set in the output.
+// If srcAlphaVaries is false, only the first source pixel is inspected: when
+// its masked bits are already all set the data is copied verbatim with memcpy,
+// otherwise every pixel is OR-ed with the mask.
+void Q_MULTIMEDIA_EXPORT qCopyPixelsWithAlphaMask(uint32_t *dst,
+                                                  const uint32_t *src,
+                                                  size_t pixCount,
+                                                  QVideoFrameFormat::PixelFormat format,
+                                                  bool srcAlphaVaries)
+{
+    // Nothing to copy; returning here also keeps src[0] below from being read.
+    if (pixCount == 0)
+        return;
+
+    const auto alphaMask = qAlphaMask(format);
+
+    // Short-circuit evaluation leaves src[0] unread when the caller states
+    // that the alpha value varies.
+    const bool alphaAlreadySet = !srcAlphaVaries && (src[0] & alphaMask) == alphaMask;
+    if (alphaAlreadySet) {
+        memcpy(dst, src, pixCount * 4);
+        return;
+    }
+
+    qCopyPixelsWithMask(dst, src, pixCount, alphaMask);
+}
+
+// Copies `size` pixels from src to dst, OR-ing each one with `mask`, using the
+// implementation currently stored in qPixelsCopyFunc.
+void qCopyPixelsWithMask(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask)
+{
+    // One-time initialisation, guarded by the same flag qConverterForFormat()
+    // uses: qInitFuncsAsm() may replace qPixelsCopyFunc with an SSE2/AVX2 variant.
+    std::call_once(InitFuncsAsmFlag, &qInitFuncsAsm);
+
+    const PixelsCopyFunc copyPixels = qPixelsCopyFunc;
+    copyPixels(dst, src, size, mask);
+}
+
+// Returns the bits of a 32-bit pixel word that are treated as the alpha (or
+// padding) byte for the given pixel format, or 0 for any format not listed.
+uint32_t qAlphaMask(QVideoFrameFormat::PixelFormat format)
+{
+    // The mask selects either the byte at the lowest or at the highest address
+    // of the pixel; which bits of the loaded word that is depends on the host
+    // byte order.
+#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
+    constexpr uint32_t lowestAddressByte = 0xff;
+    constexpr uint32_t highestAddressByte = 0xff000000;
+#else
+    constexpr uint32_t lowestAddressByte = 0xff000000;
+    constexpr uint32_t highestAddressByte = 0xff;
+#endif
+
+    switch (format) {
+    case QVideoFrameFormat::Format_ARGB8888:
+    case QVideoFrameFormat::Format_ARGB8888_Premultiplied:
+    case QVideoFrameFormat::Format_XRGB8888:
+    case QVideoFrameFormat::Format_ABGR8888:
+    case QVideoFrameFormat::Format_XBGR8888:
+        return lowestAddressByte;
+    case QVideoFrameFormat::Format_BGRA8888:
+    case QVideoFrameFormat::Format_BGRA8888_Premultiplied:
+    case QVideoFrameFormat::Format_BGRX8888:
+    case QVideoFrameFormat::Format_RGBA8888:
+    case QVideoFrameFormat::Format_RGBX8888:
+        return highestAddressByte;
+    default:
+        return 0;
+    }
+}
QT_END_NAMESPACE
diff --git a/src/multimedia/video/qvideoframeconversionhelper_avx2.cpp b/src/multimedia/video/qvideoframeconversionhelper_avx2.cpp
index 5ae1e41ae..2991f00c6 100644
--- a/src/multimedia/video/qvideoframeconversionhelper_avx2.cpp
+++ b/src/multimedia/video/qvideoframeconversionhelper_avx2.cpp
@@ -93,6 +93,43 @@ void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_avx2(const QVideoFrame &frame, uc
convert_to_ARGB32_avx2<3, 2, 1, 0>(frame, output);
}
+// AVX2 implementation of the masked pixel copy: dst[i] = src[i] | mask for
+// each of the `size` 32-bit pixels.
+void QT_FASTCALL qt_copy_pixels_with_mask_avx2(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask)
+{
+    constexpr size_t pixelsPerVector = 8; // 8 x 32-bit pixels per 256-bit register
+    constexpr size_t pixelsPerBatch = 4 * pixelsPerVector; // main loop is unrolled 4x
+
+    const auto mask256 = _mm256_set1_epi32(static_cast<int>(mask));
+
+    size_t x = 0;
+
+    // Scalar prologue. ALIGN is defined outside this hunk; presumably it iterates
+    // until dst is 32-byte aligned (the aligned stores below rely on that) or
+    // x reaches size - TODO confirm against the macro definition.
+    ALIGN(32, dst, x, size)
+        *(dst++) = *(src++) | mask;
+
+    // Main loop, four vectors per iteration. The bound is written as "pixels
+    // remaining" (size - x, with x <= size) because an expression of the form
+    // `size - constant` is unsigned and wraps around for small sizes, which
+    // would let the loop run far past the end of both buffers.
+    for (; size - x >= pixelsPerBatch; x += pixelsPerBatch) {
+        const auto srcData1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
+        const auto srcData2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + pixelsPerVector));
+        const auto srcData3 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 2 * pixelsPerVector));
+        const auto srcData4 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 3 * pixelsPerVector));
+
+        _mm256_store_si256(reinterpret_cast<__m256i *>(dst), _mm256_or_si256(srcData1, mask256));
+        _mm256_store_si256(reinterpret_cast<__m256i *>(dst + pixelsPerVector), _mm256_or_si256(srcData2, mask256));
+        _mm256_store_si256(reinterpret_cast<__m256i *>(dst + 2 * pixelsPerVector), _mm256_or_si256(srcData3, mask256));
+        _mm256_store_si256(reinterpret_cast<__m256i *>(dst + 3 * pixelsPerVector), _mm256_or_si256(srcData4, mask256));
+
+        src += pixelsPerBatch;
+        dst += pixelsPerBatch;
+    }
+
+    // Remaining whole vectors, one at a time (same wrap-safe bound as above).
+    for (; size - x >= pixelsPerVector; x += pixelsPerVector) {
+        const auto srcData = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
+        _mm256_store_si256(reinterpret_cast<__m256i *>(dst), _mm256_or_si256(srcData, mask256));
+
+        src += pixelsPerVector;
+        dst += pixelsPerVector;
+    }
+
+    // Scalar tail: fewer than one vector's worth of pixels left.
+    for (; x < size; ++x)
+        *(dst++) = *(src++) | mask;
+}
+
QT_END_NAMESPACE
#endif
diff --git a/src/multimedia/video/qvideoframeconversionhelper_p.h b/src/multimedia/video/qvideoframeconversionhelper_p.h
index b7dd3023a..6b540840e 100644
--- a/src/multimedia/video/qvideoframeconversionhelper_p.h
+++ b/src/multimedia/video/qvideoframeconversionhelper_p.h
@@ -22,9 +22,20 @@ QT_BEGIN_NAMESPACE
// Converts to RGB32 or ARGB32_Premultiplied
typedef void (QT_FASTCALL *VideoFrameConvertFunc)(const QVideoFrame &frame, uchar *output);
+typedef void(QT_FASTCALL *PixelsCopyFunc)(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask);
VideoFrameConvertFunc qConverterForFormat(QVideoFrameFormat::PixelFormat format);
+void Q_MULTIMEDIA_EXPORT qCopyPixelsWithAlphaMask(uint32_t *dst,
+ const uint32_t *src,
+ size_t size,
+ QVideoFrameFormat::PixelFormat format,
+ bool srcAlphaVaries);
+
+void Q_MULTIMEDIA_EXPORT qCopyPixelsWithMask(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask);
+
+uint32_t Q_MULTIMEDIA_EXPORT qAlphaMask(QVideoFrameFormat::PixelFormat format);
+
template<int a, int r, int g, int b>
struct ArgbPixel
{
diff --git a/src/multimedia/video/qvideoframeconversionhelper_sse2.cpp b/src/multimedia/video/qvideoframeconversionhelper_sse2.cpp
index c856af9a0..11037c911 100644
--- a/src/multimedia/video/qvideoframeconversionhelper_sse2.cpp
+++ b/src/multimedia/video/qvideoframeconversionhelper_sse2.cpp
@@ -80,6 +80,44 @@ void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_sse2(const QVideoFrame &frame, uc
convert_to_ARGB32_sse2<3, 2, 1, 0>(frame, output);
}
+// SSE2 implementation of the masked pixel copy: dst[i] = src[i] | mask for
+// each of the `size` 32-bit pixels.
+void QT_FASTCALL qt_copy_pixels_with_mask_sse2(uint32_t *dst, const uint32_t *src, size_t size, uint32_t mask)
+{
+    constexpr size_t pixelsPerVector = 4; // 4 x 32-bit pixels per 128-bit register
+    constexpr size_t pixelsPerBatch = 4 * pixelsPerVector; // main loop is unrolled 4x
+
+    const auto mask128 = _mm_set1_epi32(static_cast<int>(mask));
+
+    size_t x = 0;
+
+    // Scalar prologue. ALIGN is defined outside this hunk; presumably it iterates
+    // until dst is 16-byte aligned (the aligned stores below rely on that) or
+    // x reaches size - TODO confirm against the macro definition.
+    ALIGN(16, dst, x, size)
+        *(dst++) = *(src++) | mask;
+
+    // Main loop, four vectors per iteration. The bound is written as "pixels
+    // remaining" (size - x, with x <= size) because an expression of the form
+    // `size - constant` is unsigned and wraps around for small sizes, which
+    // would let the loop run far past the end of both buffers.
+    for (; size - x >= pixelsPerBatch; x += pixelsPerBatch) {
+        const auto srcData0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+        const auto srcData1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + pixelsPerVector));
+        const auto srcData2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + 2 * pixelsPerVector));
+        const auto srcData3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + 3 * pixelsPerVector));
+
+        _mm_store_si128(reinterpret_cast<__m128i *>(dst), _mm_or_si128(srcData0, mask128));
+        _mm_store_si128(reinterpret_cast<__m128i *>(dst + pixelsPerVector), _mm_or_si128(srcData1, mask128));
+        _mm_store_si128(reinterpret_cast<__m128i *>(dst + 2 * pixelsPerVector), _mm_or_si128(srcData2, mask128));
+        _mm_store_si128(reinterpret_cast<__m128i *>(dst + 3 * pixelsPerVector), _mm_or_si128(srcData3, mask128));
+
+        src += pixelsPerBatch;
+        dst += pixelsPerBatch;
+    }
+
+    // Remaining whole vectors, one at a time (same wrap-safe bound as above).
+    for (; size - x >= pixelsPerVector; x += pixelsPerVector) {
+        const auto srcData = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+
+        _mm_store_si128(reinterpret_cast<__m128i *>(dst), _mm_or_si128(srcData, mask128));
+
+        src += pixelsPerVector;
+        dst += pixelsPerVector;
+    }
+
+    // Scalar tail: fewer than one vector's worth of pixels left.
+    for (; x < size; ++x)
+        *(dst++) = *(src++) | mask;
+}
+
QT_END_NAMESPACE
#endif
diff --git a/src/plugins/multimedia/ffmpeg/qx11surfacecapture.cpp b/src/plugins/multimedia/ffmpeg/qx11surfacecapture.cpp
index 248722ef1..81adfea33 100644
--- a/src/plugins/multimedia/ffmpeg/qx11surfacecapture.cpp
+++ b/src/plugins/multimedia/ffmpeg/qx11surfacecapture.cpp
@@ -14,6 +14,7 @@
#include "private/qabstractvideobuffer_p.h"
#include "private/qcapturablewindow_p.h"
#include "private/qmemoryvideobuffer_p.h"
+#include "private/qvideoframeconversionhelper_p.h"
#include <X11/Xlib.h>
#include <sys/shm.h>
@@ -265,9 +266,17 @@ protected:
return {};
}
- const QByteArray data(m_xImage->data, m_xImage->bytes_per_line * m_xImage->height);
- auto buffer = new QMemoryVideoBuffer(data, m_xImage->bytes_per_line);
+ QByteArray data(m_xImage->bytes_per_line * m_xImage->height, Qt::Uninitialized);
+
+ const auto pixelSrc = reinterpret_cast<const uint32_t *>(m_xImage->data);
+ const auto pixelDst = reinterpret_cast<uint32_t *>(data.data());
+ const auto pixelCount = data.size() / 4;
+ const auto xImageAlphaVaries = false; // In known cases it doesn't vary - it's 0xff or 0
+ qCopyPixelsWithAlphaMask(pixelDst, pixelSrc, pixelCount, m_format.pixelFormat(),
+ xImageAlphaVaries);
+
+ auto buffer = new QMemoryVideoBuffer(data, m_xImage->bytes_per_line);
return QVideoFrame(buffer, m_format);
}