summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Laszlo Agocs <laszlo.agocs@qt.io> 2020-09-17 15:38:40 +0200
committer Laszlo Agocs <laszlo.agocs@qt.io> 2020-09-18 09:49:54 +0200
commit 8fe16fef283d281454d575829c2411ec923a5c8b (patch)
tree 4bebd4a80c8d78776f14cc16ab4d79f16caa9dda /src
parent 6f2c7469f86785e6ba81fe0280210ef7275099de (diff)
rhi: Expose compute threadgroup limits in ResourceLimits
As OpenGL ES and Vulkan ruin the day with the spec mandated minimum value for max threads per threadgroup being only 128, clients need a way to decide if their compute shader (local_size_*) is suitable for use at run time.

Change-Id: I72b4fc97032406340623add82ea4d9544ebe9fdc
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
Diffstat (limited to 'src')
-rw-r--r-- src/gui/rhi/qrhi.cpp 26
-rw-r--r-- src/gui/rhi/qrhi_p.h 7
-rw-r--r-- src/gui/rhi/qrhid3d11.cpp 10
-rw-r--r-- src/gui/rhi/qrhigles2.cpp 34
-rw-r--r-- src/gui/rhi/qrhigles2_p_p.h 11
-rw-r--r-- src/gui/rhi/qrhimetal.mm 14
-rw-r--r-- src/gui/rhi/qrhinull.cpp 10
-rw-r--r-- src/gui/rhi/qrhivulkan.cpp 12
8 files changed, 123 insertions, 1 deletions
diff --git a/src/gui/rhi/qrhi.cpp b/src/gui/rhi/qrhi.cpp
index 06c3903b0c..794e6a6891 100644
--- a/src/gui/rhi/qrhi.cpp
+++ b/src/gui/rhi/qrhi.cpp
@@ -675,6 +675,32 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general")
frames (including the one that contains the readback) after which an
asynchronous texture or buffer readback is guaranteed to complete upon
\l{QRhi::beginFrame()}{starting a new frame}.
+
+ \value MaxThreadGroupsPerDimension The maximum number of compute
+ work/thread groups that can be dispatched. Effectively the maximum value
+ for the arguments of QRhiCommandBuffer::dispatch(). Typically 65535.
+
+ \value MaxThreadsPerThreadGroup The maximum number of invocations in a
+ single local work group, or in other terminology, the maximum number of
+ threads in a thread group. Effectively the maximum value for the product of
+ \c local_size_x, \c local_size_y, and \c local_size_z in the compute
+ shader. Typical values are 128, 256, 512, 1024, or 1536. Watch out that
+ both OpenGL ES and Vulkan specify only 128 as the minimum required limit
+ for implementations. While uncommon for Vulkan, some OpenGL ES 3.1
+ implementations for mobile/embedded devices only support the spec-mandated
+ minimum value.
+
+ \value MaxThreadGroupX The maximum size of a work/thread group in the X
+ dimension. Effectively the maximum value of \c local_size_x in the compute
+ shader. Typically 256 or 1024.
+
+ \value MaxThreadGroupY The maximum size of a work/thread group in the Y
+ dimension. Effectively the maximum value of \c local_size_y in the compute
+ shader. Typically 256 or 1024.
+
+ \value MaxThreadGroupZ The maximum size of a work/thread group in the Z
+ dimension. Effectively the maximum value of \c local_size_z in the compute
+ shader. Typically 64 or 256.
*/
/*!
diff --git a/src/gui/rhi/qrhi_p.h b/src/gui/rhi/qrhi_p.h
index 7d719fd218..1e3540fa1f 100644
--- a/src/gui/rhi/qrhi_p.h
+++ b/src/gui/rhi/qrhi_p.h
@@ -1490,7 +1490,12 @@ public:
TextureSizeMax,
MaxColorAttachments,
FramesInFlight,
- MaxAsyncReadbackFrames
+ MaxAsyncReadbackFrames,
+ MaxThreadGroupsPerDimension,
+ MaxThreadsPerThreadGroup,
+ MaxThreadGroupX,
+ MaxThreadGroupY,
+ MaxThreadGroupZ
};
~QRhi();
diff --git a/src/gui/rhi/qrhid3d11.cpp b/src/gui/rhi/qrhid3d11.cpp
index d5c32cde2c..be79b26b9c 100644
--- a/src/gui/rhi/qrhid3d11.cpp
+++ b/src/gui/rhi/qrhid3d11.cpp
@@ -554,6 +554,16 @@ int QRhiD3D11::resourceLimit(QRhi::ResourceLimit limit) const
return 1;
case QRhi::MaxAsyncReadbackFrames:
return 1;
+ case QRhi::MaxThreadGroupsPerDimension:
+ return D3D11_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION;
+ case QRhi::MaxThreadsPerThreadGroup:
+ return D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
+ case QRhi::MaxThreadGroupX:
+ return D3D11_CS_THREAD_GROUP_MAX_X;
+ case QRhi::MaxThreadGroupY:
+ return D3D11_CS_THREAD_GROUP_MAX_Y;
+ case QRhi::MaxThreadGroupZ:
+ return D3D11_CS_THREAD_GROUP_MAX_Z;
default:
Q_UNREACHABLE();
return 0;
diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp
index 72a8cc539b..4440182264 100644
--- a/src/gui/rhi/qrhigles2.cpp
+++ b/src/gui/rhi/qrhigles2.cpp
@@ -301,6 +301,18 @@ QT_BEGIN_NAMESPACE
#define GL_TEXTURE_2D_MULTISAMPLE 0x9100
#endif
+#ifndef GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
+#endif
+
+#ifndef GL_MAX_COMPUTE_WORK_GROUP_COUNT
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
+#endif
+
+#ifndef GL_MAX_COMPUTE_WORK_GROUP_SIZE
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
+#endif
+
/*!
Constructs a new QRhiGles2InitParams.
@@ -514,6 +526,18 @@ bool QRhiGles2::create(QRhi::Flags flags)
else
caps.compute = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 3); // 4.3
+ if (caps.compute) {
+ f->glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &caps.maxThreadsPerThreadGroup);
+ GLint tgPerDim[3];
+ f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &tgPerDim[0]);
+ f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &tgPerDim[1]);
+ f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &tgPerDim[2]);
+ caps.maxThreadGroupsPerDimension = qMin(tgPerDim[0], qMin(tgPerDim[1], tgPerDim[2]));
+ f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &caps.maxThreadGroupsX);
+ f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &caps.maxThreadGroupsY);
+ f->glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &caps.maxThreadGroupsZ);
+ }
+
if (caps.gles)
caps.textureCompareMode = caps.ctxMajor >= 3; // ES 3.0
else
@@ -931,6 +955,16 @@ int QRhiGles2::resourceLimit(QRhi::ResourceLimit limit) const
return 1;
case QRhi::MaxAsyncReadbackFrames:
return 1;
+ case QRhi::MaxThreadGroupsPerDimension:
+ return caps.maxThreadGroupsPerDimension;
+ case QRhi::MaxThreadsPerThreadGroup:
+ return caps.maxThreadsPerThreadGroup;
+ case QRhi::MaxThreadGroupX:
+ return caps.maxThreadGroupsX;
+ case QRhi::MaxThreadGroupY:
+ return caps.maxThreadGroupsY;
+ case QRhi::MaxThreadGroupZ:
+ return caps.maxThreadGroupsZ;
default:
Q_UNREACHABLE();
return 0;
diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h
index 9392254a78..1410e6ac38 100644
--- a/src/gui/rhi/qrhigles2_p_p.h
+++ b/src/gui/rhi/qrhigles2_p_p.h
@@ -848,6 +848,12 @@ public:
ctxMinor(0),
maxTextureSize(2048),
maxDrawBuffers(4),
+ maxSamples(16),
+ maxThreadGroupsPerDimension(0),
+ maxThreadsPerThreadGroup(0),
+ maxThreadGroupsX(0),
+ maxThreadGroupsY(0),
+ maxThreadGroupsZ(0),
msaaRenderBuffer(false),
multisampledTexture(false),
npotTextureFull(true),
@@ -882,6 +888,11 @@ public:
int maxTextureSize;
int maxDrawBuffers;
int maxSamples;
+ int maxThreadGroupsPerDimension;
+ int maxThreadsPerThreadGroup;
+ int maxThreadGroupsX;
+ int maxThreadGroupsY;
+ int maxThreadGroupsZ;
// Multisample fb and blit are supported (GLES 3.0 or OpenGL 3.x). Not
// the same as multisample textures!
uint msaaRenderBuffer : 1;
diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm
index fbb2003fb2..2736377c78 100644
--- a/src/gui/rhi/qrhimetal.mm
+++ b/src/gui/rhi/qrhimetal.mm
@@ -587,6 +587,20 @@ int QRhiMetal::resourceLimit(QRhi::ResourceLimit limit) const
return QMTL_FRAMES_IN_FLIGHT;
case QRhi::MaxAsyncReadbackFrames:
return QMTL_FRAMES_IN_FLIGHT;
+ case QRhi::MaxThreadGroupsPerDimension:
+ return 65535;
+ case QRhi::MaxThreadsPerThreadGroup:
+ Q_FALLTHROUGH();
+ case QRhi::MaxThreadGroupX:
+ Q_FALLTHROUGH();
+ case QRhi::MaxThreadGroupY:
+ Q_FALLTHROUGH();
+ case QRhi::MaxThreadGroupZ:
+#if defined(Q_OS_MACOS)
+ return 1024;
+#else
+ return 512;
+#endif
default:
Q_UNREACHABLE();
return 0;
diff --git a/src/gui/rhi/qrhinull.cpp b/src/gui/rhi/qrhinull.cpp
index a81f885d5a..48fca4e212 100644
--- a/src/gui/rhi/qrhinull.cpp
+++ b/src/gui/rhi/qrhinull.cpp
@@ -149,6 +149,16 @@ int QRhiNull::resourceLimit(QRhi::ResourceLimit limit) const
return 1;
case QRhi::MaxAsyncReadbackFrames:
return 1;
+ case QRhi::MaxThreadGroupsPerDimension:
+ return 0;
+ case QRhi::MaxThreadsPerThreadGroup:
+ return 0;
+ case QRhi::MaxThreadGroupX:
+ return 0;
+ case QRhi::MaxThreadGroupY:
+ return 0;
+ case QRhi::MaxThreadGroupZ:
+ return 0;
default:
Q_UNREACHABLE();
return 0;
diff --git a/src/gui/rhi/qrhivulkan.cpp b/src/gui/rhi/qrhivulkan.cpp
index b172a8b16b..e4ad562805 100644
--- a/src/gui/rhi/qrhivulkan.cpp
+++ b/src/gui/rhi/qrhivulkan.cpp
@@ -4124,6 +4124,18 @@ int QRhiVulkan::resourceLimit(QRhi::ResourceLimit limit) const
return QVK_FRAMES_IN_FLIGHT;
case QRhi::MaxAsyncReadbackFrames:
return QVK_FRAMES_IN_FLIGHT;
+ case QRhi::MaxThreadGroupsPerDimension:
+ return int(qMin(physDevProperties.limits.maxComputeWorkGroupCount[0],
+ qMin(physDevProperties.limits.maxComputeWorkGroupCount[1],
+ physDevProperties.limits.maxComputeWorkGroupCount[2])));
+ case QRhi::MaxThreadsPerThreadGroup:
+ return int(physDevProperties.limits.maxComputeWorkGroupInvocations);
+ case QRhi::MaxThreadGroupX:
+ return int(physDevProperties.limits.maxComputeWorkGroupSize[0]);
+ case QRhi::MaxThreadGroupY:
+ return int(physDevProperties.limits.maxComputeWorkGroupSize[1]);
+ case QRhi::MaxThreadGroupZ:
+ return int(physDevProperties.limits.maxComputeWorkGroupSize[2]);
default:
Q_UNREACHABLE();
return 0;