diff options
author | Laszlo Agocs <laszlo.agocs@qt.io> | 2019-06-12 14:22:33 +0200 |
---|---|---|
committer | Laszlo Agocs <laszlo.agocs@qt.io> | 2019-06-17 10:32:57 +0200 |
commit | 6f4aa5413183f3f18dd1b15dbc90bcee9ef85bdd (patch) | |
tree | 4e0d3d98de98f7a77cc9c52d4b11682093d94958 /src/gui | |
parent | 4c297bdca8da543c582d129f12413d29a2a520eb (diff) |
rhi: Add compute api and implement for Vulkan and Metal
D3D11 and GL (4.3+, ES 3.1+) will come separately at a
later time.
Change-Id: If30f2f3d062fa27e57e9912674669225b82a7b93
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src/gui')
-rw-r--r-- | src/gui/rhi/qrhi.cpp | 555 | ||||
-rw-r--r-- | src/gui/rhi/qrhi_p.h | 86 | ||||
-rw-r--r-- | src/gui/rhi/qrhi_p_p.h | 43 | ||||
-rw-r--r-- | src/gui/rhi/qrhid3d11.cpp | 132 | ||||
-rw-r--r-- | src/gui/rhi/qrhid3d11_p_p.h | 25 | ||||
-rw-r--r-- | src/gui/rhi/qrhigles2.cpp | 135 | ||||
-rw-r--r-- | src/gui/rhi/qrhigles2_p_p.h | 24 | ||||
-rw-r--r-- | src/gui/rhi/qrhimetal.mm | 533 | ||||
-rw-r--r-- | src/gui/rhi/qrhimetal_p_p.h | 47 | ||||
-rw-r--r-- | src/gui/rhi/qrhinull.cpp | 50 | ||||
-rw-r--r-- | src/gui/rhi/qrhinull_p_p.h | 14 | ||||
-rw-r--r-- | src/gui/rhi/qrhivulkan.cpp | 571 | ||||
-rw-r--r-- | src/gui/rhi/qrhivulkan_p_p.h | 71 | ||||
-rw-r--r-- | src/gui/rhi/qshaderdescription.cpp | 29 | ||||
-rw-r--r-- | src/gui/rhi/qshaderdescription_p.h | 3 | ||||
-rw-r--r-- | src/gui/rhi/qshaderdescription_p_p.h | 5 |
16 files changed, 1902 insertions, 421 deletions
diff --git a/src/gui/rhi/qrhi.cpp b/src/gui/rhi/qrhi.cpp index f599f16d21..dbad63c6d1 100644 --- a/src/gui/rhi/qrhi.cpp +++ b/src/gui/rhi/qrhi.cpp @@ -266,6 +266,18 @@ QT_BEGIN_NAMESPACE transitions. Such synchronization is done implicitly by the backends, where applicable (for example, Vulkan), by tracking resource usage as necessary. + \note Resources within a render or compute pass are expected to be bound to + a single usage during that pass. For example, a buffer can be used as + vertex, index, uniform, or storage buffer, but not a combination of them + within a single pass. However, it is perfectly fine to use a buffer as a + storage buffer in a compute pass, and then as a vertex buffer in a render + pass, for example, assuming the buffer declared both usages upon creation. + + \note Textures have this rule relaxed in certain cases, because using two + subresources (typically two different mip levels) of the same texture for + different access (one for load, one for store) is supported even within the + same pass. + \section3 Resource reuse From the user's point of view a QRhiResource is reusable immediately after @@ -481,6 +493,8 @@ QT_BEGIN_NAMESPACE when running on plain OpenGL ES 2.0 implementations without the necessary extension. When false, only 16-bit unsigned elements are supported in the index buffer. + + \value Compute Indicates that compute shaders are supported. */ /*! @@ -1131,21 +1145,22 @@ QDebug operator<<(QDebug dbg, const QRhiVertexInputLayout &v) #endif /*! - \class QRhiGraphicsShaderStage + \class QRhiShaderStage \inmodule QtRhi - \brief Specifies the type and the shader code for a shader stage in the graphics pipeline. + \brief Specifies the type and the shader code for a shader stage in the pipeline. */ /*! - \enum QRhiGraphicsShaderStage::Type + \enum QRhiShaderStage::Type Specifies the type of the shader stage. \value Vertex Vertex stage \value Fragment Fragment (pixel) stage + \value Compute Compute stage (this may not always be supported at run time) */ /*! - \fn QRhiGraphicsShaderStage::QRhiGraphicsShaderStage() + \fn QRhiShaderStage::QRhiShaderStage() Constructs a shader stage description for the vertex stage with an empty QShader. @@ -1160,7 +1175,7 @@ QDebug operator<<(QDebug dbg, const QRhiVertexInputLayout &v) In addition, it can also contain variants of the shader with slightly modified code. \a v can then be used to select the desired variant. */ -QRhiGraphicsShaderStage::QRhiGraphicsShaderStage(Type type, const QShader &shader, QShader::Variant v) +QRhiShaderStage::QRhiShaderStage(Type type, const QShader &shader, QShader::Variant v) : m_type(type), m_shader(shader), m_shaderVariant(v) @@ -1168,12 +1183,12 @@ QRhiGraphicsShaderStage::QRhiGraphicsShaderStage(Type type, const QShader &shade } /*! - \return \c true if the values in the two QRhiGraphicsShaderStage objects + \return \c true if the values in the two QRhiShaderStage objects \a a and \a b are equal. - \relates QRhiGraphicsShaderStage + \relates QRhiShaderStage */ -bool operator==(const QRhiGraphicsShaderStage &a, const QRhiGraphicsShaderStage &b) Q_DECL_NOTHROW +bool operator==(const QRhiShaderStage &a, const QRhiShaderStage &b) Q_DECL_NOTHROW { return a.type() == b.type() && a.shader() == b.shader() @@ -1181,12 +1196,12 @@ bool operator==(const QRhiGraphicsShaderStage &a, const QRhiGraphicsShaderStage } /*! - \return \c false if the values in the two QRhiGraphicsShaderStage + \return \c false if the values in the two QRhiShaderStage objects \a a and \a b are equal; otherwise returns \c true. - \relates QRhiGraphicsShaderStage + \relates QRhiShaderStage */ -bool operator!=(const QRhiGraphicsShaderStage &a, const QRhiGraphicsShaderStage &b) Q_DECL_NOTHROW +bool operator!=(const QRhiShaderStage &a, const QRhiShaderStage &b) Q_DECL_NOTHROW { return !(a == b); } @@ -1194,18 +1209,18 @@ bool operator!=(const QRhiGraphicsShaderStage &a, const QRhiGraphicsShaderStage /*! \return the hash value for \a v, using \a seed to seed the calculation. - \relates QRhiGraphicsShaderStage + \relates QRhiShaderStage */ -uint qHash(const QRhiGraphicsShaderStage &v, uint seed) Q_DECL_NOTHROW +uint qHash(const QRhiShaderStage &v, uint seed) Q_DECL_NOTHROW { return v.type() + qHash(v.shader(), seed) + v.shaderVariant(); } #ifndef QT_NO_DEBUG_STREAM -QDebug operator<<(QDebug dbg, const QRhiGraphicsShaderStage &s) +QDebug operator<<(QDebug dbg, const QRhiShaderStage &s) { QDebugStateSaver saver(dbg); - dbg.nospace() << "QRhiGraphicsShaderStage(type=" << s.type() + dbg.nospace() << "QRhiShaderStage(type=" << s.type() << " shader=" << s.shader() << " variant=" << s.shaderVariant() << ')'; @@ -1781,9 +1796,25 @@ quint64 QRhiResource::globalResourceId() const \enum QRhiBuffer::UsageFlag Flag values to specify how the buffer is going to be used. - \value VertexBuffer Vertex buffer - \value IndexBuffer Index buffer - \value UniformBuffer Uniform (constant) buffer + \value VertexBuffer Vertex buffer. This allows the QRhiBuffer to be used in + \l{setVertexInput()}{QRhiCommandBuffer::setVertexInput()}. + + \value IndexBuffer Index buffer. This allows the QRhiBuffer to be used in + \l{setVertexInput()}{QRhiCommandBuffer::setVertexInput()}. + + \value UniformBuffer Uniform buffer (also called constant buffer). This + allows the QRhiBuffer to be used in combination with + \l{UniformBuffer}{QRhiShaderResourceBinding::UniformBuffer}. When + \l{QRhi::NonDynamicUniformBuffers}{NonDynamicUniformBuffers} is reported as + not supported, this usage can only be combined with the type Dynamic. + + \value StorageBuffer Storage buffer. This allows the QRhiBuffer to be used + in combination with \l{BufferLoad}{QRhiShaderResourceBinding::BufferLoad}, + \l{BufferStore}{QRhiShaderResourceBinding::BufferStore}, or + \l{BufferLoadStore}{QRhiShaderResourceBinding::BufferLoadStore}. This usage + can only be combined with the types Immutable or Static, and is only + available when the \l{QRhi::Compute}{Compute feature} is reported as + supported. */ /*! @@ -1941,6 +1972,9 @@ QRhiResource::Type QRhiRenderBuffer::resourceType() const \value UsedWithGenerateMips The texture is going to be used with QRhiResourceUpdateBatch::generateMips(). + + \value UsedWithLoadStore The texture is going to be used with image + load/store operations, for example, in a compute shader. */ /*! @@ -2438,7 +2472,26 @@ bool QRhiShaderResourceBindings::isLayoutCompatible(const QRhiShaderResourceBind Specifies type of the shader resource bound to a binding point \value UniformBuffer Uniform buffer + \value SampledTexture Combined image sampler + + \value ImageLoad Image load (with GLSL this maps to doing imageLoad() on a + single level - and either one or all layers - of a texture exposed to the + shader as an image object) + + \value ImageStore Image store (with GLSL this maps to doing imageStore() or + imageAtomic*() on a single level - and either one or all layers - of a + texture exposed to the shader as an image object) + + \value ImageLoadStore Image load and store + + \value BufferLoad Storage buffer store (with GLSL this maps to reading from + a shader storage buffer) + + \value BufferStore Storage buffer store (with GLSL this maps to writing to + a shader storage buffer) + + \value BufferLoadStore Storage buffer load and store */ /*! @@ -2447,6 +2500,7 @@ bool QRhiShaderResourceBindings::isLayoutCompatible(const QRhiShaderResourceBind \value VertexStage Vertex stage \value FragmentStage Fragment (pixel) stage + \value ComputeStage Compute stage */ /*! @@ -2513,6 +2567,8 @@ bool QRhiShaderResourceBinding::isLayoutCompatible(const QRhiShaderResourceBindi /*! \return a shader resource binding for the given binding number, pipeline stages, and buffer specified by \a binding, \a stage, and \a buf. + + \note \a buf must have been created with QRhiBuffer::UniformBuffer. */ QRhiShaderResourceBinding QRhiShaderResourceBinding::uniformBuffer( int binding, StageFlags stage, QRhiBuffer *buf) @@ -2539,21 +2595,17 @@ QRhiShaderResourceBinding QRhiShaderResourceBinding::uniformBuffer( QRhi::ubufAlignment(). \note \a size must be greater than 0. + + \note \a buf must have been created with QRhiBuffer::UniformBuffer. */ QRhiShaderResourceBinding QRhiShaderResourceBinding::uniformBuffer( int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size) { Q_ASSERT(size > 0); - QRhiShaderResourceBinding b; + QRhiShaderResourceBinding b = uniformBuffer(binding, stage, buf); QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); - Q_ASSERT(d->ref.load() == 1); - d->binding = binding; - d->stage = stage; - d->type = UniformBuffer; - d->u.ubuf.buf = buf; d->u.ubuf.offset = offset; d->u.ubuf.maybeSize = size; - d->u.ubuf.hasDynamicOffset = false; return b; } @@ -2565,19 +2617,14 @@ QRhiShaderResourceBinding QRhiShaderResourceBinding::uniformBuffer( varying offset values without creating new bindings for the buffer. The size of the bound region is specified by \a size. Like with non-dynamic offsets, \c{offset + size} cannot exceed the size of \a buf. + + \note \a buf must have been created with QRhiBuffer::UniformBuffer. */ QRhiShaderResourceBinding QRhiShaderResourceBinding::uniformBufferWithDynamicOffset( int binding, StageFlags stage, QRhiBuffer *buf, int size) { - QRhiShaderResourceBinding b; + QRhiShaderResourceBinding b = uniformBuffer(binding, stage, buf, 0, size); QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); - Q_ASSERT(d->ref.load() == 1); - d->binding = binding; - d->stage = stage; - d->type = UniformBuffer; - d->u.ubuf.buf = buf; - d->u.ubuf.offset = 0; - d->u.ubuf.maybeSize = size; d->u.ubuf.hasDynamicOffset = true; return b; } @@ -2602,6 +2649,167 @@ QRhiShaderResourceBinding QRhiShaderResourceBinding::sampledTexture( } /*! + \return a shader resource binding for a read-only storage image with the + given \a binding number and pipeline \a stage. The image load operations + will have access to all layers of the specified \a level. (so if the texture + is a cubemap, the shader must use imageCube instead of image2D) + + \note \a tex must have been created with QRhiTexture::UsedWithLoadStore. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::imageLoad( + int binding, StageFlags stage, QRhiTexture *tex, int level) +{ + QRhiShaderResourceBinding b; + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + Q_ASSERT(d->ref.load() == 1); + d->binding = binding; + d->stage = stage; + d->type = ImageLoad; + d->u.simage.tex = tex; + d->u.simage.level = level; + return b; +} + +/*! + \return a shader resource binding for a write-only storage image with the + given \a binding number and pipeline \a stage. The image store operations + will have access to all layers of the specified \a level. (so if the texture + is a cubemap, the shader must use imageCube instead of image2D) + + \note \a tex must have been created with QRhiTexture::UsedWithLoadStore. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::imageStore( + int binding, StageFlags stage, QRhiTexture *tex, int level) +{ + QRhiShaderResourceBinding b = imageLoad(binding, stage, tex, level); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->type = ImageStore; + return b; +} + +/*! + \return a shader resource binding for a read/write storage image with the + given \a binding number and pipeline \a stage. The image load/store operations + will have access to all layers of the specified \a level. (so if the texture + is a cubemap, the shader must use imageCube instead of image2D) + + \note \a tex must have been created with QRhiTexture::UsedWithLoadStore. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::imageLoadStore( + int binding, StageFlags stage, QRhiTexture *tex, int level) +{ + QRhiShaderResourceBinding b = imageLoad(binding, stage, tex, level); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->type = ImageLoadStore; + return b; +} + +/*! + \return a shader resource binding for a read-only storage buffer with the + given \a binding number and pipeline \a stage. + + \note \a buf must have been created with QRhiBuffer::StorageBuffer. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::bufferLoad( + int binding, StageFlags stage, QRhiBuffer *buf) +{ + QRhiShaderResourceBinding b; + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + Q_ASSERT(d->ref.load() == 1); + d->binding = binding; + d->stage = stage; + d->type = BufferLoad; + d->u.sbuf.buf = buf; + d->u.sbuf.offset = 0; + d->u.sbuf.maybeSize = 0; // entire buffer + return b; +} + +/*! + \return a shader resource binding for a read-only storage buffer with the + given \a binding number and pipeline \a stage. This overload binds a region + only, as specified by \a offset and \a size. + + \note \a buf must have been created with QRhiBuffer::StorageBuffer. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::bufferLoad( + int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size) +{ + Q_ASSERT(size > 0); + QRhiShaderResourceBinding b = bufferLoad(binding, stage, buf); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->u.sbuf.offset = offset; + d->u.sbuf.maybeSize = size; + return b; +} + +/*! + \return a shader resource binding for a write-only storage buffer with the + given \a binding number and pipeline \a stage. + + \note \a buf must have been created with QRhiBuffer::StorageBuffer. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::bufferStore( + int binding, StageFlags stage, QRhiBuffer *buf) +{ + QRhiShaderResourceBinding b = bufferLoad(binding, stage, buf); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->type = BufferStore; + return b; +} + +/*! + \return a shader resource binding for a write-only storage buffer with the + given \a binding number and pipeline \a stage. This overload binds a region + only, as specified by \a offset and \a size. + + \note \a buf must have been created with QRhiBuffer::StorageBuffer. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::bufferStore( + int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size) +{ + Q_ASSERT(size > 0); + QRhiShaderResourceBinding b = bufferStore(binding, stage, buf); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->u.sbuf.offset = offset; + d->u.sbuf.maybeSize = size; + return b; +} + +/*! + \return a shader resource binding for a read-write storage buffer with the + given \a binding number and pipeline \a stage. + + \note \a buf must have been created with QRhiBuffer::StorageBuffer. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::bufferLoadStore( + int binding, StageFlags stage, QRhiBuffer *buf) +{ + QRhiShaderResourceBinding b = bufferLoad(binding, stage, buf); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->type = BufferLoadStore; + return b; +} + +/*! + \return a shader resource binding for a read-write storage buffer with the + given \a binding number and pipeline \a stage. This overload binds a region + only, as specified by \a offset and \a size. + + \note \a buf must have been created with QRhiBuffer::StorageBuffer. + */ +QRhiShaderResourceBinding QRhiShaderResourceBinding::bufferLoadStore( + int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size) +{ + Q_ASSERT(size > 0); + QRhiShaderResourceBinding b = bufferLoadStore(binding, stage, buf); + QRhiShaderResourceBindingPrivate *d = QRhiShaderResourceBindingPrivate::get(&b); + d->u.sbuf.offset = offset; + d->u.sbuf.maybeSize = size; + return b; +} + +/*! \return \c true if the contents of the two QRhiShaderResourceBinding objects \a a and \a b are equal. This includes the resources (buffer, texture) and related parameters (offset, size) as well. To only compare @@ -2639,6 +2847,29 @@ bool operator==(const QRhiShaderResourceBinding &a, const QRhiShaderResourceBind return false; } break; + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + if (a.d->u.simage.tex != b.d->u.simage.tex + || a.d->u.simage.level != b.d->u.simage.level) + { + return false; + } + break; + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + if (a.d->u.sbuf.buf != b.d->u.sbuf.buf + || a.d->u.sbuf.offset != b.d->u.sbuf.offset + || a.d->u.sbuf.maybeSize != b.d->u.sbuf.maybeSize) + { + return false; + } + break; default: Q_UNREACHABLE(); return false; @@ -2693,6 +2924,45 @@ QDebug operator<<(QDebug dbg, const QRhiShaderResourceBinding &b) << " sampler=" << d->u.stex.sampler << ')'; break; + case QRhiShaderResourceBinding::ImageLoad: + dbg.nospace() << " ImageLoad(" + << "texture=" << d->u.simage.tex + << " level=" << d->u.simage.level + << ')'; + break; + case QRhiShaderResourceBinding::ImageStore: + dbg.nospace() << " ImageStore(" + << "texture=" << d->u.simage.tex + << " level=" << d->u.simage.level + << ')'; + break; + case QRhiShaderResourceBinding::ImageLoadStore: + dbg.nospace() << " ImageLoadStore(" + << "texture=" << d->u.simage.tex + << " level=" << d->u.simage.level + << ')'; + break; + case QRhiShaderResourceBinding::BufferLoad: + dbg.nospace() << " BufferLoad(" + << "buffer=" << d->u.sbuf.buf + << " offset=" << d->u.sbuf.offset + << " maybeSize=" << d->u.sbuf.maybeSize + << ')'; + break; + case QRhiShaderResourceBinding::BufferStore: + dbg.nospace() << " BufferStore(" + << "buffer=" << d->u.sbuf.buf + << " offset=" << d->u.sbuf.offset + << " maybeSize=" << d->u.sbuf.maybeSize + << ')'; + break; + case QRhiShaderResourceBinding::BufferLoadStore: + dbg.nospace() << " BufferLoadStore(" + << "buffer=" << d->u.sbuf.buf + << " offset=" << d->u.sbuf.offset + << " maybeSize=" << d->u.sbuf.maybeSize + << ')'; + break; default: Q_UNREACHABLE(); break; @@ -3196,6 +3466,34 @@ QRhiResource::Type QRhiSwapChain::resourceType() const */ /*! + \class QRhiComputePipeline + \inmodule QtRhi + \brief Compute pipeline state resource. + + \note Setting the shader resource bindings is mandatory. The referenced + QRhiShaderResourceBindings must already be built by the time build() is + called. + + \note Setting the shader is mandatory. + */ + +/*! + \return the resource type. + */ +QRhiResource::Type QRhiComputePipeline::resourceType() const +{ + return ComputePipeline; +} + +/*! + \internal + */ +QRhiComputePipeline::QRhiComputePipeline(QRhiImplementation *rhi) + : QRhiResource(rhi) +{ +} + +/*! \class QRhiCommandBuffer \inmodule QtRhi \brief Command buffer resource. @@ -3982,8 +4280,8 @@ void QRhiCommandBuffer::endPass(QRhiResourceUpdateBatch *resourceUpdates) therefore overoptimizing to avoid calls to this function is not necessary on the applications' side. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. */ void QRhiCommandBuffer::setGraphicsPipeline(QRhiGraphicsPipeline *ps) { @@ -3994,14 +4292,13 @@ void QRhiCommandBuffer::setGraphicsPipeline(QRhiGraphicsPipeline *ps) Records binding a set of shader resources, such as, uniform buffers or textures, that are made visible to one or more shader stages. - \a srb can be null in which case the current graphics pipeline's associated - QRhiGraphicsPipeline::shaderResourceBindings() is used. When \a srb is - non-null, it must be + \a srb can be null in which case the current graphics or compute pipeline's + associated QRhiShaderResourceBindings is used. When \a srb is non-null, it + must be \l{QRhiShaderResourceBindings::isLayoutCompatible()}{layout-compatible}, meaning the layout (number of bindings, the type and binding number of each binding) must fully match the QRhiShaderResourceBindings that was - associated with the pipeline at the time of calling - QRhiGraphicsPipeline::build(). + associated with the pipeline at the time of calling the pipeline's build(). There are cases when a seemingly unnecessary setShaderResources() call is mandatory: when rebuilding a resource referenced from \a srb, for example @@ -4029,8 +4326,9 @@ void QRhiCommandBuffer::setGraphicsPipeline(QRhiGraphicsPipeline *ps) the conditions described above into account), so therefore overoptimizing to avoid calls to this function is not necessary on the applications' side. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render or compute pass, + meaning between a beginPass() and endPass(), or beginComputePass() and + endComputePass(). */ void QRhiCommandBuffer::setShaderResources(QRhiShaderResourceBindings *srb, int dynamicOffsetCount, @@ -4056,8 +4354,8 @@ void QRhiCommandBuffer::setShaderResources(QRhiShaderResourceBindings *srb, automatically with most backends and therefore applications do not need to overoptimize to avoid calls to this function. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. As a simple example, take a vertex shader with two inputs: @@ -4110,8 +4408,8 @@ void QRhiCommandBuffer::setVertexInput(int startBinding, int bindingCount, const \note QRhi assumes OpenGL-style viewport coordinates, meaning x and y are bottom-left. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. */ void QRhiCommandBuffer::setViewport(const QRhiViewport &viewport) { @@ -4129,8 +4427,8 @@ void QRhiCommandBuffer::setViewport(const QRhiViewport &viewport) \note QRhi assumes OpenGL-style viewport coordinates, meaning x and y are bottom-left. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. */ void QRhiCommandBuffer::setScissor(const QRhiScissor &scissor) { @@ -4143,8 +4441,8 @@ void QRhiCommandBuffer::setScissor(const QRhiScissor &scissor) This can only be called when the bound pipeline has QRhiGraphicsPipeline::UsesBlendConstants set. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. */ void QRhiCommandBuffer::setBlendConstants(const QColor &c) { @@ -4157,8 +4455,8 @@ void QRhiCommandBuffer::setBlendConstants(const QColor &c) This can only be called when the bound pipeline has QRhiGraphicsPipeline::UsesStencilRef set. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning between + a beginPass() and endPass() call. */ void QRhiCommandBuffer::setStencilRef(quint32 refValue) { @@ -4173,8 +4471,8 @@ void QRhiCommandBuffer::setStencilRef(quint32 refValue) the index of the first vertex to draw. \a firstInstance is the instance ID of the first instance to draw. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. */ void QRhiCommandBuffer::draw(quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) @@ -4200,8 +4498,8 @@ void QRhiCommandBuffer::draw(quint32 vertexCount, \a vertexOffset is added to the vertex index. - \note This function can only be called inside a pass, meaning between a - beginPass() end endPass() call. + \note This function can only be called inside a render pass, meaning + between a beginPass() and endPass() call. */ void QRhiCommandBuffer::drawIndexed(quint32 indexCount, quint32 instanceCount, quint32 firstIndex, @@ -4255,6 +4553,69 @@ void QRhiCommandBuffer::debugMarkMsg(const QByteArray &msg) } /*! + Records starting a new compute pass. + + \a resourceUpdates, when not null, specifies a resource update batch that + is to be committed and then released. + + \note Do not assume that any state or resource bindings persist between + passes. + + \note A compute pass can record setComputePipeline(), setShaderResources(), + and dispatch() calls, not graphics ones. General functionality, such as, + debug markers and beginExternal() is available both in render and compute + passes. + + \note Compute is only available when the \l{QRhi::Compute}{Compute} feature + is reported as supported. + */ +void QRhiCommandBuffer::beginComputePass(QRhiResourceUpdateBatch *resourceUpdates) +{ + m_rhi->beginComputePass(this, resourceUpdates); +} + +/*! + Records ending the current compute pass. + + \a resourceUpdates, when not null, specifies a resource update batch that + is to be committed and then released. + */ +void QRhiCommandBuffer::endComputePass(QRhiResourceUpdateBatch *resourceUpdates) +{ + m_rhi->endComputePass(this, resourceUpdates); +} + +/*! + Records setting a new compute pipeline \a ps. + + \note This function must be called before recording setShaderResources() or + dispatch() commands on the command buffer. + + \note QRhi will optimize out unnecessary invocations within a pass, so + therefore overoptimizing to avoid calls to this function is not necessary + on the applications' side. + + \note This function can only be called inside a compute pass, meaning + between a beginComputePass() and endComputePass() call. + */ +void QRhiCommandBuffer::setComputePipeline(QRhiComputePipeline *ps) +{ + m_rhi->setComputePipeline(this, ps); +} + +/*! + Records dispatching compute work items, with \a x, \a y, and \a z + specifying the number of local workgroups in the corresponding dimension. + + \note This function can only be called inside a compute pass, meaning + between a beginComputePass() and endComputePass() call. + */ +void QRhiCommandBuffer::dispatch(int x, int y, int z) +{ + m_rhi->dispatch(this, x, y, z); +} + +/*! \return a pointer to a backend-specific QRhiNativeHandles subclass, such as QRhiVulkanCommandBufferNativeHandles. The returned value is null when exposing the underlying native resources is not supported by, or not @@ -4480,6 +4841,19 @@ QRhiGraphicsPipeline *QRhi::newGraphicsPipeline() } /*! + \return a new compute pipeline resource. + + \note Compute is only available when the \l{QRhi::Compute}{Compute} feature + is reported as supported. + + \sa QRhiResource::release() + */ +QRhiComputePipeline *QRhi::newComputePipeline() +{ + return d->createComputePipeline(); +} + +/*! \return a new shader resource binding collection resource. \sa QRhiResource::release() @@ -4493,7 +4867,8 @@ QRhiShaderResourceBindings *QRhi::newShaderResourceBindings() \return a new buffer with the specified \a type, \a usage, and \a size. \note Some \a usage and \a type combinations may not be supported by all - backends. See \l{QRhi::NonDynamicUniformBuffers}{the feature flags}. + backends. See \l{QRhiBuffer::UsageFlag}{UsageFlags} and + \l{QRhi::NonDynamicUniformBuffers}{the feature flags}. \sa QRhiResource::release() */ @@ -4840,32 +5215,30 @@ static inline QRhiPassResourceTracker::BufferStage earlierStage(QRhiPassResource return QRhiPassResourceTracker::BufferStage(qMin(int(a), int(b))); } -void QRhiPassResourceTracker::registerBufferOnce(QRhiBuffer *buf, int slot, BufferAccess access, BufferStage stage, - const UsageState &stateAtPassBegin) +void QRhiPassResourceTracker::registerBuffer(QRhiBuffer *buf, int slot, BufferAccess *access, BufferStage *stage, + const UsageState &state) { auto it = std::find_if(m_buffers.begin(), m_buffers.end(), [buf](const Buffer &b) { return b.buf == buf; }); if (it != m_buffers.end()) { - if (it->access != access) { + if (it->access != *access) { const QByteArray name = buf->name(); qWarning("Buffer %p (%s) used with different accesses within the same pass, this is not allowed.", buf, name.constData()); return; } - if (it->stage != stage) - it->stage = earlierStage(it->stage, stage); - // Multiple registrations of the same buffer is fine as long is it is - // a compatible usage. stateAtPassBegin is not actually the state at - // pass begin in the second, third, etc. invocation but that's fine - // since we'll just return here. + if (it->stage != *stage) { + it->stage = earlierStage(it->stage, *stage); + *stage = it->stage; + } return; } Buffer b; b.buf = buf; b.slot = slot; - b.access = access; - b.stage = stage; - b.stateAtPassBegin = stateAtPassBegin; + b.access = *access; + b.stage = *stage; + b.stateAtPassBegin = state; // first use -> initial state m_buffers.append(b); } @@ -4875,30 +5248,44 @@ static inline QRhiPassResourceTracker::TextureStage earlierStage(QRhiPassResourc return QRhiPassResourceTracker::TextureStage(qMin(int(a), int(b))); } -void QRhiPassResourceTracker::registerTextureOnce(QRhiTexture *tex, TextureAccess access, TextureStage stage, - const UsageState &stateAtPassBegin) +static inline bool isImageLoadStore(QRhiPassResourceTracker::TextureAccess access) +{ + return access == QRhiPassResourceTracker::TexStorageLoad + || access == QRhiPassResourceTracker::TexStorageStore + || access == QRhiPassResourceTracker::TexStorageLoadStore; +} + +void QRhiPassResourceTracker::registerTexture(QRhiTexture *tex, TextureAccess *access, TextureStage *stage, + const UsageState &state) { auto it = std::find_if(m_textures.begin(), m_textures.end(), [tex](const Texture &t) { return t.tex == tex; }); if (it != m_textures.end()) { - if (it->access != access) { - const QByteArray name = tex->name(); - qWarning("Texture %p (%s) used with different accesses within the same pass, this is not allowed.", - tex, name.constData()); + if (it->access != *access) { + // Different subresources of a texture may be used for both load + // and store in the same pass. (think reading from one mip level + // and writing to another one in a compute shader) This we can + // handle by treating the entire resource as read-write. + if (isImageLoadStore(it->access) && isImageLoadStore(*access)) { + it->access = QRhiPassResourceTracker::TexStorageLoadStore; + *access = it->access; + } else { + const QByteArray name = tex->name(); + qWarning("Texture %p (%s) used with different accesses within the same pass, this is not allowed.", + tex, name.constData()); + } + } + if (it->stage != *stage) { + it->stage = earlierStage(it->stage, *stage); + *stage = it->stage; } - if (it->stage != stage) - it->stage = earlierStage(it->stage, stage); - // Multiple registrations of the same texture is fine as long is it is - // a compatible usage. stateAtPassBegin is not actually the state at - // pass begin in the second, third, etc. invocation but that's fine - // since we'll just return here. return; } Texture t; t.tex = tex; - t.access = access; - t.stage = stage; - t.stateAtPassBegin = stateAtPassBegin; + t.access = *access; + t.stage = *stage; + t.stateAtPassBegin = state; // first use -> initial state m_textures.append(t); } diff --git a/src/gui/rhi/qrhi_p.h b/src/gui/rhi/qrhi_p.h index b7515cb17a..0d296d370c 100644 --- a/src/gui/rhi/qrhi_p.h +++ b/src/gui/rhi/qrhi_p.h @@ -259,17 +259,18 @@ Q_GUI_EXPORT uint qHash(const QRhiVertexInputLayout &v, uint seed = 0) Q_DECL_NO Q_GUI_EXPORT QDebug operator<<(QDebug, const QRhiVertexInputLayout &); #endif -class Q_GUI_EXPORT QRhiGraphicsShaderStage +class Q_GUI_EXPORT QRhiShaderStage { public: enum Type { Vertex, - Fragment + Fragment, + Compute }; - QRhiGraphicsShaderStage() = default; - QRhiGraphicsShaderStage(Type type, const QShader &shader, - QShader::Variant v = QShader::StandardShader); + QRhiShaderStage() = default; + QRhiShaderStage(Type type, const QShader &shader, + QShader::Variant v = QShader::StandardShader); Type type() const { return m_type; } void setType(Type t) { m_type = t; } @@ -286,26 +287,35 @@ private: QShader::Variant m_shaderVariant = QShader::StandardShader; }; -Q_DECLARE_TYPEINFO(QRhiGraphicsShaderStage, Q_MOVABLE_TYPE); +Q_DECLARE_TYPEINFO(QRhiShaderStage, Q_MOVABLE_TYPE); -Q_GUI_EXPORT bool operator==(const QRhiGraphicsShaderStage &a, const QRhiGraphicsShaderStage &b) Q_DECL_NOTHROW; -Q_GUI_EXPORT bool operator!=(const QRhiGraphicsShaderStage &a, const QRhiGraphicsShaderStage &b) Q_DECL_NOTHROW; -Q_GUI_EXPORT uint qHash(const QRhiGraphicsShaderStage &s, uint seed = 0) Q_DECL_NOTHROW; +Q_GUI_EXPORT bool operator==(const QRhiShaderStage &a, const QRhiShaderStage &b) Q_DECL_NOTHROW; +Q_GUI_EXPORT bool operator!=(const QRhiShaderStage &a, const QRhiShaderStage &b) Q_DECL_NOTHROW; +Q_GUI_EXPORT uint qHash(const QRhiShaderStage &s, uint seed = 0) Q_DECL_NOTHROW; #ifndef QT_NO_DEBUG_STREAM -Q_GUI_EXPORT QDebug operator<<(QDebug, const QRhiGraphicsShaderStage &); +Q_GUI_EXPORT QDebug operator<<(QDebug, const QRhiShaderStage &); #endif +using QRhiGraphicsShaderStage = QRhiShaderStage; + class Q_GUI_EXPORT QRhiShaderResourceBinding { public: enum Type { UniformBuffer, - SampledTexture + SampledTexture, + ImageLoad, + ImageStore, + ImageLoadStore, + BufferLoad, + BufferStore, + BufferLoadStore }; enum StageFlag { VertexStage = 1 << 0, - FragmentStage = 1 << 1 + FragmentStage = 1 << 1, + ComputeStage = 1 << 2 }; Q_DECLARE_FLAGS(StageFlags, StageFlag) @@ -320,8 +330,20 @@ public: static QRhiShaderResourceBinding uniformBuffer(int binding, StageFlags stage, QRhiBuffer *buf); static QRhiShaderResourceBinding uniformBuffer(int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size); static QRhiShaderResourceBinding uniformBufferWithDynamicOffset(int binding, StageFlags stage, QRhiBuffer *buf, int size); + static QRhiShaderResourceBinding sampledTexture(int binding, StageFlags stage, QRhiTexture *tex, QRhiSampler *sampler); + static QRhiShaderResourceBinding imageLoad(int binding, StageFlags stage, QRhiTexture *tex, int level); + static QRhiShaderResourceBinding imageStore(int binding, StageFlags stage, QRhiTexture *tex, int level); + static QRhiShaderResourceBinding imageLoadStore(int binding, StageFlags stage, QRhiTexture *tex, int level); + + static QRhiShaderResourceBinding bufferLoad(int binding, StageFlags stage, QRhiBuffer *buf); + static QRhiShaderResourceBinding bufferLoad(int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size); + static QRhiShaderResourceBinding bufferStore(int binding, StageFlags stage, QRhiBuffer *buf); + static QRhiShaderResourceBinding bufferStore(int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size); + static QRhiShaderResourceBinding bufferLoadStore(int binding, StageFlags stage, QRhiBuffer *buf); + static QRhiShaderResourceBinding bufferLoadStore(int binding, StageFlags stage, QRhiBuffer *buf, int offset, int size); + private: QRhiShaderResourceBindingPrivate *d; friend class QRhiShaderResourceBindingPrivate; @@ -558,6 +580,7 @@ public: ShaderResourceBindings, GraphicsPipeline, SwapChain, + ComputePipeline, CommandBuffer }; @@ -594,7 +617,8 @@ public: enum UsageFlag { VertexBuffer = 1 << 0, IndexBuffer = 1 << 1, - UniformBuffer = 1 << 2 + UniformBuffer = 1 << 2, + StorageBuffer = 1 << 3 }; Q_DECLARE_FLAGS(UsageFlags, UsageFlag) @@ -629,7 +653,8 @@ public: MipMapped = 1 << 3, sRGB = 1 << 4, UsedAsTransferSource = 1 << 5, - UsedWithGenerateMips = 1 << 6 + UsedWithGenerateMips = 1 << 6, + UsedWithLoadStore = 1 << 7 }; Q_DECLARE_FLAGS(Flags, Flag) @@ -1043,8 +1068,8 @@ public: int sampleCount() const { return m_sampleCount; } void setSampleCount(int s) { m_sampleCount = s; } - QVector<QRhiGraphicsShaderStage> shaderStages() const { return m_shaderStages; } - void setShaderStages(const QVector<QRhiGraphicsShaderStage> &stages) { m_shaderStages = stages; } + QVector<QRhiShaderStage> shaderStages() const { return m_shaderStages; } + void setShaderStages(const QVector<QRhiShaderStage> &stages) { m_shaderStages = stages; } QRhiVertexInputLayout vertexInputLayout() const { return m_vertexInputLayout; } void setVertexInputLayout(const QRhiVertexInputLayout &layout) { m_vertexInputLayout = layout; } @@ -1073,7 +1098,7 @@ protected: quint32 m_stencilReadMask = 0xFF; quint32 m_stencilWriteMask = 0xFF; int m_sampleCount = 1; - QVector<QRhiGraphicsShaderStage> m_shaderStages; + QVector<QRhiShaderStage> m_shaderStages; QRhiVertexInputLayout m_vertexInputLayout; QRhiShaderResourceBindings *m_shaderResourceBindings = nullptr; QRhiRenderPassDescriptor *m_renderPassDesc = nullptr; @@ -1133,6 +1158,24 @@ protected: Q_DECLARE_OPERATORS_FOR_FLAGS(QRhiSwapChain::Flags) +class Q_GUI_EXPORT QRhiComputePipeline : public QRhiResource +{ +public: + QRhiResource::Type resourceType() const override; + virtual bool build() = 0; + + QRhiShaderStage shaderStage() const { return m_shaderStage; } + void setShaderStage(const QRhiShaderStage &stage) { m_shaderStage = stage; } + + QRhiShaderResourceBindings *shaderResourceBindings() const { return m_shaderResourceBindings; } + void setShaderResourceBindings(QRhiShaderResourceBindings *srb) { m_shaderResourceBindings = srb; } + +protected: + QRhiComputePipeline(QRhiImplementation *rhi); + QRhiShaderStage m_shaderStage; + QRhiShaderResourceBindings *m_shaderResourceBindings = nullptr; +}; + class Q_GUI_EXPORT QRhiCommandBuffer : public QRhiResource { public: @@ -1181,6 +1224,11 @@ public: void debugMarkEnd(); void debugMarkMsg(const QByteArray &msg); + void beginComputePass(QRhiResourceUpdateBatch *resourceUpdates = nullptr); + void endComputePass(QRhiResourceUpdateBatch *resourceUpdates = nullptr); + void setComputePipeline(QRhiComputePipeline *ps); + void dispatch(int x, int y, int z); + const QRhiNativeHandles *nativeHandles(); void beginExternal(); void endExternal(); @@ -1263,7 +1311,8 @@ public: NonFourAlignedEffectiveIndexBufferOffset, NPOTTextureRepeat, RedOrAlpha8IsRed, - ElementIndexUint + ElementIndexUint, + Compute }; enum BeginFrameFlag { @@ -1297,6 +1346,7 @@ public: void runCleanup(); QRhiGraphicsPipeline *newGraphicsPipeline(); + QRhiComputePipeline *newComputePipeline(); QRhiShaderResourceBindings *newShaderResourceBindings(); QRhiBuffer *newBuffer(QRhiBuffer::Type type, diff --git a/src/gui/rhi/qrhi_p_p.h b/src/gui/rhi/qrhi_p_p.h index de9bdae992..4fd01d3ef2 100644 --- a/src/gui/rhi/qrhi_p_p.h +++ b/src/gui/rhi/qrhi_p_p.h @@ -70,6 +70,7 @@ public: virtual void destroy() = 0; virtual QRhiGraphicsPipeline *createGraphicsPipeline() = 0; + virtual QRhiComputePipeline *createComputePipeline() = 0; virtual QRhiShaderResourceBindings *createShaderResourceBindings() = 0; virtual QRhiBuffer *createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, @@ -133,6 +134,11 @@ public: virtual void debugMarkEnd(QRhiCommandBuffer *cb) = 0; virtual void debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) = 0; + virtual void beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) = 0; + virtual void endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) = 0; + virtual void setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) = 0; + virtual void dispatch(QRhiCommandBuffer *cb, int x, int y, int z) = 0; + virtual const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) = 0; virtual void beginExternal(QRhiCommandBuffer *cb) = 0; virtual void endExternal(QRhiCommandBuffer *cb) = 0; @@ -200,6 +206,7 @@ public: protected: bool debugMarkers = false; int currentFrameSlot = 0; // for vk, mtl, and similar. unused by gl and d3d11. + bool inFrame = false; private: QRhi::Implementation implType; @@ -210,7 +217,6 @@ private: QSet<QRhiResource *> resources; QSet<QRhiResource *> pendingReleaseAndDestroyResources; QVector<QRhi::CleanupCallback> cleanupCallbacks; - bool inFrame = false; friend class QRhi; friend class QRhiResourceUpdateBatchPrivate; @@ -393,9 +399,20 @@ public: QRhiTexture *tex; QRhiSampler *sampler; }; + struct StorageImageData { + QRhiTexture *tex; + int level; + }; + struct StorageBufferData { + QRhiBuffer *buf; + int offset; + int maybeSize; + }; union { UniformBufferData ubuf; SampledTextureData stex; + StorageImageData simage; + StorageBufferData sbuf; } u; }; @@ -487,33 +504,41 @@ public: enum BufferStage { BufVertexInputStage, BufVertexStage, - BufFragmentStage + BufFragmentStage, + BufComputeStage }; enum BufferAccess { BufVertexInput, BufIndexRead, - BufUniformRead + BufUniformRead, + BufStorageLoad, + BufStorageStore, + BufStorageLoadStore }; - void registerBufferOnce(QRhiBuffer *buf, int slot, BufferAccess access, BufferStage stage, - const UsageState &stateAtPassBegin); + void registerBuffer(QRhiBuffer *buf, int slot, BufferAccess *access, BufferStage *stage, + const UsageState &state); enum TextureStage { TexVertexStage, TexFragmentStage, TexColorOutputStage, - TexDepthOutputStage + TexDepthOutputStage, + TexComputeStage }; enum TextureAccess { TexSample, TexColorOutput, - TexDepthOutput + TexDepthOutput, + TexStorageLoad, + TexStorageStore, + TexStorageLoadStore }; - void registerTextureOnce(QRhiTexture *tex, TextureAccess access, TextureStage stage, - const UsageState &stateAtPassBegin); + void registerTexture(QRhiTexture *tex, TextureAccess *access, TextureStage *stage, + const UsageState &state); struct Buffer { QRhiBuffer *buf; diff --git a/src/gui/rhi/qrhid3d11.cpp b/src/gui/rhi/qrhid3d11.cpp index 6e5b0f751f..ce1617045b 100644 --- a/src/gui/rhi/qrhid3d11.cpp +++ b/src/gui/rhi/qrhid3d11.cpp @@ -375,6 +375,8 @@ bool QRhiD3D11::isFeatureSupported(QRhi::Feature feature) const return true; case QRhi::ElementIndexUint: return true; + case QRhi::Compute: + return false; default: Q_UNREACHABLE(); return false; @@ -443,6 +445,11 @@ QRhiGraphicsPipeline *QRhiD3D11::createGraphicsPipeline() return new QD3D11GraphicsPipeline(this); } +QRhiComputePipeline *QRhiD3D11::createComputePipeline() +{ + return new QD3D11ComputePipeline(this); +} + QRhiShaderResourceBindings *QRhiD3D11::createShaderResourceBindings() { return new QD3D11ShaderResourceBindings(this); @@ -450,9 +457,8 @@ QRhiShaderResourceBindings *QRhiD3D11::createShaderResourceBindings() void QRhiD3D11::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) { - Q_ASSERT(inPass); - QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); QD3D11GraphicsPipeline *psD = QRHI_RES(QD3D11GraphicsPipeline, ps); const bool pipelineChanged = cbD->currentPipeline != ps || cbD->currentPipelineGeneration != psD->generation; @@ -471,9 +477,8 @@ void QRhiD3D11::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind int dynamicOffsetCount, const QRhiCommandBuffer::DynamicOffset *dynamicOffsets) { - Q_ASSERT(inPass); - QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); Q_ASSERT(cbD->currentPipeline); if (!srb) @@ -568,8 +573,8 @@ void QRhiD3D11::setVertexInput(QRhiCommandBuffer *cb, int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings, QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); bool needsBindVBuf = false; for (int i = 0; i < bindingCount; ++i) { @@ -632,8 +637,8 @@ void QRhiD3D11::setVertexInput(QRhiCommandBuffer *cb, void QRhiD3D11::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); Q_ASSERT(cbD->currentTarget); const QSize outputSize = cbD->currentTarget->pixelSize(); @@ -656,8 +661,8 @@ void QRhiD3D11::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) void QRhiD3D11::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); Q_ASSERT(cbD->currentTarget); const QSize outputSize = cbD->currentTarget->pixelSize(); @@ -678,8 +683,9 @@ void QRhiD3D11::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) void QRhiD3D11::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); + QD3D11CommandBuffer::Command cmd; cmd.cmd = QD3D11CommandBuffer::Command::BlendConstants; cmd.args.blendConstants.ps = QRHI_RES(QD3D11GraphicsPipeline, cbD->currentPipeline); @@ -692,8 +698,9 @@ void QRhiD3D11::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) void QRhiD3D11::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); + QD3D11CommandBuffer::Command cmd; cmd.cmd = QD3D11CommandBuffer::Command::StencilRef; cmd.args.stencilRef.ps = QRHI_RES(QD3D11GraphicsPipeline, cbD->currentPipeline); @@ -704,8 +711,9 @@ void QRhiD3D11::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) void QRhiD3D11::draw(QRhiCommandBuffer *cb, quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); + QD3D11CommandBuffer::Command cmd; cmd.cmd = QD3D11CommandBuffer::Command::Draw; cmd.args.draw.ps = QRHI_RES(QD3D11GraphicsPipeline, cbD->currentPipeline); @@ -719,8 +727,9 @@ void QRhiD3D11::draw(QRhiCommandBuffer *cb, quint32 vertexCount, void QRhiD3D11::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); + QD3D11CommandBuffer::Command cmd; cmd.cmd = QD3D11CommandBuffer::Command::DrawIndexed; cmd.args.drawIndexed.ps = QRHI_RES(QD3D11GraphicsPipeline, cbD->currentPipeline); @@ -777,28 +786,23 @@ const QRhiNativeHandles *QRhiD3D11::nativeHandles(QRhiCommandBuffer *cb) void QRhiD3D11::beginExternal(QRhiCommandBuffer *cb) { - Q_ASSERT(inPass); Q_UNUSED(cb); flushCommandBuffer(); } void QRhiD3D11::endExternal(QRhiCommandBuffer *cb) { - Q_ASSERT(inPass); QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); - Q_ASSERT(cbD->currentTarget); Q_ASSERT(cbD->commands.isEmpty()); cbD->resetCachedState(); - enqueueSetRenderTarget(cbD, cbD->currentTarget); + if (cbD->currentTarget) // could be compute, no rendertarget then + enqueueSetRenderTarget(cbD, cbD->currentTarget); } QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); - Q_ASSERT(!inFrame); - inFrame = true; - QD3D11SwapChain *swapChainD = QRHI_RES(QD3D11SwapChain, swapChain); contextState.currentSwapChain = swapChainD; const int currentFrameSlot = swapChainD->currentFrameSlot; @@ -844,9 +848,6 @@ QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF QRhi::FrameOpResult QRhiD3D11::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags) { - Q_ASSERT(inFrame); - inFrame = false; - QD3D11SwapChain *swapChainD = QRHI_RES(QD3D11SwapChain, swapChain); Q_ASSERT(contextState.currentSwapChain = swapChainD); const int currentFrameSlot = swapChainD->currentFrameSlot; @@ -899,8 +900,6 @@ QRhi::FrameOpResult QRhiD3D11::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame QRhi::FrameOpResult QRhiD3D11::beginOffscreenFrame(QRhiCommandBuffer **cb) { - Q_ASSERT(!inFrame); - inFrame = true; ofr.active = true; ofr.cbWrapper.resetState(); @@ -911,8 +910,6 @@ QRhi::FrameOpResult QRhiD3D11::beginOffscreenFrame(QRhiCommandBuffer **cb) QRhi::FrameOpResult QRhiD3D11::endOffscreenFrame() { - Q_ASSERT(inFrame && ofr.active); - inFrame = false; ofr.active = false; executeCommandBuffer(&ofr.cbWrapper); @@ -1047,8 +1044,6 @@ static inline bool isDepthTextureFormat(QRhiTexture::Format format) QRhi::FrameOpResult QRhiD3D11::finish() { - Q_ASSERT(!inPass); - if (inFrame) flushCommandBuffer(); @@ -1379,7 +1374,7 @@ static inline QD3D11RenderTargetData *rtData(QRhiRenderTarget *rt) void QRhiD3D11::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + Q_ASSERT(QRHI_RES(QD3D11CommandBuffer, cb)->recordingPass == QD3D11CommandBuffer::NoPass); enqueueResourceUpdates(cb, resourceUpdates); } @@ -1398,12 +1393,12 @@ void QRhiD3D11::beginPass(QRhiCommandBuffer *cb, const QRhiDepthStencilClearValue &depthStencilClearValue, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::NoPass); if (resourceUpdates) enqueueResourceUpdates(cb, resourceUpdates); - QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); bool wantsColorClear = true; bool wantsDsClear = true; QD3D11RenderTargetData *rtD = rtData(rt); @@ -1431,17 +1426,15 @@ void QRhiD3D11::beginPass(QRhiCommandBuffer *cb, clearCmd.args.clear.s = depthStencilClearValue.stencilClearValue(); cbD->commands.append(clearCmd); + cbD->recordingPass = QD3D11CommandBuffer::RenderPass; cbD->currentTarget = rt; - - inPass = true; } void QRhiD3D11::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inPass); - inPass = false; - QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::RenderPass); + if (cbD->currentTarget->resourceType() == QRhiResource::TextureRenderTarget) { QD3D11TextureRenderTarget *rtTex = QRHI_RES(QD3D11TextureRenderTarget, cbD->currentTarget); const QVector<QRhiColorAttachment> colorAttachments = rtTex->m_desc.colorAttachments(); @@ -1491,12 +1484,49 @@ void QRhiD3D11::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resource } } + cbD->recordingPass = QD3D11CommandBuffer::NoPass; cbD->currentTarget = nullptr; if (resourceUpdates) enqueueResourceUpdates(cb, resourceUpdates); } +void QRhiD3D11::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::NoPass); + + if (resourceUpdates) + enqueueResourceUpdates(cb, resourceUpdates); + + cbD->recordingPass = QD3D11CommandBuffer::ComputePass; +} + +void QRhiD3D11::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::ComputePass); + + cbD->recordingPass = QD3D11CommandBuffer::NoPass; + + if (resourceUpdates) + enqueueResourceUpdates(cb, resourceUpdates); +} + +void QRhiD3D11::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) +{ + Q_UNUSED(cb); + Q_UNUSED(ps); +} + +void QRhiD3D11::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) +{ + Q_UNUSED(cb); + Q_UNUSED(x); + Q_UNUSED(y); + Q_UNUSED(z); +} + void QRhiD3D11::updateShaderResourceBindings(QD3D11ShaderResourceBindings *srbD) { srbD->vsubufs.clear(); @@ -1709,6 +1739,8 @@ void QRhiD3D11::setRenderTarget(QRhiRenderTarget *rt) void QRhiD3D11::executeCommandBuffer(QD3D11CommandBuffer *cbD, QD3D11SwapChain *timestampSwapChain) { + Q_ASSERT(cbD->recordingPass == QD3D11CommandBuffer::NoPass); + quint32 stencilRef = 0; float blendConstants[] = { 1, 1, 1, 1 }; @@ -1911,6 +1943,11 @@ bool QD3D11Buffer::build() if (buffer) release(); + if (m_usage.testFlag(QRhiBuffer::UniformBuffer) && m_type != Dynamic) { + qWarning("UniformBuffer must always be combined with Dynamic on D3D11"); + return false; + } + const int nonZeroSize = m_size <= 0 ? 256 : m_size; const int roundedSize = m_usage.testFlag(QRhiBuffer::UniformBuffer) ? aligned(nonZeroSize, 256) : nonZeroSize; @@ -3008,7 +3045,7 @@ bool QD3D11GraphicsPipeline::build() } QByteArray vsByteCode; - for (const QRhiGraphicsShaderStage &shaderStage : qAsConst(m_shaderStages)) { + for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { QString error; QByteArray bytecode = compileHlslShaderSource(shaderStage.shader(), shaderStage.shaderVariant(), &error); if (bytecode.isEmpty()) { @@ -3016,7 +3053,7 @@ bool QD3D11GraphicsPipeline::build() return false; } switch (shaderStage.type()) { - case QRhiGraphicsShaderStage::Vertex: + case QRhiShaderStage::Vertex: hr = rhiD->dev->CreateVertexShader(bytecode.constData(), bytecode.size(), nullptr, &vs); if (FAILED(hr)) { qWarning("Failed to create vertex shader: %s", qPrintable(comErrorMessage(hr))); @@ -3024,7 +3061,7 @@ bool QD3D11GraphicsPipeline::build() } vsByteCode = bytecode; break; - case QRhiGraphicsShaderStage::Fragment: + case QRhiShaderStage::Fragment: hr = rhiD->dev->CreatePixelShader(bytecode.constData(), bytecode.size(), nullptr, &fs); if (FAILED(hr)) { qWarning("Failed to create pixel shader: %s", qPrintable(comErrorMessage(hr))); @@ -3072,6 +3109,25 @@ bool QD3D11GraphicsPipeline::build() return true; } +QD3D11ComputePipeline::QD3D11ComputePipeline(QRhiImplementation *rhi) + : QRhiComputePipeline(rhi) +{ +} + +QD3D11ComputePipeline::~QD3D11ComputePipeline() +{ + release(); +} + +void QD3D11ComputePipeline::release() +{ +} + +bool QD3D11ComputePipeline::build() +{ + return false; +} + QD3D11CommandBuffer::QD3D11CommandBuffer(QRhiImplementation *rhi) : QRhiCommandBuffer(rhi) { diff --git a/src/gui/rhi/qrhid3d11_p_p.h b/src/gui/rhi/qrhid3d11_p_p.h index 3942fa5076..775f256cb7 100644 --- a/src/gui/rhi/qrhid3d11_p_p.h +++ b/src/gui/rhi/qrhid3d11_p_p.h @@ -254,6 +254,14 @@ struct QD3D11GraphicsPipeline : public QRhiGraphicsPipeline friend class QRhiD3D11; }; +struct QD3D11ComputePipeline : public QRhiComputePipeline +{ + QD3D11ComputePipeline(QRhiImplementation *rhi); + ~QD3D11ComputePipeline(); + void release() override; + bool build() override; +}; + struct QD3D11SwapChain; struct QD3D11CommandBuffer : public QRhiCommandBuffer @@ -387,7 +395,14 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer } args; }; + enum PassType { + NoPass, + RenderPass, + ComputePass + }; + QVector<Command> commands; + PassType recordingPass; QRhiRenderTarget *currentTarget; QRhiGraphicsPipeline *currentPipeline; uint currentPipelineGeneration; @@ -418,6 +433,7 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer } void resetState() { resetCommands(); + recordingPass = NoPass; currentTarget = nullptr; resetCachedState(); } @@ -484,6 +500,7 @@ public: void destroy() override; QRhiGraphicsPipeline *createGraphicsPipeline() override; + QRhiComputePipeline *createComputePipeline() override; QRhiShaderResourceBindings *createShaderResourceBindings() override; QRhiBuffer *createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, @@ -548,6 +565,11 @@ public: void debugMarkEnd(QRhiCommandBuffer *cb) override; void debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) override; + void beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) override; + void dispatch(QRhiCommandBuffer *cb, int x, int y, int z) override; + const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; @@ -591,9 +613,6 @@ public: bool hasDxgi2 = false; QRhiD3D11NativeHandles nativeHandlesStruct; - bool inFrame = false; - bool inPass = false; - struct { int vsHighestActiveSrvBinding = -1; int fsHighestActiveSrvBinding = -1; diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index a6a0bb257f..32a25dd615 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -616,6 +616,8 @@ bool QRhiGles2::isFeatureSupported(QRhi::Feature feature) const return caps.coreProfile; case QRhi::ElementIndexUint: return caps.elementIndexUint; + case QRhi::Compute: + return false; default: Q_UNREACHABLE(); return false; @@ -692,11 +694,15 @@ QRhiShaderResourceBindings *QRhiGles2::createShaderResourceBindings() return new QGles2ShaderResourceBindings(this); } -void QRhiGles2::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) +QRhiComputePipeline *QRhiGles2::createComputePipeline() { - Q_ASSERT(inPass); + return new QGles2ComputePipeline(this); +} +void QRhiGles2::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) +{ QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); QGles2GraphicsPipeline *psD = QRHI_RES(QGles2GraphicsPipeline, ps); const bool pipelineChanged = cbD->currentPipeline != ps || cbD->currentPipelineGeneration != psD->generation; @@ -715,9 +721,8 @@ void QRhiGles2::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind int dynamicOffsetCount, const QRhiCommandBuffer::DynamicOffset *dynamicOffsets) { - Q_ASSERT(inPass); - QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); Q_ASSERT(cbD->currentPipeline); if (!srb) @@ -770,8 +775,8 @@ void QRhiGles2::setVertexInput(QRhiCommandBuffer *cb, int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings, QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat) { - Q_ASSERT(inPass); QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); for (int i = 0; i < bindingCount; ++i) { QRhiBuffer *buf = bindings[i].first; @@ -801,7 +806,9 @@ void QRhiGles2::setVertexInput(QRhiCommandBuffer *cb, void QRhiGles2::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) { - Q_ASSERT(inPass); + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); + QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::Viewport; const std::array<float, 4> r = viewport.viewport(); @@ -811,12 +818,14 @@ void QRhiGles2::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) cmd.args.viewport.h = r[3]; cmd.args.viewport.d0 = viewport.minDepth(); cmd.args.viewport.d1 = viewport.maxDepth(); - QRHI_RES(QGles2CommandBuffer, cb)->commands.append(cmd); + cbD->commands.append(cmd); } void QRhiGles2::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) { - Q_ASSERT(inPass); + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); + QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::Scissor; const std::array<int, 4> r = scissor.scissor(); @@ -824,25 +833,27 @@ void QRhiGles2::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) cmd.args.scissor.y = qMax(0, r[1]); cmd.args.scissor.w = r[2]; cmd.args.scissor.h = r[3]; - QRHI_RES(QGles2CommandBuffer, cb)->commands.append(cmd); + cbD->commands.append(cmd); } void QRhiGles2::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) { - Q_ASSERT(inPass); + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); + QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::BlendConstants; cmd.args.blendConstants.r = c.redF(); cmd.args.blendConstants.g = c.greenF(); cmd.args.blendConstants.b = c.blueF(); cmd.args.blendConstants.a = c.alphaF(); - QRHI_RES(QGles2CommandBuffer, cb)->commands.append(cmd); + cbD->commands.append(cmd); } void QRhiGles2::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) { - Q_ASSERT(inPass); QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::StencilRef; @@ -854,10 +865,10 @@ void QRhiGles2::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) void QRhiGles2::draw(QRhiCommandBuffer *cb, quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) { - Q_ASSERT(inPass); Q_UNUSED(instanceCount); // no instancing Q_UNUSED(firstInstance); QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::Draw; @@ -870,11 +881,11 @@ void QRhiGles2::draw(QRhiCommandBuffer *cb, quint32 vertexCount, void QRhiGles2::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance) { - Q_ASSERT(inPass); Q_UNUSED(instanceCount); // no instancing Q_UNUSED(firstInstance); Q_UNUSED(vertexOffset); // no glDrawElementsBaseVertex QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::DrawIndexed; @@ -918,19 +929,17 @@ const QRhiNativeHandles *QRhiGles2::nativeHandles(QRhiCommandBuffer *cb) void QRhiGles2::beginExternal(QRhiCommandBuffer *cb) { - Q_ASSERT(inPass); Q_UNUSED(cb); flushCommandBuffer(); // also ensures the context is current } void QRhiGles2::endExternal(QRhiCommandBuffer *cb) { - Q_ASSERT(inPass); QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); - Q_ASSERT(cbD->currentTarget); Q_ASSERT(cbD->commands.isEmpty()); cbD->resetCachedState(); - enqueueBindFramebuffer(cbD->currentTarget, cbD); + if (cbD->currentTarget) + enqueueBindFramebuffer(cbD->currentTarget, cbD); } static void addBoundaryCommand(QGles2CommandBuffer *cb, QGles2CommandBuffer::Command::Cmd type) @@ -943,13 +952,11 @@ static void addBoundaryCommand(QGles2CommandBuffer *cb, QGles2CommandBuffer::Com QRhi::FrameOpResult QRhiGles2::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); - Q_ASSERT(!inFrame); QGles2SwapChain *swapChainD = QRHI_RES(QGles2SwapChain, swapChain); if (!ensureContext(swapChainD->surface)) return QRhi::FrameOpError; - inFrame = true; currentSwapChain = swapChainD; QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); @@ -965,9 +972,6 @@ QRhi::FrameOpResult QRhiGles2::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF QRhi::FrameOpResult QRhiGles2::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags) { - Q_ASSERT(inFrame); - inFrame = false; - QGles2SwapChain *swapChainD = QRHI_RES(QGles2SwapChain, swapChain); Q_ASSERT(currentSwapChain == swapChainD); @@ -996,12 +1000,9 @@ QRhi::FrameOpResult QRhiGles2::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame QRhi::FrameOpResult QRhiGles2::beginOffscreenFrame(QRhiCommandBuffer **cb) { - Q_ASSERT(!inFrame); - if (!ensureContext()) return QRhi::FrameOpError; - inFrame = true; ofr.active = true; executeDeferredReleases(); @@ -1015,8 +1016,7 @@ QRhi::FrameOpResult QRhiGles2::beginOffscreenFrame(QRhiCommandBuffer **cb) QRhi::FrameOpResult QRhiGles2::endOffscreenFrame() { - Q_ASSERT(inFrame && ofr.active); - inFrame = false; + Q_ASSERT(ofr.active); ofr.active = false; addBoundaryCommand(&ofr.cbWrapper, QGles2CommandBuffer::Command::EndFrame); @@ -1031,7 +1031,6 @@ QRhi::FrameOpResult QRhiGles2::endOffscreenFrame() QRhi::FrameOpResult QRhiGles2::finish() { - Q_ASSERT(!inPass); // because that's what the QRhi docs say, even though not required by this backend return inFrame ? flushCommandBuffer() : QRhi::FrameOpSuccess; } @@ -1501,6 +1500,8 @@ static inline GLenum toGlTextureCompareFunc(QRhiSampler::CompareOp op) void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) { QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); + GLenum indexType = GL_UNSIGNED_SHORT; quint32 indexStride = sizeof(quint16); quint32 indexOffset = 0; @@ -1970,7 +1971,7 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *ps, QRhiShaderResource void QRhiGles2::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + Q_ASSERT(QRHI_RES(QGles2CommandBuffer, cb)->recordingPass == QGles2CommandBuffer::NoPass); enqueueResourceUpdates(cb, resourceUpdates); } @@ -2018,12 +2019,12 @@ void QRhiGles2::beginPass(QRhiCommandBuffer *cb, const QRhiDepthStencilClearValue &depthStencilClearValue, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); if (resourceUpdates) enqueueResourceUpdates(cb, resourceUpdates); - QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); bool wantsColorClear, wantsDsClear; QGles2RenderTargetData *rtD = enqueueBindFramebuffer(rt, cbD, &wantsColorClear, &wantsDsClear); @@ -2042,17 +2043,15 @@ void QRhiGles2::beginPass(QRhiCommandBuffer *cb, clearCmd.args.clear.s = depthStencilClearValue.stencilClearValue(); cbD->commands.append(clearCmd); + cbD->recordingPass = QGles2CommandBuffer::RenderPass; cbD->currentTarget = rt; - - inPass = true; } void QRhiGles2::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inPass); - inPass = false; - QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); + if (cbD->currentTarget->resourceType() == QRhiResource::TextureRenderTarget) { QGles2TextureRenderTarget *rtTex = QRHI_RES(QGles2TextureRenderTarget, cbD->currentTarget); const QVector<QRhiColorAttachment> colorAttachments = rtTex->m_desc.colorAttachments(); @@ -2083,12 +2082,49 @@ void QRhiGles2::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resource } } + cbD->recordingPass = QGles2CommandBuffer::NoPass; cbD->currentTarget = nullptr; if (resourceUpdates) enqueueResourceUpdates(cb, resourceUpdates); } +void QRhiGles2::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); + + if (resourceUpdates) + enqueueResourceUpdates(cb, resourceUpdates); + + cbD->recordingPass = QGles2CommandBuffer::ComputePass; +} + +void QRhiGles2::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::ComputePass); + + cbD->recordingPass = QGles2CommandBuffer::NoPass; + + if (resourceUpdates) + enqueueResourceUpdates(cb, resourceUpdates); +} + +void QRhiGles2::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) +{ + Q_UNUSED(cb); + Q_UNUSED(ps); +} + +void QRhiGles2::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) +{ + Q_UNUSED(cb); + Q_UNUSED(x); + Q_UNUSED(y); + Q_UNUSED(z); +} + QGles2Buffer::QGles2Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, int size) : QRhiBuffer(rhi, type, usage, size) { @@ -2742,9 +2778,9 @@ bool QGles2GraphicsPipeline::build() program = rhiD->f->glCreateProgram(); int sourceVer = 0; - for (const QRhiGraphicsShaderStage &shaderStage : qAsConst(m_shaderStages)) { - const bool isVertex = shaderStage.type() == QRhiGraphicsShaderStage::Vertex; - const bool isFragment = shaderStage.type() == QRhiGraphicsShaderStage::Fragment; + for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { + const bool isVertex = shaderStage.type() == QRhiShaderStage::Vertex; + const bool isFragment = shaderStage.type() == QRhiShaderStage::Fragment; if (!isVertex && !isFragment) continue; @@ -2902,6 +2938,25 @@ bool QGles2GraphicsPipeline::build() return true; } +QGles2ComputePipeline::QGles2ComputePipeline(QRhiImplementation *rhi) + : QRhiComputePipeline(rhi) +{ +} + +QGles2ComputePipeline::~QGles2ComputePipeline() +{ + release(); +} + +void QGles2ComputePipeline::release() +{ +} + +bool QGles2ComputePipeline::build() +{ + return false; +} + QGles2CommandBuffer::QGles2CommandBuffer(QRhiImplementation *rhi) : QRhiCommandBuffer(rhi) { diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h index 5254219bd6..fe74e2e75b 100644 --- a/src/gui/rhi/qrhigles2_p_p.h +++ b/src/gui/rhi/qrhigles2_p_p.h @@ -248,6 +248,14 @@ struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline Q_DECLARE_TYPEINFO(QGles2GraphicsPipeline::Uniform, Q_MOVABLE_TYPE); Q_DECLARE_TYPEINFO(QGles2GraphicsPipeline::Sampler, Q_MOVABLE_TYPE); +struct QGles2ComputePipeline : public QRhiComputePipeline +{ + QGles2ComputePipeline(QRhiImplementation *rhi); + ~QGles2ComputePipeline(); + void release() override; + bool build() override; +}; + struct QGles2CommandBuffer : public QRhiCommandBuffer { QGles2CommandBuffer(QRhiImplementation *rhi); @@ -426,7 +434,14 @@ struct QGles2CommandBuffer : public QRhiCommandBuffer } args; }; + enum PassType { + NoPass, + RenderPass, + ComputePass + }; + QVector<Command> commands; + PassType recordingPass; QRhiRenderTarget *currentTarget; QRhiGraphicsPipeline *currentPipeline; uint currentPipelineGeneration; @@ -452,6 +467,7 @@ struct QGles2CommandBuffer : public QRhiCommandBuffer } void resetState() { resetCommands(); + recordingPass = NoPass; currentTarget = nullptr; resetCachedState(); } @@ -495,6 +511,7 @@ public: void destroy() override; QRhiGraphicsPipeline *createGraphicsPipeline() override; + QRhiComputePipeline *createComputePipeline() override; QRhiShaderResourceBindings *createShaderResourceBindings() override; QRhiBuffer *createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, @@ -559,6 +576,11 @@ public: void debugMarkEnd(QRhiCommandBuffer *cb) override; void debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) override; + void beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) override; + void dispatch(QRhiCommandBuffer *cb, int x, int y, int z) override; + const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; @@ -645,8 +667,6 @@ public: uint uniformBuffers : 1; uint elementIndexUint : 1; } caps; - bool inFrame = false; - bool inPass = false; QGles2SwapChain *currentSwapChain = nullptr; QVector<GLint> supportedCompressedFormats; mutable QVector<int> supportedSampleCountList; diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm index 6030f55d10..22d4e4e6d2 100644 --- a/src/gui/rhi/qrhimetal.mm +++ b/src/gui/rhi/qrhimetal.mm @@ -36,6 +36,7 @@ #include "qrhimetal_p_p.h" #include "qshader_p.h" +#include "qshaderdescription_p.h" #include <QGuiApplication> #include <QWindow> #include <qmath.h> @@ -51,14 +52,12 @@ QT_BEGIN_NAMESPACE /* Metal backend. Double buffers and throttles to vsync. "Dynamic" buffers are - Shared (host visible) and duplicated (due to 2 frames in flight), "static" - are Managed on macOS and Shared on iOS/tvOS, and still duplicated. - "Immutable" is like "static" but with only one native buffer underneath. + Shared (host visible) and duplicated (to help having 2 frames in flight), + "static" and "immutable" are Managed on macOS and Shared on iOS/tvOS. Textures are Private (device local) and a host visible staging buffer is used to upload data to them. Does not rely on strong objects refs from - command buffers (hence uses commandBufferWithUnretainedReferences), but - does rely on automatic dependency tracking between encoders (hence no - MTLResourceHazardTrackingModeUntracked atm). + command buffers but does rely on the automatic resource tracking of the + command encoders. */ #if __has_feature(objc_arc) @@ -173,6 +172,7 @@ struct QRhiMetalData struct { id<MTLTexture> texture; id<MTLBuffer> stagingBuffers[QMTL_FRAMES_IN_FLIGHT]; + id<MTLTexture> views[QRhi::MAX_LEVELS]; } texture; struct { id<MTLSamplerState> samplerState; @@ -213,6 +213,7 @@ Q_DECLARE_TYPEINFO(QRhiMetalData::ActiveReadback, Q_MOVABLE_TYPE); struct QMetalBufferData { bool managed; + bool slotted; id<MTLBuffer> buf[QMTL_FRAMES_IN_FLIGHT]; QVector<QRhiResourceUpdateBatchPrivate::DynamicBufferUpdate> pendingUpdates[QMTL_FRAMES_IN_FLIGHT]; }; @@ -225,10 +226,16 @@ struct QMetalRenderBufferData struct QMetalTextureData { + QMetalTextureData(QMetalTexture *t) : q(t) { } + + QMetalTexture *q; MTLPixelFormat format; id<MTLTexture> tex = nil; id<MTLBuffer> stagingBuf[QMTL_FRAMES_IN_FLIGHT]; bool owns = true; + id<MTLTexture> perLevelViews[QRhi::MAX_LEVELS]; + + id<MTLTexture> viewForLevel(int level); }; struct QMetalSamplerData @@ -239,7 +246,8 @@ struct QMetalSamplerData struct QMetalCommandBufferData { id<MTLCommandBuffer> cb; - id<MTLRenderCommandEncoder> currentPassEncoder; + id<MTLRenderCommandEncoder> currentRenderPassEncoder; + id<MTLComputeCommandEncoder> currentComputePassEncoder; MTLRenderPassDescriptor *currentPassRpDesc; int currentFirstVertexBinding; QRhiBatchedBindings<id<MTLBuffer> > currentVertexInputsBuffers; @@ -286,6 +294,14 @@ struct QMetalGraphicsPipelineData id<MTLFunction> fsFunc = nil; }; +struct QMetalComputePipelineData +{ + id<MTLComputePipelineState> ps = nil; + id<MTLLibrary> csLib = nil; + id<MTLFunction> csFunc = nil; + MTLSize localSize; +}; + struct QMetalSwapChainData { CAMetalLayer *layer = nullptr; @@ -505,6 +521,8 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const return true; case QRhi::ElementIndexUint: return true; + case QRhi::Compute: + return true; default: Q_UNREACHABLE(); return false; @@ -573,6 +591,11 @@ QRhiGraphicsPipeline *QRhiMetal::createGraphicsPipeline() return new QMetalGraphicsPipeline(this); } +QRhiComputePipeline *QRhiMetal::createComputePipeline() +{ + return new QMetalComputePipeline(this); +} + QRhiShaderResourceBindings *QRhiMetal::createShaderResourceBindings() { return new QMetalShaderResourceBindings(this); @@ -583,7 +606,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD const QRhiCommandBuffer::DynamicOffset *dynamicOffsets, bool offsetOnlyChange) { - static const int KNOWN_STAGES = 2; + static const int KNOWN_STAGES = 3; struct { QRhiBatchedBindings<id<MTLBuffer> > buffers; QRhiBatchedBindings<NSUInteger> bufferOffsets; @@ -597,7 +620,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD case QRhiShaderResourceBinding::UniformBuffer: { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.ubuf.buf); - id<MTLBuffer> mtlbuf = bufD->d->buf[bufD->m_type == QRhiBuffer::Immutable ? 0 : currentFrameSlot]; + id<MTLBuffer> mtlbuf = bufD->d->buf[bufD->d->slotted ? currentFrameSlot : 0]; uint offset = b->u.ubuf.offset; for (int i = 0; i < dynamicOffsetCount; ++i) { const QRhiCommandBuffer::DynamicOffset &dynOfs(dynamicOffsets[i]); @@ -614,6 +637,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD res[1].buffers.feed(b->binding, mtlbuf); res[1].bufferOffsets.feed(b->binding, offset); } + if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { + res[2].buffers.feed(b->binding, mtlbuf); + res[2].bufferOffsets.feed(b->binding, offset); + } } break; case QRhiShaderResourceBinding::SampledTexture: @@ -628,6 +655,49 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD res[1].textures.feed(b->binding, texD->d->tex); res[1].samplers.feed(b->binding, samplerD->d->samplerState); } + if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { + res[2].textures.feed(b->binding, texD->d->tex); + res[2].samplers.feed(b->binding, samplerD->d->samplerState); + } + } + break; + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + { + QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex); + id<MTLTexture> t = texD->d->viewForLevel(b->u.simage.level); + if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) + res[0].textures.feed(b->binding, t); + if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) + res[1].textures.feed(b->binding, t); + if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) + res[2].textures.feed(b->binding, t); + } + break; + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + { + QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); + id<MTLBuffer> mtlbuf = bufD->d->buf[0]; + uint offset = b->u.sbuf.offset; + if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { + res[0].buffers.feed(b->binding, mtlbuf); + res[0].bufferOffsets.feed(b->binding, offset); + } + if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { + res[1].buffers.feed(b->binding, mtlbuf); + res[1].bufferOffsets.feed(b->binding, offset); + } + if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { + res[2].buffers.feed(b->binding, mtlbuf); + res[2].bufferOffsets.feed(b->binding, offset); + } } break; default: @@ -645,12 +715,17 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD const auto &offsetBatch(res[idx].bufferOffsets.batches[i]); switch (idx) { case 0: - [cbD->d->currentPassEncoder setVertexBuffers: bufferBatch.resources.constData() + [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData() offsets: offsetBatch.resources.constData() withRange: NSMakeRange(bufferBatch.startBinding, bufferBatch.resources.count())]; break; case 1: - [cbD->d->currentPassEncoder setFragmentBuffers: bufferBatch.resources.constData() + [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, bufferBatch.resources.count())]; + break; + case 2: + [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData() offsets: offsetBatch.resources.constData() withRange: NSMakeRange(bufferBatch.startBinding, bufferBatch.resources.count())]; break; @@ -670,11 +745,15 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD const auto &batch(res[idx].textures.batches[i]); switch (idx) { case 0: - [cbD->d->currentPassEncoder setVertexTextures: batch.resources.constData() + [cbD->d->currentRenderPassEncoder setVertexTextures: batch.resources.constData() withRange: NSMakeRange(batch.startBinding, batch.resources.count())]; break; case 1: - [cbD->d->currentPassEncoder setFragmentTextures: batch.resources.constData() + [cbD->d->currentRenderPassEncoder setFragmentTextures: batch.resources.constData() + withRange: NSMakeRange(batch.startBinding, batch.resources.count())]; + break; + case 2: + [cbD->d->currentComputePassEncoder setTextures: batch.resources.constData() withRange: NSMakeRange(batch.startBinding, batch.resources.count())]; break; default: @@ -686,11 +765,15 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD const auto &batch(res[idx].samplers.batches[i]); switch (idx) { case 0: - [cbD->d->currentPassEncoder setVertexSamplerStates: batch.resources.constData() + [cbD->d->currentRenderPassEncoder setVertexSamplerStates: batch.resources.constData() withRange: NSMakeRange(batch.startBinding, batch.resources.count())]; break; case 1: - [cbD->d->currentPassEncoder setFragmentSamplerStates: batch.resources.constData() + [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: batch.resources.constData() + withRange: NSMakeRange(batch.startBinding, batch.resources.count())]; + break; + case 2: + [cbD->d->currentComputePassEncoder setSamplerStates: batch.resources.constData() withRange: NSMakeRange(batch.startBinding, batch.resources.count())]; break; default: @@ -703,19 +786,19 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) { - Q_ASSERT(inPass); - QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); QMetalGraphicsPipeline *psD = QRHI_RES(QMetalGraphicsPipeline, ps); - if (cbD->currentPipeline != ps || cbD->currentPipelineGeneration != psD->generation) { - cbD->currentPipeline = ps; + if (cbD->currentGraphicsPipeline != ps || cbD->currentPipelineGeneration != psD->generation) { + cbD->currentGraphicsPipeline = ps; + cbD->currentComputePipeline = nullptr; cbD->currentPipelineGeneration = psD->generation; - [cbD->d->currentPassEncoder setRenderPipelineState: psD->d->ps]; - [cbD->d->currentPassEncoder setDepthStencilState: psD->d->ds]; - [cbD->d->currentPassEncoder setCullMode: psD->d->cullMode]; - [cbD->d->currentPassEncoder setFrontFacingWinding: psD->d->winding]; + [cbD->d->currentRenderPassEncoder setRenderPipelineState: psD->d->ps]; + [cbD->d->currentRenderPassEncoder setDepthStencilState: psD->d->ds]; + [cbD->d->currentRenderPassEncoder setCullMode: psD->d->cullMode]; + [cbD->d->currentRenderPassEncoder setFrontFacingWinding: psD->d->winding]; } psD->lastActiveFrameSlot = currentFrameSlot; @@ -725,12 +808,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind int dynamicOffsetCount, const QRhiCommandBuffer::DynamicOffset *dynamicOffsets) { - Q_ASSERT(inPass); - QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline); - if (!srb) - srb = QRHI_RES(QMetalGraphicsPipeline, cbD->currentPipeline)->m_shaderResourceBindings; + Q_ASSERT(cbD->recordingPass != QMetalCommandBuffer::NoPass); + QMetalGraphicsPipeline *gfxPsD = QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline); + QMetalComputePipeline *compPsD = QRHI_RES(QMetalComputePipeline, cbD->currentComputePipeline); + + if (!srb) { + if (gfxPsD) + srb = gfxPsD->m_shaderResourceBindings; + else + srb = compPsD->m_shaderResourceBindings; + } QMetalShaderResourceBindings *srbD = QRHI_RES(QMetalShaderResourceBindings, srb); bool hasSlottedResourceInSrb = false; @@ -747,7 +835,7 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.ubuf.buf); Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)); executeBufferHostWritesForCurrentFrame(bufD); - if (bufD->m_type != QRhiBuffer::Immutable) + if (bufD->d->slotted) hasSlottedResourceInSrb = true; if (b->u.ubuf.hasDynamicOffset) hasDynamicOffsetInSrb = true; @@ -778,6 +866,38 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind samplerD->lastActiveFrameSlot = currentFrameSlot; } break; + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + { + QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex); + if (texD->generation != bd.simage.generation || texD->m_id != bd.simage.id) { + resNeedsRebind = true; + bd.simage.id = texD->m_id; + bd.simage.generation = texD->generation; + } + texD->lastActiveFrameSlot = currentFrameSlot; + } + break; + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + { + QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); + Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); + executeBufferHostWritesForCurrentFrame(bufD); + if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) { + resNeedsRebind = true; + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + } + bufD->lastActiveFrameSlot = currentFrameSlot; + } + break; default: Q_UNREACHABLE(); break; @@ -789,15 +909,22 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot) resNeedsRebind = true; - const bool srbChange = cbD->currentSrb != srb || cbD->currentSrbGeneration != srbD->generation; + const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb); + const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation; // dynamic uniform buffer offsets always trigger a rebind - if (hasDynamicOffsetInSrb || resNeedsRebind || srbChange) { - cbD->currentSrb = srb; + if (hasDynamicOffsetInSrb || resNeedsRebind || srbChanged || srbRebuilt) { + if (gfxPsD) { + cbD->currentGraphicsSrb = srb; + cbD->currentComputeSrb = nullptr; + } else { + cbD->currentGraphicsSrb = nullptr; + cbD->currentComputeSrb = srb; + } cbD->currentSrbGeneration = srbD->generation; cbD->currentResSlot = resSlot; - const bool offsetOnlyChange = hasDynamicOffsetInSrb && !resNeedsRebind && !srbChange; + const bool offsetOnlyChange = hasDynamicOffsetInSrb && !resNeedsRebind && !srbChanged && !srbRebuilt; enqueueShaderResourceBindings(srbD, cbD, dynamicOffsetCount, dynamicOffsets, offsetOnlyChange); } } @@ -806,9 +933,8 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings, QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); QRhiBatchedBindings<id<MTLBuffer> > buffers; QRhiBatchedBindings<NSUInteger> offsets; @@ -816,7 +942,7 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, bindings[i].first); executeBufferHostWritesForCurrentFrame(bufD); bufD->lastActiveFrameSlot = currentFrameSlot; - id<MTLBuffer> mtlbuf = bufD->d->buf[bufD->m_type == QRhiBuffer::Immutable ? 0 : currentFrameSlot]; + id<MTLBuffer> mtlbuf = bufD->d->buf[bufD->d->slotted ? currentFrameSlot : 0]; buffers.feed(startBinding + i, mtlbuf); offsets.feed(startBinding + i, bindings[i].second); } @@ -824,12 +950,12 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, offsets.finish(); // same binding space for vertex and constant buffers - work it around - QRhiShaderResourceBindings *srb = cbD->currentSrb; + QRhiShaderResourceBindings *srb = cbD->currentGraphicsSrb; // There's nothing guaranteeing setShaderResources() was called before // setVertexInput()... but whatever srb will get bound will have to be // layout-compatible anyways so maxBinding is the same. if (!srb) - srb = cbD->currentPipeline->shaderResourceBindings(); + srb = cbD->currentGraphicsPipeline->shaderResourceBindings(); const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, srb)->maxBinding + 1; if (firstVertexBinding != cbD->d->currentFirstVertexBinding @@ -843,7 +969,7 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, for (int i = 0, ie = buffers.batches.count(); i != ie; ++i) { const auto &bufferBatch(buffers.batches[i]); const auto &offsetBatch(offsets.batches[i]); - [cbD->d->currentPassEncoder setVertexBuffers: + [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData() offsets: offsetBatch.resources.constData() withRange: NSMakeRange(firstVertexBinding + bufferBatch.startBinding, bufferBatch.resources.count())]; @@ -864,9 +990,8 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline && cbD->currentTarget); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); const QSize outputSize = cbD->currentTarget->pixelSize(); // x,y is top-left in MTLViewportRect but bottom-left in QRhiViewport @@ -882,24 +1007,23 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) vp.znear = viewport.minDepth(); vp.zfar = viewport.maxDepth(); - [cbD->d->currentPassEncoder setViewport: vp]; + [cbD->d->currentRenderPassEncoder setViewport: vp]; - if (!QRHI_RES(QMetalGraphicsPipeline, cbD->currentPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { + if (!QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { MTLScissorRect s; s.x = x; s.y = y; s.width = w; s.height = h; - [cbD->d->currentPassEncoder setScissorRect: s]; + [cbD->d->currentRenderPassEncoder setScissorRect: s]; } } void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline && cbD->currentTarget); - Q_ASSERT(QRHI_RES(QMetalGraphicsPipeline, cbD->currentPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + Q_ASSERT(QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); const QSize outputSize = cbD->currentTarget->pixelSize(); // x,y is top-left in MTLScissorRect but bottom-left in QRhiScissor @@ -913,38 +1037,42 @@ void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) s.width = w; s.height = h; - [cbD->d->currentPassEncoder setScissorRect: s]; + [cbD->d->currentRenderPassEncoder setScissorRect: s]; } void QRhiMetal::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - [cbD->d->currentPassEncoder setBlendColorRed: c.redF() green: c.greenF() blue: c.blueF() alpha: c.alphaF()]; + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + + [cbD->d->currentRenderPassEncoder setBlendColorRed: c.redF() green: c.greenF() blue: c.blueF() alpha: c.alphaF()]; } void QRhiMetal::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - [cbD->d->currentPassEncoder setStencilReferenceValue: refValue]; + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + + [cbD->d->currentRenderPassEncoder setStencilReferenceValue: refValue]; } void QRhiMetal::draw(QRhiCommandBuffer *cb, quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - [cbD->d->currentPassEncoder drawPrimitives: - QRHI_RES(QMetalGraphicsPipeline, cbD->currentPipeline)->d->primitiveType + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + + [cbD->d->currentRenderPassEncoder drawPrimitives: + QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance]; } void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance) { - Q_ASSERT(inPass); QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + if (!cbD->currentIndexBuffer) return; @@ -952,9 +1080,9 @@ void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, Q_ASSERT(indexOffset == aligned(indexOffset, 4)); QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, cbD->currentIndexBuffer); - id<MTLBuffer> mtlbuf = ibufD->d->buf[ibufD->m_type == QRhiBuffer::Immutable ? 0 : currentFrameSlot]; + id<MTLBuffer> mtlbuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0]; - [cbD->d->currentPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentPipeline)->d->primitiveType + [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType indexCount: indexCount indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32 indexBuffer: mtlbuf @@ -971,8 +1099,8 @@ void QRhiMetal::debugMarkBegin(QRhiCommandBuffer *cb, const QByteArray &name) NSString *str = [NSString stringWithUTF8String: name.constData()]; QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - if (inPass) { - [cbD->d->currentPassEncoder pushDebugGroup: str]; + if (cbD->recordingPass != QMetalCommandBuffer::NoPass) { + [cbD->d->currentRenderPassEncoder pushDebugGroup: str]; } else { if (@available(macOS 10.13, iOS 11.0, *)) [cbD->d->cb pushDebugGroup: str]; @@ -985,8 +1113,8 @@ void QRhiMetal::debugMarkEnd(QRhiCommandBuffer *cb) return; QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - if (inPass) { - [cbD->d->currentPassEncoder popDebugGroup]; + if (cbD->recordingPass != QMetalCommandBuffer::NoPass) { + [cbD->d->currentRenderPassEncoder popDebugGroup]; } else { if (@available(macOS 10.13, iOS 11.0, *)) [cbD->d->cb popDebugGroup]; @@ -998,10 +1126,9 @@ void QRhiMetal::debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) if (!debugMarkers) return; - if (inPass) { - QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - [cbD->d->currentPassEncoder insertDebugSignpost: [NSString stringWithUTF8String: msg.constData()]]; - } + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + if (cbD->recordingPass != QMetalCommandBuffer::NoPass) + [cbD->d->currentRenderPassEncoder insertDebugSignpost: [NSString stringWithUTF8String: msg.constData()]]; } const QRhiNativeHandles *QRhiMetal::nativeHandles(QRhiCommandBuffer *cb) @@ -1023,8 +1150,6 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb) QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); - Q_ASSERT(!inFrame); - inFrame = true; QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain); @@ -1077,9 +1202,6 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags) { - Q_ASSERT(inFrame); - inFrame = false; - QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain); Q_ASSERT(currentSwapChain == swapChainD); @@ -1110,9 +1232,6 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb) { - Q_ASSERT(!inFrame); - inFrame = true; - currentFrameSlot = (currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT; if (swapchains.count() > 1) { for (QMetalSwapChain *sc : qAsConst(swapchains)) { @@ -1140,8 +1259,6 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame() { Q_ASSERT(d->ofr.active); d->ofr.active = false; - Q_ASSERT(inFrame); - inFrame = false; [d->ofr.cbWrapper.d->cb commit]; @@ -1155,17 +1272,17 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame() QRhi::FrameOpResult QRhiMetal::finish() { - Q_ASSERT(!inPass); - id<MTLCommandBuffer> cb = nil; QMetalSwapChain *swapChainD = nullptr; if (inFrame) { if (d->ofr.active) { Q_ASSERT(!currentSwapChain); + Q_ASSERT(d->ofr.cbWrapper.recordingPass == QMetalCommandBuffer::NoPass); cb = d->ofr.cbWrapper.d->cb; } else { Q_ASSERT(currentSwapChain); swapChainD = currentSwapChain; + Q_ASSERT(swapChainD->cbWrapper.recordingPass == QMetalCommandBuffer::NoPass); cb = swapChainD->cbWrapper.d->cb; } } @@ -1373,11 +1490,13 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate bufD->d->pendingUpdates[i].append(u); } + // Due to the Metal API the handling of static and dynamic buffers is + // basically the same. So go through the same pendingUpdates machinery. for (const QRhiResourceUpdateBatchPrivate::StaticBufferUpload &u : ud->staticBufferUploads) { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf); Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic); Q_ASSERT(u.offset + u.data.size() <= bufD->m_size); - for (int i = 0, ie = bufD->m_type == QRhiBuffer::Immutable ? 1 : QMTL_FRAMES_IN_FLIGHT; i != ie; ++i) + for (int i = 0, ie = bufD->d->slotted ? QMTL_FRAMES_IN_FLIGHT : 1; i != ie; ++i) bufD->d->pendingUpdates[i].append({ u.buf, u.offset, u.data.size(), u.data.constData() }); } @@ -1516,9 +1635,10 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate ud->free(); } +// this handles all types of buffers, not just Dynamic void QRhiMetal::executeBufferHostWritesForCurrentFrame(QMetalBuffer *bufD) { - const int idx = bufD->m_type == QRhiBuffer::Immutable ? 0 : currentFrameSlot; + const int idx = bufD->d->slotted ? currentFrameSlot : 0; QVector<QRhiResourceUpdateBatchPrivate::DynamicBufferUpdate> &updates(bufD->d->pendingUpdates[idx]); if (updates.isEmpty()) return; @@ -1542,7 +1662,7 @@ void QRhiMetal::executeBufferHostWritesForCurrentFrame(QMetalBuffer *bufD) void QRhiMetal::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + Q_ASSERT(QRHI_RES(QMetalCommandBuffer, cb)->recordingPass == QMetalCommandBuffer::NoPass); enqueueResourceUpdates(cb, resourceUpdates); } @@ -1553,13 +1673,12 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, const QRhiDepthStencilClearValue &depthStencilClearValue, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::NoPass); if (resourceUpdates) enqueueResourceUpdates(cb, resourceUpdates); - QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - QMetalRenderTargetData *rtD = nullptr; switch (rt->resourceType()) { case QRhiResource::RenderTarget: @@ -1639,28 +1758,80 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, cbD->d->currentPassRpDesc.depthAttachment.storeAction = MTLStoreActionStore; } - cbD->d->currentPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc]; + cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc]; cbD->resetPerPassState(); + cbD->recordingPass = QMetalCommandBuffer::RenderPass; cbD->currentTarget = rt; - inPass = true; } void QRhiMetal::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inPass); - inPass = false; - QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); - [cbD->d->currentPassEncoder endEncoding]; + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + [cbD->d->currentRenderPassEncoder endEncoding]; + + cbD->recordingPass = QMetalCommandBuffer::NoPass; cbD->currentTarget = nullptr; if (resourceUpdates) enqueueResourceUpdates(cb, resourceUpdates); } +void QRhiMetal::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::NoPass); + + if (resourceUpdates) + enqueueResourceUpdates(cb, resourceUpdates); + + cbD->d->currentComputePassEncoder = [cbD->d->cb computeCommandEncoder]; + cbD->resetPerPassState(); + cbD->recordingPass = QMetalCommandBuffer::ComputePass; +} + +void QRhiMetal::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass); + + [cbD->d->currentComputePassEncoder endEncoding]; + cbD->recordingPass = QMetalCommandBuffer::NoPass; + + if (resourceUpdates) + enqueueResourceUpdates(cb, resourceUpdates); +} + +void QRhiMetal::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass); + QMetalComputePipeline *psD = QRHI_RES(QMetalComputePipeline, ps); + + if (cbD->currentComputePipeline != ps || cbD->currentPipelineGeneration != psD->generation) { + cbD->currentGraphicsPipeline = nullptr; + cbD->currentComputePipeline = ps; + cbD->currentPipelineGeneration = psD->generation; + + [cbD->d->currentComputePassEncoder setComputePipelineState: psD->d->ps]; + } + + psD->lastActiveFrameSlot = currentFrameSlot; +} + +void QRhiMetal::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass); + QMetalComputePipeline *psD = QRHI_RES(QMetalComputePipeline, cbD->currentComputePipeline); + + [cbD->d->currentComputePassEncoder dispatchThreadgroups: MTLSizeMake(x, y, z) + threadsPerThreadgroup: psD->d->localSize]; +} + static void qrhimtl_releaseBuffer(const QRhiMetalData::DeferredReleaseEntry &e) { for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) @@ -1677,6 +1848,8 @@ static void qrhimtl_releaseTexture(const QRhiMetalData::DeferredReleaseEntry &e) [e.texture.texture release]; for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) [e.texture.stagingBuffers[i] release]; + for (int i = 0; i < QRhi::MAX_LEVELS; ++i) + [e.texture.views[i] release]; } static void qrhimtl_releaseSampler(const QRhiMetalData::DeferredReleaseEntry &e) @@ -1782,6 +1955,11 @@ bool QMetalBuffer::build() if (d->buf[0]) release(); + if (m_usage.testFlag(QRhiBuffer::StorageBuffer) && m_type == Dynamic) { + qWarning("StorageBuffer cannot be combined with Dynamic"); + return false; + } + const int nonZeroSize = m_size <= 0 ? 256 : m_size; const int roundedSize = m_usage.testFlag(QRhiBuffer::UniformBuffer) ? aligned(nonZeroSize, 256) : nonZeroSize; @@ -1794,15 +1972,17 @@ bool QMetalBuffer::build() } #endif + // Immutable and Static only has buf[0] and pendingUpdates[0] in use. + // Dynamic uses all. + d->slotted = m_type == Dynamic; + QRHI_RES_RHI(QRhiMetal); for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { - // Immutable only has buf[0] and pendingUpdates[0] in use. - // Static and Dynamic use all. - if (i == 0 || m_type != Immutable) { + if (i == 0 || d->slotted) { d->buf[i] = [rhiD->d->dev newBufferWithLength: roundedSize options: opts]; d->pendingUpdates[i].reserve(16); if (!m_objectName.isEmpty()) { - if (m_type == Immutable) { + if (!d->slotted) { d->buf[i].label = [NSString stringWithUTF8String: m_objectName.constData()]; } else { const QByteArray name = m_objectName + '/' + QByteArray::number(i); @@ -1813,7 +1993,7 @@ bool QMetalBuffer::build() } QRHI_PROF; - QRHI_PROF_F(newBuffer(this, roundedSize, m_type == Immutable ? 1 : QMTL_FRAMES_IN_FLIGHT, 0)); + QRHI_PROF_F(newBuffer(this, roundedSize, d->slotted ? QMTL_FRAMES_IN_FLIGHT : 1, 0)); lastActiveFrameSlot = -1; generation += 1; @@ -1919,10 +2099,13 @@ QRhiTexture::Format QMetalRenderBuffer::backingFormat() const QMetalTexture::QMetalTexture(QRhiImplementation *rhi, Format format, const QSize &pixelSize, int sampleCount, Flags flags) : QRhiTexture(rhi, format, pixelSize, sampleCount, flags), - d(new QMetalTextureData) + d(new QMetalTextureData(this)) { for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) d->stagingBuf[i] = nil; + + for (int i = 0; i < QRhi::MAX_LEVELS; ++i) + d->perLevelViews[i] = nil; } QMetalTexture::~QMetalTexture() @@ -1949,6 +2132,11 @@ void QMetalTexture::release() d->stagingBuf[i] = nil; } + for (int i = 0; i < QRhi::MAX_LEVELS; ++i) { + e.texture.views[i] = d->perLevelViews[i]; + d->perLevelViews[i] = nil; + } + QRHI_RES_RHI(QRhiMetal); rhiD->d->releaseQueue.append(e); QRHI_PROF; @@ -2138,6 +2326,8 @@ bool QMetalTexture::build() desc.usage = MTLTextureUsageShaderRead; if (m_flags.testFlag(RenderTarget)) desc.usage |= MTLTextureUsageRenderTarget; + if (m_flags.testFlag(UsedWithLoadStore)) + desc.usage |= MTLTextureUsageShaderWrite; QRHI_RES_RHI(QRhiMetal); d->tex = [rhiD->d->dev newTextureWithDescriptor: desc]; @@ -2187,6 +2377,21 @@ const QRhiNativeHandles *QMetalTexture::nativeHandles() return &nativeHandlesStruct; } +id<MTLTexture> QMetalTextureData::viewForLevel(int level) +{ + Q_ASSERT(level >= 0 && level < int(q->mipLevelCount)); + if (perLevelViews[level]) + return perLevelViews[level]; + + const MTLTextureType type = [tex textureType]; + const bool isCube = q->m_flags.testFlag(QRhiTexture::CubeMap); + id<MTLTexture> view = [tex newTextureViewWithPixelFormat: format textureType: type + levels: NSMakeRange(level, 1) slices: NSMakeRange(0, isCube ? 6 : 1)]; + + perLevelViews[level] = view; + return view; +} + QMetalSampler::QMetalSampler(QRhiImplementation *rhi, Filter magFilter, Filter minFilter, Filter mipmapMode, AddressMode u, AddressMode v) : QRhiSampler(rhi, magFilter, minFilter, mipmapMode, u, v), @@ -2538,6 +2743,28 @@ bool QMetalShaderResourceBindings::build() bd.stex.samplerGeneration = samplerD->generation; } break; + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + { + QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex); + bd.simage.id = texD->m_id; + bd.simage.generation = texD->generation; + } + break; + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + { + QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + } + break; default: Q_UNREACHABLE(); break; @@ -2874,21 +3101,12 @@ bool QMetalGraphicsPipeline::build() rpDesc.vertexDescriptor = inputLayout; - if (@available(macOS 10.13, iOS 11.0, *)) { - // Everything is immutable because we can guarantee that "neither the - // CPU nor the GPU will modify a buffer's contents between the time the - // buffer is set in a function's argument table and the time its - // associated command buffer completes execution" (as that's the point - // of our Vulkan-style buffer juggling in the first place). - const int vertexBufferCount = firstVertexBinding + bindings.count(); // cbuf + vbuf - const int fragmentBufferCount = firstVertexBinding; // cbuf - for (int i = 0; i < vertexBufferCount; ++i) - rpDesc.vertexBuffers[i].mutability = MTLMutabilityImmutable; - for (int i = 0; i < fragmentBufferCount; ++i) - rpDesc.fragmentBuffers[i].mutability = MTLMutabilityImmutable; - } - - for (const QRhiGraphicsShaderStage &shaderStage : qAsConst(m_shaderStages)) { + // mutability cannot be determined (slotted buffers could be set as + // MTLMutabilityImmutable, but then we potentially need a different + // descriptor for each buffer combination as this depends on the actual + // buffers not just the resource binding layout) so leave it at the default + + for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { QString error; QByteArray entryPoint; id<MTLLibrary> lib = rhiD->d->createMetalLib(shaderStage.shader(), shaderStage.shaderVariant(), &error, &entryPoint); @@ -2903,12 +3121,12 @@ bool QMetalGraphicsPipeline::build() return false; } switch (shaderStage.type()) { - case QRhiGraphicsShaderStage::Vertex: + case QRhiShaderStage::Vertex: rpDesc.vertexFunction = func; d->vsLib = lib; d->vsFunc = func; break; - case QRhiGraphicsShaderStage::Fragment: + case QRhiShaderStage::Fragment: rpDesc.fragmentFunction = func; d->fsLib = lib; d->fsFunc = func; @@ -3000,6 +3218,83 @@ bool QMetalGraphicsPipeline::build() return true; } +QMetalComputePipeline::QMetalComputePipeline(QRhiImplementation *rhi) + : QRhiComputePipeline(rhi), + d(new QMetalComputePipelineData) +{ +} + +QMetalComputePipeline::~QMetalComputePipeline() +{ + release(); + delete d; +} + +void QMetalComputePipeline::release() +{ + QRHI_RES_RHI(QRhiMetal); + + if (d->csFunc) { + [d->csFunc release]; + d->csFunc = nil; + } + if (d->csLib) { + [d->csLib release]; + d->csLib = nil; + } + + if (!d->ps) + return; + + if (d->ps) { + [d->ps release]; + d->ps = nil; + } + + rhiD->unregisterResource(this); +} + +bool QMetalComputePipeline::build() +{ + if (d->ps) + release(); + + QRHI_RES_RHI(QRhiMetal); + + const QShader shader = m_shaderStage.shader(); + QString error; + QByteArray entryPoint; + id<MTLLibrary> lib = rhiD->d->createMetalLib(shader, m_shaderStage.shaderVariant(), + &error, &entryPoint); + if (!lib) { + qWarning("MSL shader compilation failed: %s", qPrintable(error)); + return false; + } + id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint); + if (!func) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [lib release]; + return false; + } + d->csLib = lib; + d->csFunc = func; + std::array<uint, 3> localSize = shader.description().computeShaderLocalSize(); + d->localSize = MTLSizeMake(localSize[0], localSize[1], localSize[2]); + + NSError *err = nil; + d->ps = [rhiD->d->dev newComputePipelineStateWithFunction: d->csFunc error: &err]; + if (!d->ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); + return false; + } + + lastActiveFrameSlot = -1; + generation += 1; + rhiD->registerResource(this); + return true; +} + QMetalCommandBuffer::QMetalCommandBuffer(QRhiImplementation *rhi) : QRhiCommandBuffer(rhi), d(new QMetalCommandBufferData) @@ -3021,28 +3316,32 @@ void QMetalCommandBuffer::release() const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles() { nativeHandlesStruct.commandBuffer = d->cb; - nativeHandlesStruct.encoder = d->currentPassEncoder; + nativeHandlesStruct.encoder = d->currentRenderPassEncoder; return &nativeHandlesStruct; } void QMetalCommandBuffer::resetState() { - d->currentPassEncoder = nil; + d->currentRenderPassEncoder = nil; + d->currentComputePassEncoder = nil; d->currentPassRpDesc = nil; resetPerPassState(); } void QMetalCommandBuffer::resetPerPassState() { + recordingPass = NoPass; currentTarget = nullptr; resetPerPassCachedState(); } void QMetalCommandBuffer::resetPerPassCachedState() { - currentPipeline = nullptr; + currentGraphicsPipeline = nullptr; + currentComputePipeline = nullptr; currentPipelineGeneration = 0; - currentSrb = nullptr; + currentGraphicsSrb = nullptr; + currentComputeSrb = nullptr; currentSrbGeneration = 0; currentResSlot = -1; currentIndexBuffer = nullptr; diff --git a/src/gui/rhi/qrhimetal_p_p.h b/src/gui/rhi/qrhimetal_p_p.h index f9b9d96648..8b0256991d 100644 --- a/src/gui/rhi/qrhimetal_p_p.h +++ b/src/gui/rhi/qrhimetal_p_p.h @@ -113,6 +113,7 @@ struct QMetalTexture : public QRhiTexture int lastActiveFrameSlot = -1; friend class QRhiMetal; friend struct QMetalShaderResourceBindings; + friend struct QMetalTextureData; }; struct QMetalSamplerData; @@ -200,10 +201,20 @@ struct QMetalShaderResourceBindings : public QRhiShaderResourceBindings quint64 samplerId; uint samplerGeneration; }; + struct BoundStorageImageData { + quint64 id; + uint generation; + }; + struct BoundStorageBufferData { + quint64 id; + uint generation; + }; struct BoundResourceData { union { BoundUniformBufferData ubuf; BoundSampledTextureData stex; + BoundStorageImageData simage; + BoundStorageBufferData sbuf; }; }; QVector<BoundResourceData> boundResourceData; @@ -227,6 +238,21 @@ struct QMetalGraphicsPipeline : public QRhiGraphicsPipeline friend class QRhiMetal; }; +struct QMetalComputePipelineData; + +struct QMetalComputePipeline : public QRhiComputePipeline +{ + QMetalComputePipeline(QRhiImplementation *rhi); + ~QMetalComputePipeline(); + void release() override; + bool build() override; + + QMetalComputePipelineData *d; + uint generation = 0; + int lastActiveFrameSlot = -1; + friend class QRhiMetal; +}; + struct QMetalCommandBufferData; struct QMetalSwapChain; @@ -239,10 +265,19 @@ struct QMetalCommandBuffer : public QRhiCommandBuffer QMetalCommandBufferData *d = nullptr; QRhiMetalCommandBufferNativeHandles nativeHandlesStruct; + enum PassType { + NoPass, + RenderPass, + ComputePass + }; + + PassType recordingPass; QRhiRenderTarget *currentTarget; - QRhiGraphicsPipeline *currentPipeline; + QRhiGraphicsPipeline *currentGraphicsPipeline; + QRhiComputePipeline *currentComputePipeline; uint currentPipelineGeneration; - QRhiShaderResourceBindings *currentSrb; + QRhiShaderResourceBindings *currentGraphicsSrb; + QRhiShaderResourceBindings *currentComputeSrb; uint currentSrbGeneration; int currentResSlot; QRhiBuffer *currentIndexBuffer; @@ -296,6 +331,7 @@ public: void destroy() override; QRhiGraphicsPipeline *createGraphicsPipeline() override; + QRhiComputePipeline *createComputePipeline() override; QRhiShaderResourceBindings *createShaderResourceBindings() override; QRhiBuffer *createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, @@ -360,6 +396,11 @@ public: void debugMarkEnd(QRhiCommandBuffer *cb) override; void debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) override; + void beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) override; + void dispatch(QRhiCommandBuffer *cb, int x, int y, int z) override; + const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; @@ -393,8 +434,6 @@ public: bool importedDevice = false; bool importedCmdQueue = false; - bool inFrame = false; - bool inPass = false; QMetalSwapChain *currentSwapChain = nullptr; QSet<QMetalSwapChain *> swapchains; QRhiMetalNativeHandles nativeHandlesStruct; diff --git a/src/gui/rhi/qrhinull.cpp b/src/gui/rhi/qrhinull.cpp index c764669058..1314e53893 100644 --- a/src/gui/rhi/qrhinull.cpp +++ b/src/gui/rhi/qrhinull.cpp @@ -201,6 +201,11 @@ QRhiGraphicsPipeline *QRhiNull::createGraphicsPipeline() return new QNullGraphicsPipeline(this); } +QRhiComputePipeline *QRhiNull::createComputePipeline() +{ + return new QNullComputePipeline(this); +} + QRhiShaderResourceBindings *QRhiNull::createShaderResourceBindings() { return new QNullShaderResourceBindings(this); @@ -297,6 +302,20 @@ void QRhiNull::debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) Q_UNUSED(msg); } +void QRhiNull::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) +{ + Q_UNUSED(cb); + Q_UNUSED(ps); +} + +void QRhiNull::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) +{ + Q_UNUSED(cb); + Q_UNUSED(x); + Q_UNUSED(y); + Q_UNUSED(z); +} + const QRhiNativeHandles *QRhiNull::nativeHandles(QRhiCommandBuffer *cb) { Q_UNUSED(cb); @@ -395,6 +414,18 @@ void QRhiNull::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceU resourceUpdate(cb, resourceUpdates); } +void QRhiNull::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + if (resourceUpdates) + resourceUpdate(cb, resourceUpdates); +} + +void QRhiNull::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + if (resourceUpdates) + resourceUpdate(cb, resourceUpdates); +} + QNullBuffer::QNullBuffer(QRhiImplementation *rhi, Type type, UsageFlags usage, int size) : QRhiBuffer(rhi, type, usage, size) { @@ -647,6 +678,25 @@ bool QNullGraphicsPipeline::build() return true; } +QNullComputePipeline::QNullComputePipeline(QRhiImplementation *rhi) + : QRhiComputePipeline(rhi) +{ +} + +QNullComputePipeline::~QNullComputePipeline() +{ + release(); +} + +void QNullComputePipeline::release() +{ +} + +bool QNullComputePipeline::build() +{ + return true; +} + QNullCommandBuffer::QNullCommandBuffer(QRhiImplementation *rhi) : QRhiCommandBuffer(rhi) { diff --git a/src/gui/rhi/qrhinull_p_p.h b/src/gui/rhi/qrhinull_p_p.h index 6f79606486..b0227bc110 100644 --- a/src/gui/rhi/qrhinull_p_p.h +++ b/src/gui/rhi/qrhinull_p_p.h @@ -154,6 +154,14 @@ struct QNullGraphicsPipeline : public QRhiGraphicsPipeline bool build() override; }; +struct QNullComputePipeline : public QRhiComputePipeline +{ + QNullComputePipeline(QRhiImplementation *rhi); + ~QNullComputePipeline(); + void release() override; + bool build() override; +}; + struct QNullCommandBuffer : public QRhiCommandBuffer { QNullCommandBuffer(QRhiImplementation *rhi); @@ -189,6 +197,7 @@ public: void destroy() override; QRhiGraphicsPipeline *createGraphicsPipeline() override; + QRhiComputePipeline *createComputePipeline() override; QRhiShaderResourceBindings *createShaderResourceBindings() override; QRhiBuffer *createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, @@ -253,6 +262,11 @@ public: void debugMarkEnd(QRhiCommandBuffer *cb) override; void debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) override; + void beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) override; + void dispatch(QRhiCommandBuffer *cb, int x, int y, int z) override; + const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; diff --git a/src/gui/rhi/qrhivulkan.cpp b/src/gui/rhi/qrhivulkan.cpp index 2d7b7a16f6..f6ecd7c00e 100644 --- a/src/gui/rhi/qrhivulkan.cpp +++ b/src/gui/rhi/qrhivulkan.cpp @@ -379,30 +379,31 @@ bool QRhiVulkan::create(QRhi::Flags flags) queryQueueFamilyProps(); gfxQueue = VK_NULL_HANDLE; + + // We only support combined graphics+present queues. When it comes to + // compute, only combined graphics+compute queue is used, compute gets + // disabled otherwise. gfxQueueFamilyIdx = -1; - int presQueueFamilyIdx = -1; + int computelessGfxQueueCandidateIdx = -1; for (int i = 0; i < queueFamilyProps.count(); ++i) { qDebug("queue family %d: flags=0x%x count=%d", i, queueFamilyProps[i].queueFlags, queueFamilyProps[i].queueCount); if (gfxQueueFamilyIdx == -1 && (queueFamilyProps[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) && (!maybeWindow || inst->supportsPresent(physDev, i, maybeWindow))) { - gfxQueueFamilyIdx = i; + if (queueFamilyProps[i].queueFlags & VK_QUEUE_COMPUTE_BIT) + gfxQueueFamilyIdx = i; + else if (computelessGfxQueueCandidateIdx == -1) + computelessGfxQueueCandidateIdx = i; } } - if (gfxQueueFamilyIdx != -1) { - presQueueFamilyIdx = gfxQueueFamilyIdx; - } else { - // ### - qWarning("No graphics queue that can present. This is not supported atm."); - } if (gfxQueueFamilyIdx == -1) { - qWarning("No graphics queue family found"); - return false; - } - if (presQueueFamilyIdx == -1) { - qWarning("No present queue family found"); - return false; + if (computelessGfxQueueCandidateIdx != -1) { + gfxQueueFamilyIdx = computelessGfxQueueCandidateIdx; + } else { + qWarning("No graphics (or no graphics+present) queue family found"); + return false; + } } VkDeviceQueueCreateInfo queueInfo[2]; @@ -412,12 +413,6 @@ bool QRhiVulkan::create(QRhi::Flags flags) queueInfo[0].queueFamilyIndex = gfxQueueFamilyIdx; queueInfo[0].queueCount = 1; queueInfo[0].pQueuePriorities = prio; - if (gfxQueueFamilyIdx != presQueueFamilyIdx) { - queueInfo[1].sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - queueInfo[1].queueFamilyIndex = presQueueFamilyIdx; - queueInfo[1].queueCount = 1; - queueInfo[1].pQueuePriorities = prio; - } QVector<const char *> devLayers; if (inst->layers().contains("VK_LAYER_LUNARG_standard_validation")) @@ -449,7 +444,7 @@ bool QRhiVulkan::create(QRhi::Flags flags) VkDeviceCreateInfo devInfo; memset(&devInfo, 0, sizeof(devInfo)); devInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - devInfo.queueCreateInfoCount = gfxQueueFamilyIdx == presQueueFamilyIdx ? 1 : 2; + devInfo.queueCreateInfoCount = 1; devInfo.pQueueCreateInfos = queueInfo; devInfo.enabledLayerCount = devLayers.count(); devInfo.ppEnabledLayerNames = devLayers.constData(); @@ -478,18 +473,13 @@ bool QRhiVulkan::create(QRhi::Flags flags) } if (gfxQueueFamilyIdx != -1) { - // Will use one queue always, including when multiple QRhis use the - // same device. This has significant consequences, and cannot easily be - // changed (e.g. think pipeline barriers which create a dependency - // between commands submitted to a queue - with multiple queues - // additional synchronization would be needed) - if (!gfxQueue) df->vkGetDeviceQueue(dev, gfxQueueFamilyIdx, 0, &gfxQueue); if (queueFamilyProps.isEmpty()) queryQueueFamilyProps(); + hasCompute = (queueFamilyProps[gfxQueueFamilyIdx].queueFlags & VK_QUEUE_COMPUTE_BIT) != 0; timestampValidBits = queueFamilyProps[gfxQueueFamilyIdx].timestampValidBits; } @@ -631,7 +621,9 @@ VkResult QRhiVulkan::createDescriptorPool(VkDescriptorPool *pool) VkDescriptorPoolSize descPoolSizes[] = { { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, QVK_UNIFORM_BUFFERS_PER_POOL }, { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, QVK_UNIFORM_BUFFERS_PER_POOL }, - { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, QVK_COMBINED_IMAGE_SAMPLERS_PER_POOL } + { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, QVK_COMBINED_IMAGE_SAMPLERS_PER_POOL }, + { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, QVK_STORAGE_BUFFERS_PER_POOL }, + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, QVK_STORAGE_IMAGES_PER_POOL } }; VkDescriptorPoolCreateInfo descPoolInfo; memset(&descPoolInfo, 0, sizeof(descPoolInfo)); @@ -1353,6 +1345,8 @@ bool QRhiVulkan::recreateSwapChain(QRhiSwapChain *swapChain) qWarning("Failed to create swapchain image view %d: %d", i, err); return false; } + + image.lastUse = QVkSwapChain::ImageResources::ScImageUseNone; } swapChainD->currentImageIndex = 0; @@ -1579,9 +1573,6 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin QRhi::FrameOpResult QRhiVulkan::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrameFlags flags) { - Q_ASSERT(inFrame); - inFrame = false; - QVkSwapChain *swapChainD = QRHI_RES(QVkSwapChain, swapChain); Q_ASSERT(currentSwapChain == swapChainD); @@ -1590,23 +1581,34 @@ QRhi::FrameOpResult QRhiVulkan::endFrame(QRhiSwapChain *swapChain, QRhi::EndFram QVkSwapChain::FrameResources &frame(swapChainD->frameRes[swapChainD->currentFrameSlot]); QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]); - if (image.transferSource) { - // was used in a readback as transfer source, go back to presentable layout + if (image.lastUse != QVkSwapChain::ImageResources::ScImageUseRender) { VkImageMemoryBarrier presTrans; memset(&presTrans, 0, sizeof(presTrans)); presTrans.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - presTrans.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; presTrans.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - presTrans.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; presTrans.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; presTrans.image = image.image; presTrans.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; presTrans.subresourceRange.levelCount = presTrans.subresourceRange.layerCount = 1; - df->vkCmdPipelineBarrier(frame.cmdBuf, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - 0, 0, nullptr, 0, nullptr, - 1, &presTrans); - image.transferSource = false; + + if (image.lastUse == QVkSwapChain::ImageResources::ScImageUseNone) { + // was not used at all (no render pass), just transition from undefined to presentable + presTrans.srcAccessMask = 0; + presTrans.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + df->vkCmdPipelineBarrier(frame.cmdBuf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + 0, 0, nullptr, 0, nullptr, + 1, &presTrans); + } else if (image.lastUse == QVkSwapChain::ImageResources::ScImageUseTransferSource) { + // was used in a readback as transfer source, go back to presentable layout + presTrans.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + presTrans.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + df->vkCmdPipelineBarrier(frame.cmdBuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + 0, 0, nullptr, 0, nullptr, + 1, &presTrans); + } + image.lastUse = QVkSwapChain::ImageResources::ScImageUseRender; } // record another timestamp, when enabled @@ -1669,9 +1671,6 @@ QRhi::FrameOpResult QRhiVulkan::endFrame(QRhiSwapChain *swapChain, QRhi::EndFram void QRhiVulkan::prepareNewFrame(QRhiCommandBuffer *cb) { - Q_ASSERT(!inFrame); - inFrame = true; - // Now is the time to do things for frame N-F, where N is the current one, // F is QVK_FRAMES_IN_FLIGHT, because only here it is guaranteed that that // frame has completed on the GPU (due to the fence wait in beginFrame). To @@ -1810,8 +1809,6 @@ QRhi::FrameOpResult QRhiVulkan::beginOffscreenFrame(QRhiCommandBuffer **cb) QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame() { - Q_ASSERT(inFrame); - inFrame = false; Q_ASSERT(ofr.active); ofr.active = false; @@ -1845,8 +1842,6 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame() QRhi::FrameOpResult QRhiVulkan::finish() { - Q_ASSERT(!inPass); - QVkSwapChain *swapChainD = nullptr; if (inFrame) { // There is either a swapchain or an offscreen frame on-going. @@ -1942,9 +1937,10 @@ void QRhiVulkan::activateTextureRenderTarget(QVkCommandBuffer *cbD, QVkTextureRe void QRhiVulkan::resourceUpdate(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); - enqueueResourceUpdates(QRHI_RES(QVkCommandBuffer, cb), resourceUpdates); + enqueueResourceUpdates(cbD, resourceUpdates); } void QRhiVulkan::beginPass(QRhiCommandBuffer *cb, @@ -1953,8 +1949,8 @@ void QRhiVulkan::beginPass(QRhiCommandBuffer *cb, const QRhiDepthStencilClearValue &depthStencilClearValue, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inFrame && !inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); if (resourceUpdates) enqueueResourceUpdates(cbD, resourceUpdates); @@ -1970,6 +1966,9 @@ void QRhiVulkan::beginPass(QRhiCommandBuffer *cb, case QRhiResource::RenderTarget: rtD = &QRHI_RES(QVkReferenceRenderTarget, rt)->d; rtD->rp->lastActiveFrameSlot = currentFrameSlot; + Q_ASSERT(currentSwapChain); + currentSwapChain->imageRes[currentSwapChain->currentImageIndex].lastUse = + QVkSwapChain::ImageResources::ScImageUseRender; break; case QRhiResource::TextureRenderTarget: { @@ -1983,6 +1982,7 @@ void QRhiVulkan::beginPass(QRhiCommandBuffer *cb, break; } + cbD->recordingPass = QVkCommandBuffer::RenderPass; cbD->currentTarget = rt; // No copy operations or image layout transitions allowed after this point @@ -2022,26 +2022,83 @@ void QRhiVulkan::beginPass(QRhiCommandBuffer *cb, cmd.args.beginRenderPass.clearValueIndex = cbD->pools.clearValue.count(); cbD->pools.clearValue.append(cvs.constData(), cvs.count()); cbD->commands.append(cmd); - - inPass = true; } void QRhiVulkan::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::EndRenderPass; cbD->commands.append(cmd); - inPass = false; + cbD->recordingPass = QVkCommandBuffer::NoPass; cbD->currentTarget = nullptr; if (resourceUpdates) enqueueResourceUpdates(cbD, resourceUpdates); } +void QRhiVulkan::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); + + if (resourceUpdates) + enqueueResourceUpdates(cbD, resourceUpdates); + + enqueueTransitionPassResources(cbD); + + cbD->recordingPass = QVkCommandBuffer::ComputePass; +} + +void QRhiVulkan::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +{ + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::ComputePass); + + cbD->recordingPass = QVkCommandBuffer::NoPass; + + if (resourceUpdates) + enqueueResourceUpdates(cbD, resourceUpdates); +} + +void QRhiVulkan::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) +{ + QVkComputePipeline *psD = QRHI_RES(QVkComputePipeline, ps); + Q_ASSERT(psD->pipeline); + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::ComputePass); + + if (cbD->currentComputePipeline != ps || cbD->currentPipelineGeneration != psD->generation) { + QVkCommandBuffer::Command cmd; + cmd.cmd = QVkCommandBuffer::Command::BindPipeline; + cmd.args.bindPipeline.bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE; + cmd.args.bindPipeline.pipeline = psD->pipeline; + cbD->commands.append(cmd); + + cbD->currentGraphicsPipeline = nullptr; + cbD->currentComputePipeline = ps; + cbD->currentPipelineGeneration = psD->generation; + } + + psD->lastActiveFrameSlot = currentFrameSlot; +} + +void QRhiVulkan::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) +{ + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::ComputePass); + + QVkCommandBuffer::Command cmd; + cmd.cmd = QVkCommandBuffer::Command::Dispatch; + cmd.args.dispatch.x = x; + cmd.args.dispatch.y = y; + cmd.args.dispatch.z = z; + cbD->commands.append(cmd); +} + VkShaderModule QRhiVulkan::createShader(const QByteArray &spirv) { VkShaderModuleCreateInfo shaderInfo; @@ -2133,6 +2190,45 @@ void QRhiVulkan::updateShaderResourceBindings(QRhiShaderResourceBindings *srb, i writeInfo.pImageInfo = &imageInfos.last(); } break; + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + { + QVkTexture *texD = QRHI_RES(QVkTexture, b->u.simage.tex); + VkImageView view = texD->imageViewForLevel(b->u.simage.level); + if (view) { + writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + bd.simage.id = texD->m_id; + bd.simage.generation = texD->generation; + VkDescriptorImageInfo imageInfo; + imageInfo.sampler = VK_NULL_HANDLE; + imageInfo.imageView = view; + imageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + imageInfos.append(imageInfo); + writeInfo.pImageInfo = &imageInfos.last(); + } + } + break; + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + { + QVkBuffer *bufD = QRHI_RES(QVkBuffer, b->u.sbuf.buf); + writeInfo.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + VkDescriptorBufferInfo bufInfo; + bufInfo.buffer = bufD->m_type == QRhiBuffer::Dynamic ? bufD->buffers[frameSlot] : bufD->buffers[0]; + bufInfo.offset = b->u.ubuf.offset; + bufInfo.range = b->u.ubuf.maybeSize ? b->u.ubuf.maybeSize : bufD->m_size; + bufferInfos.append(bufInfo); + writeInfo.pBufferInfo = &bufferInfos.last(); + } + break; default: continue; } @@ -2158,7 +2254,7 @@ static inline bool accessIsWrite(VkAccessFlags access) void QRhiVulkan::trackedBufferBarrier(QVkCommandBuffer *cbD, QVkBuffer *bufD, int slot, VkAccessFlags access, VkPipelineStageFlags stage) { - Q_ASSERT(!inPass); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); Q_ASSERT(access && stage); QVkBuffer::UsageState &s(bufD->usageState[slot]); if (!s.stage) { @@ -2198,7 +2294,7 @@ void QRhiVulkan::trackedBufferBarrier(QVkCommandBuffer *cbD, QVkBuffer *bufD, in void QRhiVulkan::trackedImageBarrier(QVkCommandBuffer *cbD, QVkTexture *texD, VkImageLayout layout, VkAccessFlags access, VkPipelineStageFlags stage) { - Q_ASSERT(!inPass); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); Q_ASSERT(layout && access && stage); QVkTexture::UsageState &s(texD->usageState); if (s.access == access && s.stage == stage && s.layout == layout) { @@ -2245,7 +2341,7 @@ void QRhiVulkan::subresourceBarrier(QVkCommandBuffer *cbD, VkImage image, int startLayer, int layerCount, int startLevel, int levelCount) { - Q_ASSERT(!inPass); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); VkImageMemoryBarrier barrier; memset(&barrier, 0, sizeof(barrier)); barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -2678,15 +2774,20 @@ void QRhiVulkan::enqueueResourceUpdates(QVkCommandBuffer *cbD, QRhiResourceUpdat cbD->commands.append(cmd); } else { // use the swapchain image - VkImage image = swapChainD->imageRes[swapChainD->currentImageIndex].image; - if (!swapChainD->imageRes[swapChainD->currentImageIndex].transferSource) { + QVkSwapChain::ImageResources &imageRes(swapChainD->imageRes[swapChainD->currentImageIndex]); + VkImage image = imageRes.image; + if (imageRes.lastUse != QVkSwapChain::ImageResources::ScImageUseTransferSource) { + if (imageRes.lastUse != QVkSwapChain::ImageResources::ScImageUseRender) { + qWarning("Attempted to read back undefined swapchain image content, " + "results are undefined. (do a render pass first)"); + } subresourceBarrier(cbD, image, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1, 0, 1); - swapChainD->imageRes[swapChainD->currentImageIndex].transferSource = true; + imageRes.lastUse = QVkSwapChain::ImageResources::ScImageUseTransferSource; } QVkCommandBuffer::Command cmd; @@ -2847,6 +2948,10 @@ static void qrhivk_releaseTexture(const QRhiVulkan::DeferredReleaseEntry &e, VkD vmaDestroyImage(toVmaAllocator(allocator), e.texture.image, toVmaAllocation(e.texture.allocation)); for (int i = 0; i < QVK_FRAMES_IN_FLIGHT; ++i) vmaDestroyBuffer(toVmaAllocator(allocator), e.texture.stagingBuffers[i], toVmaAllocation(e.texture.stagingAllocations[i])); + for (int i = 0; i < QRhi::MAX_LEVELS; ++i) { + if (e.texture.extraImageViews[i]) + df->vkDestroyImageView(dev, e.texture.extraImageViews[i], nullptr); + } } static void qrhivk_releaseSampler(const QRhiVulkan::DeferredReleaseEntry &e, VkDevice dev, QVulkanDeviceFunctions *df) @@ -3005,6 +3110,8 @@ void QRhiVulkan::enqueueTransitionPassResources(QVkCommandBuffer *cbD) void QRhiVulkan::recordCommandBuffer(QVkCommandBuffer *cbD) { + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::NoPass); + for (QVkCommandBuffer::Command &cmd : cbD->commands) { switch (cmd.cmd) { case QVkCommandBuffer::Command::CopyBuffer: @@ -3111,6 +3218,9 @@ void QRhiVulkan::recordCommandBuffer(QVkCommandBuffer *cbD) case QVkCommandBuffer::Command::TransitionPassResources: recordTransitionPassResources(cbD, cbD->passResTrackers[cmd.args.transitionResources.trackerIndex]); break; + case QVkCommandBuffer::Command::Dispatch: + df->vkCmdDispatch(cbD->cb, cmd.args.dispatch.x, cmd.args.dispatch.y, cmd.args.dispatch.z); + break; default: break; } @@ -3128,6 +3238,12 @@ static inline VkAccessFlags toVkAccess(QRhiPassResourceTracker::BufferAccess acc return VK_ACCESS_INDEX_READ_BIT; case QRhiPassResourceTracker::BufUniformRead: return VK_ACCESS_UNIFORM_READ_BIT; + case QRhiPassResourceTracker::BufStorageLoad: + return VK_ACCESS_SHADER_READ_BIT; + case QRhiPassResourceTracker::BufStorageStore: + return VK_ACCESS_SHADER_WRITE_BIT; + case QRhiPassResourceTracker::BufStorageLoadStore: + return VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; default: Q_UNREACHABLE(); break; @@ -3144,6 +3260,8 @@ static inline VkPipelineStageFlags toVkPipelineStage(QRhiPassResourceTracker::Bu return VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; case QRhiPassResourceTracker::BufFragmentStage: return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + case QRhiPassResourceTracker::BufComputeStage: + return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; default: Q_UNREACHABLE(); break; @@ -3168,6 +3286,12 @@ static inline VkImageLayout toVkLayout(QRhiPassResourceTracker::TextureAccess ac return VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; case QRhiPassResourceTracker::TexDepthOutput: return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + case QRhiPassResourceTracker::TexStorageLoad: + Q_FALLTHROUGH(); + case QRhiPassResourceTracker::TexStorageStore: + Q_FALLTHROUGH(); + case QRhiPassResourceTracker::TexStorageLoadStore: + return VK_IMAGE_LAYOUT_GENERAL; default: Q_UNREACHABLE(); break; @@ -3184,6 +3308,12 @@ static inline VkAccessFlags toVkAccess(QRhiPassResourceTracker::TextureAccess ac return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; case QRhiPassResourceTracker::TexDepthOutput: return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + case QRhiPassResourceTracker::TexStorageLoad: + return VK_ACCESS_SHADER_READ_BIT; + case QRhiPassResourceTracker::TexStorageStore: + return VK_ACCESS_SHADER_WRITE_BIT; + case QRhiPassResourceTracker::TexStorageLoadStore: + return VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; default: Q_UNREACHABLE(); break; @@ -3202,6 +3332,8 @@ static inline VkPipelineStageFlags toVkPipelineStage(QRhiPassResourceTracker::Te return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; case QRhiPassResourceTracker::TexDepthOutputStage: return VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + case QRhiPassResourceTracker::TexComputeStage: + return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; default: Q_UNREACHABLE(); break; @@ -3225,10 +3357,7 @@ void QRhiVulkan::trackedRegisterBuffer(QRhiPassResourceTracker *passResTracker, QRhiPassResourceTracker::BufferStage stage) { QVkBuffer::UsageState &u(bufD->usageState[slot]); - // The last arg will get ignored if this buffer was already used in the - // same pass; that's good because u is not the state at pass start anymore - // at that point. - passResTracker->registerBufferOnce(bufD, slot, access, stage, toPassTrackerUsageState(u)); + passResTracker->registerBuffer(bufD, slot, &access, &stage, toPassTrackerUsageState(u)); u.access = toVkAccess(access); u.stage = toVkPipelineStage(stage); } @@ -3239,10 +3368,7 @@ void QRhiVulkan::trackedRegisterTexture(QRhiPassResourceTracker *passResTracker, QRhiPassResourceTracker::TextureStage stage) { QVkTexture::UsageState &u(texD->usageState); - // The last arg will get ignored if this buffer was already used in the - // same pass; that's good because u is not the state at pass start anymore - // at that point. - passResTracker->registerTextureOnce(texD, access, stage, toPassTrackerUsageState(u)); + passResTracker->registerTexture(texD, &access, &stage, toPassTrackerUsageState(u)); u.layout = toVkLayout(access); u.access = toVkAccess(access); u.stage = toVkPipelineStage(stage); @@ -3417,6 +3543,8 @@ bool QRhiVulkan::isFeatureSupported(QRhi::Feature feature) const return true; case QRhi::ElementIndexUint: return true; + case QRhi::Compute: + return hasCompute; default: Q_UNREACHABLE(); return false; @@ -3492,6 +3620,11 @@ QRhiGraphicsPipeline *QRhiVulkan::createGraphicsPipeline() return new QVkGraphicsPipeline(this); } +QRhiComputePipeline *QRhiVulkan::createComputePipeline() +{ + return new QVkComputePipeline(this); +} + QRhiShaderResourceBindings *QRhiVulkan::createShaderResourceBindings() { return new QVkShaderResourceBindings(this); @@ -3499,19 +3632,20 @@ QRhiShaderResourceBindings *QRhiVulkan::createShaderResourceBindings() void QRhiVulkan::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) { - Q_ASSERT(inPass); QVkGraphicsPipeline *psD = QRHI_RES(QVkGraphicsPipeline, ps); Q_ASSERT(psD->pipeline); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); - if (cbD->currentPipeline != ps || cbD->currentPipelineGeneration != psD->generation) { + if (cbD->currentGraphicsPipeline != ps || cbD->currentPipelineGeneration != psD->generation) { QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::BindPipeline; cmd.args.bindPipeline.bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; cmd.args.bindPipeline.pipeline = psD->pipeline; cbD->commands.append(cmd); - cbD->currentPipeline = ps; + cbD->currentGraphicsPipeline = ps; + cbD->currentComputePipeline = nullptr; cbD->currentPipelineGeneration = psD->generation; } @@ -3525,6 +3659,8 @@ QRhiPassResourceTracker::BufferStage toPassTrackerBufferStage(QRhiShaderResource return QRhiPassResourceTracker::BufVertexStage; if (stages.testFlag(QRhiShaderResourceBinding::FragmentStage)) return QRhiPassResourceTracker::BufFragmentStage; + if (stages.testFlag(QRhiShaderResourceBinding::ComputeStage)) + return QRhiPassResourceTracker::BufComputeStage; Q_UNREACHABLE(); return QRhiPassResourceTracker::BufVertexStage; @@ -3537,6 +3673,8 @@ QRhiPassResourceTracker::TextureStage toPassTrackerTextureStage(QRhiShaderResour return QRhiPassResourceTracker::TexVertexStage; if (stages.testFlag(QRhiShaderResourceBinding::FragmentStage)) return QRhiPassResourceTracker::TexFragmentStage; + if (stages.testFlag(QRhiShaderResourceBinding::ComputeStage)) + return QRhiPassResourceTracker::TexComputeStage; Q_UNREACHABLE(); return QRhiPassResourceTracker::TexVertexStage; @@ -3546,14 +3684,17 @@ void QRhiVulkan::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBin int dynamicOffsetCount, const QRhiCommandBuffer::DynamicOffset *dynamicOffsets) { - Q_ASSERT(inPass); - QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline); - QVkGraphicsPipeline *psD = QRHI_RES(QVkGraphicsPipeline, cbD->currentPipeline); + Q_ASSERT(cbD->recordingPass != QVkCommandBuffer::NoPass); + QVkGraphicsPipeline *gfxPsD = QRHI_RES(QVkGraphicsPipeline, cbD->currentGraphicsPipeline); + QVkComputePipeline *compPsD = QRHI_RES(QVkComputePipeline, cbD->currentComputePipeline); - if (!srb) - srb = psD->m_shaderResourceBindings; + if (!srb) { + if (gfxPsD) + srb = gfxPsD->m_shaderResourceBindings; + else + srb = compPsD->m_shaderResourceBindings; + } QVkShaderResourceBindings *srbD = QRHI_RES(QVkShaderResourceBindings, srb); bool hasSlottedResourceInSrb = false; @@ -3592,7 +3733,7 @@ void QRhiVulkan::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBin executeBufferHostWritesForCurrentFrame(bufD); bufD->lastActiveFrameSlot = currentFrameSlot; - trackedRegisterBuffer(&passResTracker, bufD, currentFrameSlot, + trackedRegisterBuffer(&passResTracker, bufD, bufD->m_type == QRhiBuffer::Dynamic ? currentFrameSlot : 0, QRhiPassResourceTracker::BufUniformRead, toPassTrackerBufferStage(b->stage)); @@ -3630,6 +3771,64 @@ void QRhiVulkan::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBin } } break; + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + { + QVkTexture *texD = QRHI_RES(QVkTexture, b->u.simage.tex); + Q_ASSERT(texD->m_flags.testFlag(QRhiTexture::UsedWithLoadStore)); + texD->lastActiveFrameSlot = currentFrameSlot; + QRhiPassResourceTracker::TextureAccess access; + if (b->type == QRhiShaderResourceBinding::ImageLoad) + access = QRhiPassResourceTracker::TexStorageLoad; + else if (b->type == QRhiShaderResourceBinding::ImageStore) + access = QRhiPassResourceTracker::TexStorageStore; + else + access = QRhiPassResourceTracker::TexStorageLoadStore; + trackedRegisterTexture(&passResTracker, texD, + access, + toPassTrackerTextureStage(b->stage)); + + if (texD->generation != bd.simage.generation || texD->m_id != bd.simage.id) { + rewriteDescSet = true; + bd.simage.id = texD->m_id; + bd.simage.generation = texD->generation; + } + } + break; + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + { + QVkBuffer *bufD = QRHI_RES(QVkBuffer, b->u.sbuf.buf); + Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); + + if (bufD->m_type == QRhiBuffer::Dynamic) + executeBufferHostWritesForCurrentFrame(bufD); + + bufD->lastActiveFrameSlot = currentFrameSlot; + QRhiPassResourceTracker::BufferAccess access; + if (b->type == QRhiShaderResourceBinding::BufferLoad) + access = QRhiPassResourceTracker::BufStorageLoad; + else if (b->type == QRhiShaderResourceBinding::BufferStore) + access = QRhiPassResourceTracker::BufStorageStore; + else + access = QRhiPassResourceTracker::BufStorageLoadStore; + trackedRegisterBuffer(&passResTracker, bufD, bufD->m_type == QRhiBuffer::Dynamic ? currentFrameSlot : 0, + access, + toPassTrackerBufferStage(b->stage)); + + if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) { + rewriteDescSet = true; + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + } + } + break; default: Q_UNREACHABLE(); break; @@ -3644,7 +3843,9 @@ void QRhiVulkan::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBin // also, dynamic offsets always need a bind. const bool forceRebind = (hasSlottedResourceInSrb && cbD->currentDescSetSlot != descSetIdx) || hasDynamicOffsetInSrb; - if (forceRebind || rewriteDescSet || cbD->currentSrb != srb || cbD->currentSrbGeneration != srbD->generation) { + const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb); + + if (forceRebind || rewriteDescSet || srbChanged || cbD->currentSrbGeneration != srbD->generation) { QVarLengthArray<uint32_t, 4> dynOfs; if (hasDynamicOffsetInSrb) { // Filling out dynOfs based on the sorted bindings is important @@ -3669,15 +3870,22 @@ void QRhiVulkan::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBin QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::BindDescriptorSet; - cmd.args.bindDescriptorSet.bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - cmd.args.bindDescriptorSet.pipelineLayout = psD->layout; + cmd.args.bindDescriptorSet.bindPoint = gfxPsD ? VK_PIPELINE_BIND_POINT_GRAPHICS + : VK_PIPELINE_BIND_POINT_COMPUTE; + cmd.args.bindDescriptorSet.pipelineLayout = gfxPsD ? gfxPsD->layout : compPsD->layout; cmd.args.bindDescriptorSet.descSet = srbD->descSets[descSetIdx]; cmd.args.bindDescriptorSet.dynamicOffsetCount = dynOfs.count(); cmd.args.bindDescriptorSet.dynamicOffsetIndex = cbD->pools.dynamicOffset.count(); cbD->pools.dynamicOffset.append(dynOfs.constData(), dynOfs.count()); cbD->commands.append(cmd); - cbD->currentSrb = srb; + if (gfxPsD) { + cbD->currentGraphicsSrb = srb; + cbD->currentComputeSrb = nullptr; + } else { + cbD->currentGraphicsSrb = nullptr; + cbD->currentComputeSrb = srb; + } cbD->currentSrbGeneration = srbD->generation; cbD->currentDescSetSlot = descSetIdx; } @@ -3689,8 +3897,8 @@ void QRhiVulkan::setVertexInput(QRhiCommandBuffer *cb, int startBinding, int bindingCount, const QRhiCommandBuffer::VertexInput *bindings, QRhiBuffer *indexBuf, quint32 indexOffset, QRhiCommandBuffer::IndexFormat indexFormat) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); QRhiPassResourceTracker &passResTracker(cbD->passResTrackers[cbD->currentPassResTrackerIndex]); bool needsBindVBuf = false; @@ -3772,9 +3980,8 @@ void QRhiVulkan::setVertexInput(QRhiCommandBuffer *cb, void QRhiVulkan::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline && cbD->currentTarget); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); const QSize outputSize = cbD->currentTarget->pixelSize(); // x,y is top-left in VkViewport but bottom-left in QRhiViewport @@ -3793,7 +4000,7 @@ void QRhiVulkan::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport vp->maxDepth = viewport.maxDepth(); cbD->commands.append(cmd); - if (!QRHI_RES(QVkGraphicsPipeline, cbD->currentPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { + if (!QRHI_RES(QVkGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { cmd.cmd = QVkCommandBuffer::Command::SetScissor; VkRect2D *s = &cmd.args.setScissor.scissor; s->offset.x = x; @@ -3806,10 +4013,9 @@ void QRhiVulkan::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport void QRhiVulkan::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); - Q_ASSERT(cbD->currentPipeline && cbD->currentTarget); - Q_ASSERT(QRHI_RES(QVkGraphicsPipeline, cbD->currentPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); + Q_ASSERT(QRHI_RES(QVkGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); const QSize outputSize = cbD->currentTarget->pixelSize(); // x,y is top-left in VkRect2D but bottom-left in QRhiScissor @@ -3829,8 +4035,9 @@ void QRhiVulkan::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) void QRhiVulkan::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); + QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::SetBlendConstants; cmd.args.setBlendConstants.c[0] = c.redF(); @@ -3842,8 +4049,9 @@ void QRhiVulkan::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) void QRhiVulkan::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); + QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::SetStencilRef; cmd.args.setStencilRef.ref = refValue; @@ -3853,8 +4061,9 @@ void QRhiVulkan::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) void QRhiVulkan::draw(QRhiCommandBuffer *cb, quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); + QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::Draw; cmd.args.draw.vertexCount = vertexCount; @@ -3867,8 +4076,9 @@ void QRhiVulkan::draw(QRhiCommandBuffer *cb, quint32 vertexCount, void QRhiVulkan::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, quint32 instanceCount, quint32 firstIndex, qint32 vertexOffset, quint32 firstInstance) { - Q_ASSERT(inPass); QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::RenderPass); + QVkCommandBuffer::Command cmd; cmd.cmd = QVkCommandBuffer::Command::DrawIndexed; cmd.args.drawIndexed.indexCount = indexCount; @@ -3969,6 +4179,8 @@ static inline VkBufferUsageFlagBits toVkBufferUsage(QRhiBuffer::UsageFlags usage u |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; if (usage.testFlag(QRhiBuffer::UniformBuffer)) u |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + if (usage.testFlag(QRhiBuffer::StorageBuffer)) + u |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; return VkBufferUsageFlagBits(u); } @@ -4019,13 +4231,15 @@ static inline VkSamplerAddressMode toVkAddressMode(QRhiSampler::AddressMode m) } } -static inline VkShaderStageFlagBits toVkShaderStage(QRhiGraphicsShaderStage::Type type) +static inline VkShaderStageFlagBits toVkShaderStage(QRhiShaderStage::Type type) { switch (type) { - case QRhiGraphicsShaderStage::Vertex: + case QRhiShaderStage::Vertex: return VK_SHADER_STAGE_VERTEX_BIT; - case QRhiGraphicsShaderStage::Fragment: + case QRhiShaderStage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; + case QRhiShaderStage::Compute: + return VK_SHADER_STAGE_COMPUTE_BIT; default: Q_UNREACHABLE(); return VK_SHADER_STAGE_VERTEX_BIT; @@ -4246,8 +4460,24 @@ static inline VkDescriptorType toVkDescriptorType(const QRhiShaderResourceBindin case QRhiShaderResourceBinding::UniformBuffer: return b->u.ubuf.hasDynamicOffset ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case QRhiShaderResourceBinding::SampledTexture: return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + + case QRhiShaderResourceBinding::ImageLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::ImageLoadStore: + return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + + case QRhiShaderResourceBinding::BufferLoad: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferStore: + Q_FALLTHROUGH(); + case QRhiShaderResourceBinding::BufferLoadStore: + return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + default: Q_UNREACHABLE(); return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; @@ -4261,6 +4491,8 @@ static inline VkShaderStageFlags toVkShaderStageFlags(QRhiShaderResourceBinding: s |= VK_SHADER_STAGE_VERTEX_BIT; if (stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) s |= VK_SHADER_STAGE_FRAGMENT_BIT; + if (stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) + s |= VK_SHADER_STAGE_COMPUTE_BIT; return VkShaderStageFlags(s); } @@ -4339,6 +4571,11 @@ bool QVkBuffer::build() if (buffers[0]) release(); + if (m_usage.testFlag(QRhiBuffer::StorageBuffer) && m_type == Dynamic) { + qWarning("StorageBuffer cannot be combined with Dynamic"); + return false; + } + const int nonZeroSize = m_size <= 0 ? 256 : m_size; VkBufferCreateInfo bufferInfo; @@ -4514,6 +4751,8 @@ QVkTexture::QVkTexture(QRhiImplementation *rhi, Format format, const QSize &pixe stagingBuffers[i] = VK_NULL_HANDLE; stagingAllocations[i] = nullptr; } + for (int i = 0; i < QRhi::MAX_LEVELS; ++i) + perLevelImageViews[i] = VK_NULL_HANDLE; } QVkTexture::~QVkTexture() @@ -4542,6 +4781,11 @@ void QVkTexture::release() stagingAllocations[i] = nullptr; } + for (int i = 0; i < QRhi::MAX_LEVELS; ++i) { + e.texture.extraImageViews[i] = perLevelImageViews[i]; + perLevelImageViews[i] = VK_NULL_HANDLE; + } + image = VK_NULL_HANDLE; imageView = VK_NULL_HANDLE; imageAlloc = nullptr; @@ -4674,6 +4918,8 @@ bool QVkTexture::build() imageInfo.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (m_flags.testFlag(QRhiTexture::UsedWithGenerateMips)) imageInfo.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + if (m_flags.testFlag(QRhiTexture::UsedWithLoadStore)) + imageInfo.usage |= VK_IMAGE_USAGE_STORAGE_BIT; VmaAllocationCreateInfo allocInfo; memset(&allocInfo, 0, sizeof(allocInfo)); @@ -4732,6 +4978,43 @@ const QRhiNativeHandles *QVkTexture::nativeHandles() return &nativeHandlesStruct; } +VkImageView QVkTexture::imageViewForLevel(int level) +{ + Q_ASSERT(level >= 0 && level < int(mipLevelCount)); + if (perLevelImageViews[level] != VK_NULL_HANDLE) + return perLevelImageViews[level]; + + const bool isDepth = isDepthTextureFormat(m_format); + const bool isCube = m_flags.testFlag(CubeMap); + + VkImageViewCreateInfo viewInfo; + memset(&viewInfo, 0, sizeof(viewInfo)); + viewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + viewInfo.image = image; + viewInfo.viewType = isCube ? VK_IMAGE_VIEW_TYPE_CUBE : VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = vkformat; + viewInfo.components.r = VK_COMPONENT_SWIZZLE_R; + viewInfo.components.g = VK_COMPONENT_SWIZZLE_G; + viewInfo.components.b = VK_COMPONENT_SWIZZLE_B; + viewInfo.components.a = VK_COMPONENT_SWIZZLE_A; + viewInfo.subresourceRange.aspectMask = isDepth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.subresourceRange.baseMipLevel = level; + viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = isCube ? 6 : 1; + + VkImageView v = VK_NULL_HANDLE; + QRHI_RES_RHI(QRhiVulkan); + VkResult err = rhiD->df->vkCreateImageView(rhiD->dev, &viewInfo, nullptr, &v); + if (err != VK_SUCCESS) { + qWarning("Failed to create image view: %d", err); + return VK_NULL_HANDLE; + } + + perLevelImageViews[level] = v; + return v; +} + QVkSampler::QVkSampler(QRhiImplementation *rhi, Filter magFilter, Filter minFilter, Filter mipmapMode, AddressMode u, AddressMode v) : QRhiSampler(rhi, magFilter, minFilter, mipmapMode, u, v) @@ -5222,7 +5505,7 @@ bool QVkGraphicsPipeline::build() QVarLengthArray<VkShaderModule, 4> shaders; QVarLengthArray<VkPipelineShaderStageCreateInfo, 4> shaderStageCreateInfos; - for (const QRhiGraphicsShaderStage &shaderStage : m_shaderStages) { + for (const QRhiShaderStage &shaderStage : m_shaderStages) { const QShader bakedShader = shaderStage.shader(); const QShaderCode spirv = bakedShader.shader({ QShader::SpirvShader, 100, shaderStage.shaderVariant() }); if (spirv.shader().isEmpty()) { @@ -5404,6 +5687,100 @@ bool QVkGraphicsPipeline::build() return true; } +QVkComputePipeline::QVkComputePipeline(QRhiImplementation *rhi) + : QRhiComputePipeline(rhi) +{ +} + +QVkComputePipeline::~QVkComputePipeline() +{ + release(); +} + +void QVkComputePipeline::release() +{ + if (!pipeline && !layout) + return; + + QRhiVulkan::DeferredReleaseEntry e; + e.type = QRhiVulkan::DeferredReleaseEntry::Pipeline; + e.lastActiveFrameSlot = lastActiveFrameSlot; + + e.pipelineState.pipeline = pipeline; + e.pipelineState.layout = layout; + + pipeline = VK_NULL_HANDLE; + layout = VK_NULL_HANDLE; + + QRHI_RES_RHI(QRhiVulkan); + rhiD->releaseQueue.append(e); + + rhiD->unregisterResource(this); +} + +bool QVkComputePipeline::build() +{ + if (pipeline) + release(); + + QRHI_RES_RHI(QRhiVulkan); + if (!rhiD->ensurePipelineCache()) + return false; + + VkPipelineLayoutCreateInfo pipelineLayoutInfo; + memset(&pipelineLayoutInfo, 0, sizeof(pipelineLayoutInfo)); + pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipelineLayoutInfo.setLayoutCount = 1; + QVkShaderResourceBindings *srbD = QRHI_RES(QVkShaderResourceBindings, m_shaderResourceBindings); + Q_ASSERT(m_shaderResourceBindings && srbD->layout); + pipelineLayoutInfo.pSetLayouts = &srbD->layout; + VkResult err = rhiD->df->vkCreatePipelineLayout(rhiD->dev, &pipelineLayoutInfo, nullptr, &layout); + if (err != VK_SUCCESS) { + qWarning("Failed to create pipeline layout: %d", err); + return false; + } + + VkComputePipelineCreateInfo pipelineInfo; + memset(&pipelineInfo, 0, sizeof(pipelineInfo)); + pipelineInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipelineInfo.layout = layout; + + if (m_shaderStage.type() != QRhiShaderStage::Compute) { + qWarning("Compute pipeline requires a compute shader stage"); + return false; + } + const QShader bakedShader = m_shaderStage.shader(); + const QShaderCode spirv = bakedShader.shader({ QShader::SpirvShader, 100, m_shaderStage.shaderVariant() }); + if (spirv.shader().isEmpty()) { + qWarning() << "No SPIR-V 1.0 shader code found in baked shader" << bakedShader; + return false; + } + if (bakedShader.stage() != QShader::ComputeStage) { + qWarning() << bakedShader << "is not a compute shader"; + return false; + } + VkShaderModule shader = rhiD->createShader(spirv.shader()); + VkPipelineShaderStageCreateInfo shaderInfo; + memset(&shaderInfo, 0, sizeof(shaderInfo)); + shaderInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shaderInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; + shaderInfo.module = shader; + shaderInfo.pName = spirv.entryPoint().constData(); + pipelineInfo.stage = shaderInfo; + + err = rhiD->df->vkCreateComputePipelines(rhiD->dev, rhiD->pipelineCache, 1, &pipelineInfo, nullptr, &pipeline); + rhiD->df->vkDestroyShaderModule(rhiD->dev, shader, nullptr); + if (err != VK_SUCCESS) { + qWarning("Failed to create graphics pipeline: %d", err); + return false; + } + + lastActiveFrameSlot = -1; + generation += 1; + rhiD->registerResource(this); + return true; +} + QVkCommandBuffer::QVkCommandBuffer(QRhiImplementation *rhi) : QRhiCommandBuffer(rhi) { diff --git a/src/gui/rhi/qrhivulkan_p_p.h b/src/gui/rhi/qrhivulkan_p_p.h index afb0cc1d5a..cec9016603 100644 --- a/src/gui/rhi/qrhivulkan_p_p.h +++ b/src/gui/rhi/qrhivulkan_p_p.h @@ -61,6 +61,8 @@ static const int QVK_FRAMES_IN_FLIGHT = 2; static const int QVK_DESC_SETS_PER_POOL = 128; static const int QVK_UNIFORM_BUFFERS_PER_POOL = 256; static const int QVK_COMBINED_IMAGE_SAMPLERS_PER_POOL = 256; +static const int QVK_STORAGE_BUFFERS_PER_POOL = 128; +static const int QVK_STORAGE_IMAGES_PER_POOL = 128; static const int QVK_MAX_ACTIVE_TIMESTAMP_PAIRS = 16; @@ -123,12 +125,14 @@ struct QVkTexture : public QRhiTexture bool prepareBuild(QSize *adjustedSize = nullptr); bool finishBuild(); + VkImageView imageViewForLevel(int level); VkImage image = VK_NULL_HANDLE; VkImageView imageView = VK_NULL_HANDLE; QVkAlloc imageAlloc = nullptr; VkBuffer stagingBuffers[QVK_FRAMES_IN_FLIGHT]; QVkAlloc stagingAllocations[QVK_FRAMES_IN_FLIGHT]; + VkImageView perLevelImageViews[QRhi::MAX_LEVELS]; bool owns = true; QRhiVulkanTextureNativeHandles nativeHandlesStruct; struct UsageState { @@ -246,10 +250,20 @@ struct QVkShaderResourceBindings : public QRhiShaderResourceBindings quint64 samplerId; uint samplerGeneration; }; + struct BoundStorageImageData { + quint64 id; + uint generation; + }; + struct BoundStorageBufferData { + quint64 id; + uint generation; + }; struct BoundResourceData { union { BoundUniformBufferData ubuf; BoundSampledTextureData stex; + BoundStorageImageData simage; + BoundStorageBufferData sbuf; }; }; QVector<BoundResourceData> boundResourceData[QVK_FRAMES_IN_FLIGHT]; @@ -273,6 +287,20 @@ struct QVkGraphicsPipeline : public QRhiGraphicsPipeline friend class QRhiVulkan; }; +struct QVkComputePipeline : public QRhiComputePipeline +{ + QVkComputePipeline(QRhiImplementation *rhi); + ~QVkComputePipeline(); + void release() override; + bool build() override; + + VkPipelineLayout layout = VK_NULL_HANDLE; + VkPipeline pipeline = VK_NULL_HANDLE; + int lastActiveFrameSlot = -1; + uint generation = 0; + friend class QRhiVulkan; +}; + struct QVkCommandBuffer : public QRhiCommandBuffer { QVkCommandBuffer(QRhiImplementation *rhi); @@ -287,16 +315,25 @@ struct QVkCommandBuffer : public QRhiCommandBuffer return &nativeHandlesStruct; } + enum PassType { + NoPass, + RenderPass, + ComputePass + }; + void resetState() { resetCommands(); + recordingPass = NoPass; currentTarget = nullptr; resetCachedState(); } void resetCachedState() { - currentPipeline = nullptr; + currentGraphicsPipeline = nullptr; + currentComputePipeline = nullptr; currentPipelineGeneration = 0; - currentSrb = nullptr; + currentGraphicsSrb = nullptr; + currentComputeSrb = nullptr; currentSrbGeneration = 0; currentDescSetSlot = -1; currentIndexBuffer = VK_NULL_HANDLE; @@ -306,10 +343,13 @@ struct QVkCommandBuffer : public QRhiCommandBuffer memset(currentVertexOffsets, 0, sizeof(currentVertexOffsets)); } + PassType recordingPass; QRhiRenderTarget *currentTarget; - QRhiGraphicsPipeline *currentPipeline; + QRhiGraphicsPipeline *currentGraphicsPipeline; + QRhiComputePipeline *currentComputePipeline; uint currentPipelineGeneration; - QRhiShaderResourceBindings *currentSrb; + QRhiShaderResourceBindings *currentGraphicsSrb; + QRhiShaderResourceBindings *currentComputeSrb; uint currentSrbGeneration; int currentDescSetSlot; VkBuffer currentIndexBuffer; @@ -343,7 +383,8 @@ struct QVkCommandBuffer : public QRhiCommandBuffer DebugMarkerBegin, DebugMarkerEnd, DebugMarkerInsert, - TransitionPassResources + TransitionPassResources, + Dispatch }; Cmd cmd; @@ -456,6 +497,9 @@ struct QVkCommandBuffer : public QRhiCommandBuffer struct { int trackerIndex; } transitionResources; + struct { + int x, y, z; + } dispatch; } args; }; QVector<Command> commands; @@ -532,7 +576,12 @@ struct QVkSwapChain : public QRhiSwapChain VkFramebuffer fb = VK_NULL_HANDLE; VkImage msaaImage = VK_NULL_HANDLE; VkImageView msaaImageView = VK_NULL_HANDLE; - bool transferSource = false; + enum LastUse { + ScImageUseNone, + ScImageUseRender, + ScImageUseTransferSource + }; + LastUse lastUse = ScImageUseNone; } imageRes[MAX_BUFFER_COUNT]; struct FrameResources { @@ -565,6 +614,7 @@ public: void destroy() override; QRhiGraphicsPipeline *createGraphicsPipeline() override; + QRhiComputePipeline *createComputePipeline() override; QRhiShaderResourceBindings *createShaderResourceBindings() override; QRhiBuffer *createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, @@ -629,6 +679,11 @@ public: void debugMarkEnd(QRhiCommandBuffer *cb) override; void debugMarkMsg(QRhiCommandBuffer *cb, const QByteArray &msg) override; + void beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) override; + void setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *ps) override; + void dispatch(QRhiCommandBuffer *cb, int x, int y, int z) override; + const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; @@ -722,6 +777,7 @@ public: VkCommandPool cmdPool = VK_NULL_HANDLE; int gfxQueueFamilyIdx = -1; VkQueue gfxQueue = VK_NULL_HANDLE; + bool hasCompute = false; quint32 timestampValidBits = 0; bool importedAllocator = false; QVkAllocator allocator = nullptr; @@ -765,8 +821,6 @@ public: VkFormat optimalDsFormat = VK_FORMAT_UNDEFINED; QMatrix4x4 clipCorrectMatrix; - bool inFrame = false; - bool inPass = false; QVkSwapChain *currentSwapChain = nullptr; QSet<QVkSwapChain *> swapchains; QRhiVulkanNativeHandles nativeHandlesStruct; @@ -830,6 +884,7 @@ public: QVkAlloc allocation; VkBuffer stagingBuffers[QVK_FRAMES_IN_FLIGHT]; QVkAlloc stagingAllocations[QVK_FRAMES_IN_FLIGHT]; + VkImageView extraImageViews[QRhi::MAX_LEVELS]; } texture; struct { VkSampler sampler; diff --git a/src/gui/rhi/qshaderdescription.cpp b/src/gui/rhi/qshaderdescription.cpp index c2dbbb38fa..77aceaddba 100644 --- a/src/gui/rhi/qshaderdescription.cpp +++ b/src/gui/rhi/qshaderdescription.cpp @@ -522,6 +522,21 @@ QVector<QShaderDescription::InOutVariable> QShaderDescription::storageImages() c return d->storageImages; } +/*! + Returns the local size of a compute shader. + + For example, for a compute shader with the following declaration the + function returns { 256, 16, 1}. + + \badcode + layout(local_size_x = 256, local_size_y = 16, local_size_z = 1) in; + \endcode + */ +std::array<uint, 3> QShaderDescription::computeShaderLocalSize() const +{ + return d->localSize; +} + static struct TypeTab { QString k; QShaderDescription::VariableType v; @@ -799,6 +814,7 @@ static const QString pushConstantBlocksKey = QLatin1String("pushConstantBlocks") static const QString storageBlocksKey = QLatin1String("storageBlocks"); static const QString combinedImageSamplersKey = QLatin1String("combinedImageSamplers"); static const QString storageImagesKey = QLatin1String("storageImages"); +static const QString localSizeKey = QLatin1String("localSize"); static void addDeco(QJsonObject *obj, const QShaderDescription::InOutVariable &v) { @@ -941,6 +957,11 @@ QJsonDocument QShaderDescriptionPrivate::makeDoc() if (!jstorageImages.isEmpty()) root[storageImagesKey] = jstorageImages; + QJsonArray jlocalSize; + for (int i = 0; i < 3; ++i) + jlocalSize.append(QJsonValue(int(localSize[i]))); + root[localSizeKey] = jlocalSize; + return QJsonDocument(root); } @@ -1082,6 +1103,14 @@ void QShaderDescriptionPrivate::loadDoc(const QJsonDocument &doc) for (int i = 0; i < images.count(); ++i) storageImages.append(inOutVar(images[i].toObject())); } + + if (root.contains(localSizeKey)) { + QJsonArray localSizeArr = root[localSizeKey].toArray(); + if (localSizeArr.count() == 3) { + for (int i = 0; i < 3; ++i) + localSize[i] = localSizeArr[i].toInt(); + } + } } QT_END_NAMESPACE diff --git a/src/gui/rhi/qshaderdescription_p.h b/src/gui/rhi/qshaderdescription_p.h index 43d4256a63..5a63b998cd 100644 --- a/src/gui/rhi/qshaderdescription_p.h +++ b/src/gui/rhi/qshaderdescription_p.h @@ -51,6 +51,7 @@ #include <QtGui/qtguiglobal.h> #include <QtCore/QString> #include <QtCore/QVector> +#include <array> QT_BEGIN_NAMESPACE @@ -254,6 +255,8 @@ public: QVector<InOutVariable> combinedImageSamplers() const; QVector<InOutVariable> storageImages() const; + std::array<uint, 3> computeShaderLocalSize() const; + private: QShaderDescriptionPrivate *d; friend struct QShaderDescriptionPrivate; diff --git a/src/gui/rhi/qshaderdescription_p_p.h b/src/gui/rhi/qshaderdescription_p_p.h index dbe68d1060..1caee24984 100644 --- a/src/gui/rhi/qshaderdescription_p_p.h +++ b/src/gui/rhi/qshaderdescription_p_p.h @@ -60,6 +60,7 @@ struct Q_GUI_EXPORT QShaderDescriptionPrivate QShaderDescriptionPrivate() : ref(1) { + localSize[0] = localSize[1] = localSize[2] = 0; } QShaderDescriptionPrivate(const QShaderDescriptionPrivate *other) @@ -70,7 +71,8 @@ struct Q_GUI_EXPORT QShaderDescriptionPrivate pushConstantBlocks(other->pushConstantBlocks), storageBlocks(other->storageBlocks), combinedImageSamplers(other->combinedImageSamplers), - storageImages(other->storageImages) + storageImages(other->storageImages), + localSize(other->localSize) { } @@ -88,6 +90,7 @@ struct Q_GUI_EXPORT QShaderDescriptionPrivate QVector<QShaderDescription::StorageBlock> storageBlocks; QVector<QShaderDescription::InOutVariable> combinedImageSamplers; QVector<QShaderDescription::InOutVariable> storageImages; + std::array<uint, 3> localSize; }; QT_END_NAMESPACE |