From 9a0ab41f28042c7f27208337e419435d16f00006 Mon Sep 17 00:00:00 2001 From: Laszlo Agocs Date: Wed, 8 Jan 2020 12:55:57 +0100 Subject: rhi: gl: vulkan: Generate barriers between dispatches in a compute pass The new float16texture_with_compute manual test demonstrates a case that was not handled correctly before: multiple dispatch() calls in a row storing to and then loading from the same subresource(s). Without the appropriate barriers subtle data corruption issues appear. For Vulkan this also adds better batching for image/buffer barriers when using deferred recording. Also, for OpenGL this fixes the case of updating a buffer or rendering into a texture and then using it for load/store in a compute pass (previously this also lacked an appropriate glMemoryBarrier). Task-number: QTBUG-81217 Change-Id: I7970c445564473f9452662f4b1a20618cb8627a3 Reviewed-by: Paul Olav Tvete Reviewed-by: Johan Helsing --- src/gui/rhi/qrhigles2.cpp | 124 +++++++++++++++++++++++++++++++--- src/gui/rhi/qrhigles2_p_p.h | 11 +++ src/gui/rhi/qrhivulkan.cpp | 157 +++++++++++++++++++++++++++++++++++++++++-- src/gui/rhi/qrhivulkan_p_p.h | 17 ++++- 4 files changed, 291 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index 81a1daa16b..ec5e531e14 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -269,6 +269,14 @@ QT_BEGIN_NAMESPACE #define GL_ALL_BARRIER_BITS 0xFFFFFFFF #endif +#ifndef GL_SHADER_IMAGE_ACCESS_BARRIER_BIT +#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +#endif + +#ifndef GL_SHADER_STORAGE_BARRIER_BIT +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#endif + #ifndef GL_VERTEX_PROGRAM_POINT_SIZE #define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642 #endif @@ -1307,6 +1315,21 @@ QRhi::FrameOpResult QRhiGles2::finish() return QRhi::FrameOpSuccess; } +static bool bufferAccessIsWrite(QGles2Buffer::Access access) +{ + return access == QGles2Buffer::AccessStorageWrite + || access == QGles2Buffer::AccessStorageReadWrite + || access == QGles2Buffer::AccessUpdate; +} + +static bool textureAccessIsWrite(QGles2Texture::Access access) +{ + return access == QGles2Texture::AccessStorageWrite + || access == QGles2Texture::AccessStorageReadWrite + || access == QGles2Texture::AccessUpdate + || access == QGles2Texture::AccessFramebuffer; +} + void QRhiGles2::trackedBufferBarrier(QGles2CommandBuffer *cbD, QGles2Buffer *bufD, QGles2Buffer::Access access) { Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); // this is for resource updates only @@ -1314,7 +1337,7 @@ void QRhiGles2::trackedBufferBarrier(QGles2CommandBuffer *cbD, QGles2Buffer *buf if (access == prevAccess) return; - if (prevAccess == QGles2Buffer::AccessStorageWrite || prevAccess == QGles2Buffer::AccessStorageReadWrite) { + if (bufferAccessIsWrite(prevAccess)) { // Generating the minimal barrier set is way too complicated to do // correctly (prevAccess is overwritten so we won't have proper // tracking across multiple passes) so setting all barrier bits will do @@ -1335,7 +1358,7 @@ void QRhiGles2::trackedImageBarrier(QGles2CommandBuffer *cbD, QGles2Texture *tex if (access == prevAccess) return; - if (prevAccess == QGles2Texture::AccessStorageWrite || prevAccess == QGles2Texture::AccessStorageReadWrite) { + if (textureAccessIsWrite(prevAccess)) { QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::Barrier; cmd.args.barrier.barriers = GL_ALL_BARRIER_BITS; @@ -2273,19 +2296,13 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) // subsequent pass. for (auto it = tracker.cbeginBuffers(), itEnd = tracker.cendBuffers(); it != itEnd; ++it) { QGles2Buffer::Access accessBeforePass = QGles2Buffer::Access(it->stateAtPassBegin.access); - if (accessBeforePass == QGles2Buffer::AccessStorageWrite - || accessBeforePass == QGles2Buffer::AccessStorageReadWrite) - { + if (bufferAccessIsWrite(accessBeforePass)) barriers |= GL_ALL_BARRIER_BITS; - } } for (auto it = tracker.cbeginTextures(), itEnd = tracker.cendTextures(); it != itEnd; ++it) { QGles2Texture::Access accessBeforePass = QGles2Texture::Access(it->stateAtPassBegin.access); - if (accessBeforePass == QGles2Texture::AccessStorageWrite - || accessBeforePass == QGles2Texture::AccessStorageReadWrite) - { + if (textureAccessIsWrite(accessBeforePass)) barriers |= GL_ALL_BARRIER_BITS; - } } if (barriers && caps.compute) f->glMemoryBarrier(barriers); @@ -2792,6 +2809,8 @@ void QRhiGles2::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch cbD->recordingPass = QGles2CommandBuffer::ComputePass; cbD->resetCachedState(); + + cbD->computePassState.reset(); } void QRhiGles2::endComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) @@ -2824,11 +2843,96 @@ void QRhiGles2::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *p } } +template +inline void qrhigl_accumulateComputeResource(T *writtenResources, QRhiResource *resource, + QRhiShaderResourceBinding::Type bindingType, + int loadTypeVal, int storeTypeVal, int loadStoreTypeVal) +{ + int access = 0; + if (bindingType == loadTypeVal) { + access = QGles2CommandBuffer::ComputePassState::Read; + } else { + access = QGles2CommandBuffer::ComputePassState::Write; + if (bindingType == loadStoreTypeVal) + access |= QGles2CommandBuffer::ComputePassState::Read; + } + auto it = writtenResources->find(resource); + if (it != writtenResources->end()) + it->first |= access; + else if (bindingType == storeTypeVal || bindingType == loadStoreTypeVal) + writtenResources->insert(resource, { access, true }); +} + void QRhiGles2::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) { QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::ComputePass); + if (cbD->currentComputeSrb) { + GLbitfield barriers = 0; + + // The key in the writtenResources map indicates that the resource was + // written in a previous dispatch, whereas the value accumulates the + // access mask in the current one. + for (auto &accessAndIsNewFlag : cbD->computePassState.writtenResources) + accessAndIsNewFlag = { 0, false }; + + QGles2ShaderResourceBindings *srbD = QRHI_RES(QGles2ShaderResourceBindings, cbD->currentComputeSrb); + const int bindingCount = srbD->m_bindings.count(); + for (int i = 0; i < bindingCount; ++i) { + const QRhiShaderResourceBinding::Data *b = srbD->m_bindings.at(i).data(); + switch (b->type) { + case QRhiShaderResourceBinding::ImageLoad: + case QRhiShaderResourceBinding::ImageStore: + case QRhiShaderResourceBinding::ImageLoadStore: + qrhigl_accumulateComputeResource(&cbD->computePassState.writtenResources, + b->u.simage.tex, + b->type, + QRhiShaderResourceBinding::ImageLoad, + QRhiShaderResourceBinding::ImageStore, + QRhiShaderResourceBinding::ImageLoadStore); + break; + case QRhiShaderResourceBinding::BufferLoad: + case QRhiShaderResourceBinding::BufferStore: + case QRhiShaderResourceBinding::BufferLoadStore: + qrhigl_accumulateComputeResource(&cbD->computePassState.writtenResources, + b->u.sbuf.buf, + b->type, + QRhiShaderResourceBinding::BufferLoad, + QRhiShaderResourceBinding::BufferStore, + QRhiShaderResourceBinding::BufferLoadStore); + break; + default: + break; + } + } + + for (auto it = cbD->computePassState.writtenResources.begin(); it != cbD->computePassState.writtenResources.end(); ) { + const int accessInThisDispatch = it->first; + const bool isNewInThisDispatch = it->second; + if (accessInThisDispatch && !isNewInThisDispatch) { + if (it.key()->resourceType() == QRhiResource::Texture) + barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; + else + barriers |= GL_SHADER_STORAGE_BARRIER_BIT; + } + // Anything that was previously written, but is only read now, can be + // removed from the written list (because that previous write got a + // corresponding barrier now). + if (accessInThisDispatch == QGles2CommandBuffer::ComputePassState::Read) + it = cbD->computePassState.writtenResources.erase(it); + else + ++it; + } + + if (barriers) { + QGles2CommandBuffer::Command cmd; + cmd.cmd = QGles2CommandBuffer::Command::Barrier; + cmd.args.barrier.barriers = barriers; + cbD->commands.append(cmd); + } + } + QGles2CommandBuffer::Command cmd; cmd.cmd = QGles2CommandBuffer::Command::Dispatch; cmd.args.dispatch.x = GLuint(x); diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h index a9b3022612..679f806004 100644 --- a/src/gui/rhi/qrhigles2_p_p.h +++ b/src/gui/rhi/qrhigles2_p_p.h @@ -521,6 +521,17 @@ struct QGles2CommandBuffer : public QRhiCommandBuffer QRhiShaderResourceBindings *currentComputeSrb; uint currentSrbGeneration; + struct ComputePassState { + enum Access { + Read = 0x01, + Write = 0x02 + }; + QHash > writtenResources; + void reset() { + writtenResources.clear(); + } + } computePassState; + QVector dataRetainPool; QVector imageRetainPool; diff --git a/src/gui/rhi/qrhivulkan.cpp b/src/gui/rhi/qrhivulkan.cpp index c4f56f2dd2..f4c72d2cca 100644 --- a/src/gui/rhi/qrhivulkan.cpp +++ b/src/gui/rhi/qrhivulkan.cpp @@ -2219,6 +2219,8 @@ void QRhiVulkan::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch cbD->recordingPass = QVkCommandBuffer::ComputePass; + cbD->computePassState.reset(); + if (cbD->useSecondaryCb) cbD->secondaryCbs.append(startSecondaryCommandBuffer()); } @@ -2267,15 +2269,152 @@ void QRhiVulkan::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline * psD->lastActiveFrameSlot = currentFrameSlot; } +template +inline void qrhivk_accumulateComputeResource(T *writtenResources, QRhiResource *resource, + QRhiShaderResourceBinding::Type bindingType, + int loadTypeVal, int storeTypeVal, int loadStoreTypeVal) +{ + VkAccessFlags access = 0; + if (bindingType == loadTypeVal) { + access = VK_ACCESS_SHADER_READ_BIT; + } else { + access = VK_ACCESS_SHADER_WRITE_BIT; + if (bindingType == loadStoreTypeVal) + access |= VK_ACCESS_SHADER_READ_BIT; + } + auto it = writtenResources->find(resource); + if (it != writtenResources->end()) + it->first |= access; + else if (bindingType == storeTypeVal || bindingType == loadStoreTypeVal) + writtenResources->insert(resource, { access, true }); +} + void QRhiVulkan::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) { QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QVkCommandBuffer::ComputePass); + // When there are multiple dispatches, read-after-write and + // write-after-write need a barrier. + QVarLengthArray imageBarriers; + QVarLengthArray bufferBarriers; + if (cbD->currentComputeSrb) { + // The key in the writtenResources map indicates that the resource was + // written in a previous dispatch, whereas the value accumulates the + // access mask in the current one. + for (auto &accessAndIsNewFlag : cbD->computePassState.writtenResources) + accessAndIsNewFlag = { 0, false }; + + QVkShaderResourceBindings *srbD = QRHI_RES(QVkShaderResourceBindings, cbD->currentComputeSrb); + const int bindingCount = srbD->m_bindings.count(); + for (int i = 0; i < bindingCount; ++i) { + const QRhiShaderResourceBinding::Data *b = srbD->m_bindings.at(i).data(); + switch (b->type) { + case QRhiShaderResourceBinding::ImageLoad: + case QRhiShaderResourceBinding::ImageStore: + case QRhiShaderResourceBinding::ImageLoadStore: + qrhivk_accumulateComputeResource(&cbD->computePassState.writtenResources, + b->u.simage.tex, + b->type, + QRhiShaderResourceBinding::ImageLoad, + QRhiShaderResourceBinding::ImageStore, + QRhiShaderResourceBinding::ImageLoadStore); + break; + case QRhiShaderResourceBinding::BufferLoad: + case QRhiShaderResourceBinding::BufferStore: + case QRhiShaderResourceBinding::BufferLoadStore: + qrhivk_accumulateComputeResource(&cbD->computePassState.writtenResources, + b->u.sbuf.buf, + b->type, + QRhiShaderResourceBinding::BufferLoad, + QRhiShaderResourceBinding::BufferStore, + QRhiShaderResourceBinding::BufferLoadStore); + break; + default: + break; + } + } + + for (auto it = cbD->computePassState.writtenResources.begin(); it != cbD->computePassState.writtenResources.end(); ) { + const int accessInThisDispatch = it->first; + const bool isNewInThisDispatch = it->second; + if (accessInThisDispatch && !isNewInThisDispatch) { + if (it.key()->resourceType() == QRhiResource::Texture) { + QVkTexture *texD = QRHI_RES(QVkTexture, it.key()); + VkImageMemoryBarrier barrier; + memset(&barrier, 0, sizeof(barrier)); + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + // won't care about subresources, pretend the whole resource was written + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + barrier.oldLayout = texD->usageState.layout; + barrier.newLayout = texD->usageState.layout; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = accessInThisDispatch; + barrier.image = texD->image; + imageBarriers.append(barrier); + } else { + QVkBuffer *bufD = QRHI_RES(QVkBuffer, it.key()); + VkBufferMemoryBarrier barrier; + memset(&barrier, 0, sizeof(barrier)); + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = accessInThisDispatch; + barrier.buffer = bufD->buffers[bufD->m_type == QRhiBuffer::Dynamic ? currentFrameSlot : 0]; + barrier.size = VK_WHOLE_SIZE; + bufferBarriers.append(barrier); + } + } + // Anything that was previously written, but is only read now, can be + // removed from the written list (because that previous write got a + // corresponding barrier now). + if (accessInThisDispatch == VK_ACCESS_SHADER_READ_BIT) + it = cbD->computePassState.writtenResources.erase(it); + else + ++it; + } + } + if (cbD->useSecondaryCb) { - df->vkCmdDispatch(cbD->secondaryCbs.last(), uint32_t(x), uint32_t(y), uint32_t(z)); + VkCommandBuffer secondaryCb = cbD->secondaryCbs.last(); + if (!imageBarriers.isEmpty()) { + df->vkCmdPipelineBarrier(secondaryCb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, + 0, nullptr, + imageBarriers.count(), imageBarriers.constData()); + } + if (!bufferBarriers.isEmpty()) { + df->vkCmdPipelineBarrier(secondaryCb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, + bufferBarriers.count(), bufferBarriers.constData(), + 0, nullptr); + } + df->vkCmdDispatch(secondaryCb, uint32_t(x), uint32_t(y), uint32_t(z)); } else { QVkCommandBuffer::Command cmd; + if (!imageBarriers.isEmpty()) { + cmd.cmd = QVkCommandBuffer::Command::ImageBarrier; + cmd.args.imageBarrier.srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + cmd.args.imageBarrier.dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + cmd.args.imageBarrier.count = imageBarriers.count(); + cmd.args.imageBarrier.index = cbD->pools.imageBarrier.count(); + cbD->pools.imageBarrier.append(imageBarriers.constData(), imageBarriers.count()); + cbD->commands.append(cmd); + } + if (!bufferBarriers.isEmpty()) { + cmd.cmd = QVkCommandBuffer::Command::BufferBarrier; + cmd.args.bufferBarrier.srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + cmd.args.bufferBarrier.dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + cmd.args.bufferBarrier.count = bufferBarriers.count(); + cmd.args.bufferBarrier.index = cbD->pools.bufferBarrier.count(); + cbD->pools.bufferBarrier.append(bufferBarriers.constData(), bufferBarriers.count()); + cbD->commands.append(cmd); + } cmd.cmd = QVkCommandBuffer::Command::Dispatch; cmd.args.dispatch.x = x; cmd.args.dispatch.y = y; @@ -2465,7 +2604,9 @@ void QRhiVulkan::trackedBufferBarrier(QVkCommandBuffer *cbD, QVkBuffer *bufD, in cmd.cmd = QVkCommandBuffer::Command::BufferBarrier; cmd.args.bufferBarrier.srcStageMask = s.stage; cmd.args.bufferBarrier.dstStageMask = stage; - cmd.args.bufferBarrier.desc = bufMemBarrier; + cmd.args.bufferBarrier.count = 1; + cmd.args.bufferBarrier.index = cbD->pools.bufferBarrier.count(); + cbD->pools.bufferBarrier.append(bufMemBarrier); cbD->commands.append(cmd); s.access = access; @@ -2507,7 +2648,9 @@ void QRhiVulkan::trackedImageBarrier(QVkCommandBuffer *cbD, QVkTexture *texD, cmd.cmd = QVkCommandBuffer::Command::ImageBarrier; cmd.args.imageBarrier.srcStageMask = srcStage; cmd.args.imageBarrier.dstStageMask = stage; - cmd.args.imageBarrier.desc = barrier; + cmd.args.imageBarrier.count = 1; + cmd.args.imageBarrier.index = cbD->pools.imageBarrier.count(); + cbD->pools.imageBarrier.append(barrier); cbD->commands.append(cmd); s.layout = layout; @@ -2541,7 +2684,9 @@ void QRhiVulkan::subresourceBarrier(QVkCommandBuffer *cbD, VkImage image, cmd.cmd = QVkCommandBuffer::Command::ImageBarrier; cmd.args.imageBarrier.srcStageMask = srcStage; cmd.args.imageBarrier.dstStageMask = dstStage; - cmd.args.imageBarrier.desc = barrier; + cmd.args.imageBarrier.count = 1; + cmd.args.imageBarrier.index = cbD->pools.imageBarrier.count(); + cbD->pools.imageBarrier.append(barrier); cbD->commands.append(cmd); } @@ -3409,12 +3554,12 @@ void QRhiVulkan::recordPrimaryCommandBuffer(QVkCommandBuffer *cbD) case QVkCommandBuffer::Command::ImageBarrier: df->vkCmdPipelineBarrier(cbD->cb, cmd.args.imageBarrier.srcStageMask, cmd.args.imageBarrier.dstStageMask, 0, 0, nullptr, 0, nullptr, - 1, &cmd.args.imageBarrier.desc); + cmd.args.imageBarrier.count, cbD->pools.imageBarrier.constData() + cmd.args.imageBarrier.index); break; case QVkCommandBuffer::Command::BufferBarrier: df->vkCmdPipelineBarrier(cbD->cb, cmd.args.bufferBarrier.srcStageMask, cmd.args.bufferBarrier.dstStageMask, 0, 0, nullptr, - 1, &cmd.args.bufferBarrier.desc, + cmd.args.bufferBarrier.count, cbD->pools.bufferBarrier.constData() + cmd.args.bufferBarrier.index, 0, nullptr); break; case QVkCommandBuffer::Command::BlitImage: diff --git a/src/gui/rhi/qrhivulkan_p_p.h b/src/gui/rhi/qrhivulkan_p_p.h index 9f18d0bf5e..b0e90dae56 100644 --- a/src/gui/rhi/qrhivulkan_p_p.h +++ b/src/gui/rhi/qrhivulkan_p_p.h @@ -370,6 +370,13 @@ struct QVkCommandBuffer : public QRhiCommandBuffer QVarLengthArray secondaryCbs; bool inExternal; + struct { + QHash > writtenResources; + void reset() { + writtenResources.clear(); + } + } computePassState; + struct Command { enum Cmd { CopyBuffer, @@ -429,12 +436,14 @@ struct QVkCommandBuffer : public QRhiCommandBuffer struct { VkPipelineStageFlags srcStageMask; VkPipelineStageFlags dstStageMask; - VkImageMemoryBarrier desc; + int count; + int index; } imageBarrier; struct { VkPipelineStageFlags srcStageMask; VkPipelineStageFlags dstStageMask; - VkBufferMemoryBarrier desc; + int count; + int index; } bufferBarrier; struct { VkImage src; @@ -537,6 +546,8 @@ struct QVkCommandBuffer : public QRhiCommandBuffer pools.vertexBuffer.clear(); pools.vertexBufferOffset.clear(); pools.debugMarkerData.clear(); + pools.imageBarrier.clear(); + pools.bufferBarrier.clear(); } struct { @@ -546,6 +557,8 @@ struct QVkCommandBuffer : public QRhiCommandBuffer QVarLengthArray vertexBuffer; QVarLengthArray vertexBufferOffset; QVarLengthArray debugMarkerData; + QVarLengthArray imageBarrier; + QVarLengthArray bufferBarrier; } pools; friend class QRhiVulkan; -- cgit v1.2.3