diff options
author | Laszlo Agocs <laszlo.agocs@qt.io> | 2023-04-03 19:18:59 +0200 |
---|---|---|
committer | Laszlo Agocs <laszlo.agocs@qt.io> | 2023-04-18 15:35:28 +0200 |
commit | e539e9a7afacc330117a761e0ee55d6452cffaca (patch) | |
tree | 169646118e75acc84663190e41f97efd42c627cd /src/gui/rhi/qrhivulkan.cpp | |
parent | 62a4ca773a9e8a2f0c993c159c045f2794078aad (diff) |
rhi: Replace the temporary GPU time query API with a saner one
Modeled after Metal's cb.GPUStart/EndTime. Implemented with timestamp
queries for other APIs.
Implemented for Metal, D3D11, and Vulkan for now. There is no more callback,
just a getter on the command buffer which returns the latest known value,
referring to some previous frame. This makes it a lot more usable
than the original solution, which is not really used anywhere at
the moment.
Now works for offscreen "frames" as well; this was not implemented
before.
Opt in with a new QRhi::create() flag because we cannot tell in
advance if the getter will be called or not, and this way we can
skip recording the timestamps by default. The cost is probably
minimal, though. Qt Quick will set this automatically when running
with QSG_RHI_PROFILE=1.
Change-Id: I903779984a4e0bbf1d03806d04bf61571ce23d72
Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
Diffstat (limited to 'src/gui/rhi/qrhivulkan.cpp')
-rw-r--r-- | src/gui/rhi/qrhivulkan.cpp | 142 |
1 files changed, 101 insertions, 41 deletions
diff --git a/src/gui/rhi/qrhivulkan.cpp b/src/gui/rhi/qrhivulkan.cpp index b2f72dce49..17e291eee1 100644 --- a/src/gui/rhi/qrhivulkan.cpp +++ b/src/gui/rhi/qrhivulkan.cpp @@ -1671,6 +1671,24 @@ void QRhiVulkan::ensureCommandPoolForNewFrame() df->vkResetCommandPool(dev, cmdPool[currentFrameSlot], flags); } +double QRhiVulkan::elapsedSecondsFromTimestamp(quint64 timestamp[2], bool *ok) +{ + quint64 mask = 0; + for (quint64 i = 0; i < timestampValidBits; i += 8) + mask |= 0xFFULL << i; + const quint64 ts0 = timestamp[0] & mask; + const quint64 ts1 = timestamp[1] & mask; + const float nsecsPerTick = physDevProperties.limits.timestampPeriod; + if (!qFuzzyIsNull(nsecsPerTick)) { + const float elapsedMs = float(ts1 - ts0) * nsecsPerTick / 1000000.0f; + const double elapsedSec = elapsedMs / 1000.0; + *ok = true; + return elapsedSec; + } + *ok = false; + return 0; +} + QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags) { QVkSwapChain *swapChainD = QRHI_RES(QVkSwapChain, swapChain); @@ -1720,30 +1738,6 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin // mess up A's in-flight commands (as they are not in flight anymore). waitCommandCompletion(frameResIndex); - // Now is the time to read the timestamps for the previous frame for this slot. 
- if (frame.timestampQueryIndex >= 0) { - quint64 timestamp[2] = { 0, 0 }; - VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(frame.timestampQueryIndex), 2, - 2 * sizeof(quint64), timestamp, sizeof(quint64), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - timestampQueryPoolMap.clearBit(frame.timestampQueryIndex / 2); - frame.timestampQueryIndex = -1; - if (err == VK_SUCCESS) { - quint64 mask = 0; - for (quint64 i = 0; i < timestampValidBits; i += 8) - mask |= 0xFFULL << i; - const quint64 ts0 = timestamp[0] & mask; - const quint64 ts1 = timestamp[1] & mask; - const float nsecsPerTick = physDevProperties.limits.timestampPeriod; - if (!qFuzzyIsNull(nsecsPerTick)) { - const float elapsedMs = float(ts1 - ts0) * nsecsPerTick / 1000000.0f; - runGpuFrameTimeCallbacks(elapsedMs); - } - } else { - qWarning("Failed to query timestamp: %d", err); - } - } - currentFrameSlot = int(swapChainD->currentFrameSlot); currentSwapChain = swapChainD; if (swapChainD->ds) @@ -1757,9 +1751,34 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin if (cbres != QRhi::FrameOpSuccess) return cbres; - // when profiling is enabled, pick a free query (pair) from the pool - int timestampQueryIdx = -1; - if (hasGpuFrameTimeCallback() && swapChainD->bufferCount > 1) { // no timestamps if not having at least 2 frames in flight + swapChainD->cbWrapper.cb = frame.cmdBuf; + + QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]); + swapChainD->rtWrapper.d.fb = image.fb; + + prepareNewFrame(&swapChainD->cbWrapper); + + // Read the timestamps for the previous frame for this slot. 
+ if (frame.timestampQueryIndex >= 0) { + quint64 timestamp[2] = { 0, 0 }; + VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(frame.timestampQueryIndex), 2, + 2 * sizeof(quint64), timestamp, sizeof(quint64), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + timestampQueryPoolMap.clearBit(frame.timestampQueryIndex / 2); + frame.timestampQueryIndex = -1; + if (err == VK_SUCCESS) { + bool ok = false; + const double elapsedSec = elapsedSecondsFromTimestamp(timestamp, &ok); + if (ok) + swapChainD->cbWrapper.lastGpuTime = elapsedSec; + } else { + qWarning("Failed to query timestamp: %d", err); + } + } + + // No timestamps if the client did not opt in, or when not having at least 2 frames in flight. + if (rhiFlags.testFlag(QRhi::EnableTimestamps) && swapChainD->bufferCount > 1) { + int timestampQueryIdx = -1; for (int i = 0; i < timestampQueryPoolMap.size(); ++i) { if (!timestampQueryPoolMap.testBit(i)) { timestampQueryPoolMap.setBit(i); @@ -1767,21 +1786,14 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin break; } } + if (timestampQueryIdx >= 0) { + df->vkCmdResetQueryPool(frame.cmdBuf, timestampQueryPool, uint32_t(timestampQueryIdx), 2); + // record timestamp at the start of the command buffer + df->vkCmdWriteTimestamp(frame.cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + timestampQueryPool, uint32_t(timestampQueryIdx)); + frame.timestampQueryIndex = timestampQueryIdx; + } } - if (timestampQueryIdx >= 0) { - df->vkCmdResetQueryPool(frame.cmdBuf, timestampQueryPool, uint32_t(timestampQueryIdx), 2); - // record timestamp at the start of the command buffer - df->vkCmdWriteTimestamp(frame.cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - timestampQueryPool, uint32_t(timestampQueryIdx)); - frame.timestampQueryIndex = timestampQueryIdx; - } - - swapChainD->cbWrapper.cb = frame.cmdBuf; - - QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]); - swapChainD->rtWrapper.d.fb = image.fb; 
- - prepareNewFrame(&swapChainD->cbWrapper); return QRhi::FrameOpSuccess; } @@ -2031,6 +2043,24 @@ QRhi::FrameOpResult QRhiVulkan::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi prepareNewFrame(cbWrapper); ofr.active = true; + if (rhiFlags.testFlag(QRhi::EnableTimestamps)) { + int timestampQueryIdx = -1; + for (int i = 0; i < timestampQueryPoolMap.size(); ++i) { + if (!timestampQueryPoolMap.testBit(i)) { + timestampQueryPoolMap.setBit(i); + timestampQueryIdx = i * 2; + break; + } + } + if (timestampQueryIdx >= 0) { + df->vkCmdResetQueryPool(cbWrapper->cb, timestampQueryPool, uint32_t(timestampQueryIdx), 2); + // record timestamp at the start of the command buffer + df->vkCmdWriteTimestamp(cbWrapper->cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + timestampQueryPool, uint32_t(timestampQueryIdx)); + ofr.timestampQueryIndex = timestampQueryIdx; + } + } + *cb = cbWrapper; return QRhi::FrameOpSuccess; } @@ -2044,6 +2074,12 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame(QRhi::EndFrameFlags flags) QVkCommandBuffer *cbWrapper(ofr.cbWrapper[currentFrameSlot]); recordPrimaryCommandBuffer(cbWrapper); + // record another timestamp, when enabled + if (ofr.timestampQueryIndex >= 0) { + df->vkCmdWriteTimestamp(cbWrapper->cb, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + timestampQueryPool, uint32_t(ofr.timestampQueryIndex + 1)); + } + if (!ofr.cmdFence) { VkFenceCreateInfo fenceInfo = {}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; @@ -2066,6 +2102,24 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame(QRhi::EndFrameFlags flags) // previous) frame is safe since we waited for completion above. finishActiveReadbacks(true); + // Read the timestamps, if we wrote them. 
+ if (ofr.timestampQueryIndex >= 0) { + quint64 timestamp[2] = { 0, 0 }; + VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(ofr.timestampQueryIndex), 2, + 2 * sizeof(quint64), timestamp, sizeof(quint64), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + timestampQueryPoolMap.clearBit(ofr.timestampQueryIndex / 2); + ofr.timestampQueryIndex = -1; + if (err == VK_SUCCESS) { + bool ok = false; + const double elapsedSec = elapsedSecondsFromTimestamp(timestamp, &ok); + if (ok) + cbWrapper->lastGpuTime = elapsedSec; + } else { + qWarning("Failed to query timestamp: %d", err); + } + } + return QRhi::FrameOpSuccess; } @@ -5153,6 +5207,12 @@ void QRhiVulkan::endExternal(QRhiCommandBuffer *cb) cbD->resetCachedState(); } +double QRhiVulkan::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + return cbD->lastGpuTime; +} + void QRhiVulkan::setObjectName(uint64_t object, VkObjectType type, const QByteArray &name, int slot) { #ifdef VK_EXT_debug_utils |