diff options
Diffstat (limited to 'src/gui/rhi/qrhid3d12.cpp')
-rw-r--r-- | src/gui/rhi/qrhid3d12.cpp | 1118 |
1 files changed, 775 insertions, 343 deletions
diff --git a/src/gui/rhi/qrhid3d12.cpp b/src/gui/rhi/qrhid3d12.cpp index 6c58a4d008..0f176c683d 100644 --- a/src/gui/rhi/qrhid3d12.cpp +++ b/src/gui/rhi/qrhid3d12.cpp @@ -2,16 +2,10 @@ // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qrhid3d12_p.h" -#include "qshader.h" -#include <QWindow> #include <qmath.h> -#include <private/qsystemlibrary_p.h> -#include <QtCore/qcryptographichash.h> #include <QtCore/private/qsystemerror_p.h> - -#include <d3dcompiler.h> #include <comdef.h> - +#include "qrhid3dhelpers_p.h" #include "cs_mipmap_p.h" #if __has_include(<pix.h>) @@ -19,6 +13,8 @@ #define QRHI_D3D12_HAS_OLD_PIX #endif +#ifdef __ID3D12Device2_INTERFACE_DEFINED__ + QT_BEGIN_NAMESPACE /* @@ -30,7 +26,7 @@ QT_BEGIN_NAMESPACE \inmodule QtGui \brief Direct3D 12 specific initialization parameters. - \note This a RHI API with limited compatibility guarantees, see \l QRhi + \note This is a RHI API with limited compatibility guarantees, see \l QRhi for details. A D3D12-based QRhi needs no special parameters for initialization. If @@ -81,34 +77,60 @@ QT_BEGIN_NAMESPACE \c{d3d12.h} headers is not acceptable here. The actual types are \c{ID3D12Device *} and \c{ID3D12CommandQueue *}. - \note This a RHI API with limited compatibility guarantees, see \l QRhi + \note This is a RHI API with limited compatibility guarantees, see \l QRhi for details. */ /*! \variable QRhiD3D12NativeHandles::dev + + Points to a + \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12device}{ID3D12Device} + or left set to \nullptr if no existing device is to be imported. */ /*! \variable QRhiD3D12NativeHandles::minimumFeatureLevel + + Specifies the \b minimum feature level passed to + \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-d3d12createdevice}{D3D12CreateDevice()}. + When not set, \c{D3D_FEATURE_LEVEL_11_0} is used. See + \l{https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels}{this + page} for details. + + Relevant only when QRhi creates the device, ignored when importing a device + and device context. */ /*! \variable QRhiD3D12NativeHandles::adapterLuidLow + + The low part of the local identifier (LUID) of the DXGI adapter to use. + Relevant only when QRhi creates the device, ignored when importing a device + and device context. */ /*! \variable QRhiD3D12NativeHandles::adapterLuidHigh + + The high part of the local identifier (LUID) of the DXGI adapter to use. + Relevant only when QRhi creates the device, ignored when importing a device + and device context. */ /*! \variable QRhiD3D12NativeHandles::commandQueue + + When set, must point to a + \l{https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12commandqueue}{ID3D12CommandQueue}. + It allows to optionally import a command queue as well, in addition to a + device. */ /*! \class QRhiD3D12CommandBufferNativeHandles \inmodule QtGui - \brief Holds the ID3D12GraphicsCommandList object that is backing a QRhiCommandBuffer. + \brief Holds the ID3D12GraphicsCommandList1 object that is backing a QRhiCommandBuffer. \note The command list object is only guaranteed to be valid, and in recording state, while recording a frame. That is, between a @@ -116,7 +138,7 @@ QT_BEGIN_NAMESPACE \l{QRhi::beginOffscreenFrame()}{beginOffscreenFrame()} - \l{QRhi::endOffscreenFrame()}{endOffscreenFrame()} pair. - \note This a RHI API with limited compatibility guarantees, see \l QRhi + \note This is a RHI API with limited compatibility guarantees, see \l QRhi for details. */ @@ -132,8 +154,14 @@ QRhiD3D12::QRhiD3D12(QRhiD3D12InitParams *params, QRhiD3D12NativeHandles *import debugLayer = params->enableDebugLayer; if (importParams) { if (importParams->dev) { - dev = reinterpret_cast<ID3D12Device *>(importParams->dev); - importedDevice = true; + ID3D12Device *d3d12Device = reinterpret_cast<ID3D12Device *>(importParams->dev); + if (SUCCEEDED(d3d12Device->QueryInterface(__uuidof(ID3D12Device2), reinterpret_cast<void **>(&dev)))) { + // get rid of the ref added by QueryInterface + d3d12Device->Release(); + importedDevice = true; + } else { + qWarning("ID3D12Device2 not supported, cannot import device"); + } } if (importParams->commandQueue) { cmdQueue = reinterpret_cast<ID3D12CommandQueue *>(importParams->commandQueue); @@ -179,9 +207,20 @@ bool QRhiD3D12::create(QRhi::Flags flags) factoryFlags |= DXGI_CREATE_FACTORY_DEBUG; HRESULT hr = CreateDXGIFactory2(factoryFlags, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgiFactory)); if (FAILED(hr)) { - qWarning("CreateDXGIFactory2() failed to create DXGI factory: %s", - qPrintable(QSystemError::windowsComString(hr))); - return false; + // retry without debug, if it was requested (to match D3D11 backend behavior) + if (debugLayer) { + qCDebug(QRHI_LOG_INFO, "Debug layer was requested but is not available. " + "Attempting to create DXGIFactory2 without it."); + factoryFlags &= ~DXGI_CREATE_FACTORY_DEBUG; + hr = CreateDXGIFactory2(factoryFlags, __uuidof(IDXGIFactory2), reinterpret_cast<void **>(&dxgiFactory)); + } + if (SUCCEEDED(hr)) { + debugLayer = false; + } else { + qWarning("CreateDXGIFactory2() failed to create DXGI factory: %s", + qPrintable(QSystemError::windowsComString(hr))); + return false; + } } supportsAllowTearing = false; @@ -249,9 +288,7 @@ bool QRhiD3D12::create(QRhi::Flags flags) if (!activeAdapter && (requestedAdapterIndex < 0 || requestedAdapterIndex == adapterIndex)) { activeAdapter = adapter; adapterLuid = desc.AdapterLuid; - driverInfoStruct.deviceName = name.toUtf8(); - driverInfoStruct.deviceId = desc.DeviceId; - driverInfoStruct.vendorId = desc.VendorId; + QRhiD3D::fillDriverInfo(&driverInfoStruct, desc); qCDebug(QRHI_LOG_INFO, " using this adapter"); } else { adapter->Release(); @@ -267,7 +304,7 @@ bool QRhiD3D12::create(QRhi::Flags flags) hr = D3D12CreateDevice(activeAdapter, minimumFeatureLevel, - __uuidof(ID3D12Device), + __uuidof(ID3D12Device2), reinterpret_cast<void **>(&dev)); if (FAILED(hr)) { qWarning("Failed to create D3D12 device: %s", qPrintable(QSystemError::windowsComString(hr))); @@ -281,16 +318,20 @@ bool QRhiD3D12::create(QRhi::Flags flags) for (int adapterIndex = 0; dxgiFactory->EnumAdapters1(UINT(adapterIndex), &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) { DXGI_ADAPTER_DESC1 desc; adapter->GetDesc1(&desc); - adapter->Release(); if (desc.AdapterLuid.LowPart == adapterLuid.LowPart && desc.AdapterLuid.HighPart == adapterLuid.HighPart) { - driverInfoStruct.deviceName = QString::fromUtf16(reinterpret_cast<char16_t *>(desc.Description)).toUtf8(); - driverInfoStruct.deviceId = desc.DeviceId; - driverInfoStruct.vendorId = desc.VendorId; + activeAdapter = adapter; + QRhiD3D::fillDriverInfo(&driverInfoStruct, desc); break; + } else { + adapter->Release(); } } + if (!activeAdapter) { + qWarning("No adapter"); + return false; + } qCDebug(QRHI_LOG_INFO, "Using imported device %p", dev); } @@ -392,6 +433,9 @@ bool QRhiD3D12::create(QRhi::Flags flags) qWarning("Could not create host-visible staging area"); return false; } + QString decoratedName = QLatin1String("Small staging area buffer/"); + decoratedName += QString::number(i); + smallStagingAreas[i].mem.buffer->SetName(reinterpret_cast<LPCWSTR>(decoratedName.utf16())); } if (!shaderVisibleCbvSrvUavHeap.create(dev, @@ -402,6 +446,53 @@ bool QRhiD3D12::create(QRhi::Flags flags) return false; } + if (flags.testFlag(QRhi::EnableTimestamps)) { + static bool wantsStablePowerState = qEnvironmentVariableIntValue("QT_D3D_STABLE_POWER_STATE"); + // + // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-setstablepowerstate + // + // NB! This is a _global_ setting, affecting other processes (and 3D + // APIs such as Vulkan), as long as this application is running. Hence + // making it an env.var. for now. Never enable it in production. But + // extremely useful for the GPU timings with NVIDIA at least; the + // timestamps become stable and smooth, making the number readable and + // actually useful e.g. in Quick 3D's DebugView when this is enabled. + // (otherwise the number's all over the place) + // + // See also + // https://developer.nvidia.com/blog/advanced-api-performance-setstablepowerstate/ + // for possible other approaches. + // + if (wantsStablePowerState) + dev->SetStablePowerState(TRUE); + + hr = cmdQueue->GetTimestampFrequency(×tampTicksPerSecond); + if (FAILED(hr)) { + qWarning("Failed to query timestamp frequency: %s", + qPrintable(QSystemError::windowsComString(hr))); + return false; + } + if (!timestampQueryHeap.create(dev, QD3D12_FRAMES_IN_FLIGHT * 2, D3D12_QUERY_HEAP_TYPE_TIMESTAMP)) { + qWarning("Failed to create timestamp query pool"); + return false; + } + const quint32 readbackBufSize = QD3D12_FRAMES_IN_FLIGHT * 2 * sizeof(quint64); + if (!timestampReadbackArea.create(this, readbackBufSize, D3D12_HEAP_TYPE_READBACK)) { + qWarning("Failed to create timestamp readback buffer"); + return false; + } + timestampReadbackArea.mem.buffer->SetName(L"Timestamp readback buffer"); + memset(timestampReadbackArea.mem.p, 0, readbackBufSize); + } + + caps = {}; + D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {}; + if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)))) { + caps.multiView = options3.ViewInstancingTier != D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED; + // https://microsoft.github.io/DirectX-Specs/d3d/RelaxedCasting.html + caps.textureViewFormat = options3.CastingFullyTypedFormatSupported; + } + deviceLost = false; offscreenActive = false; @@ -430,6 +521,9 @@ void QRhiD3D12::destroy() } } + timestampQueryHeap.destroy(); + timestampReadbackArea.destroy(); + shaderVisibleCbvSrvUavHeap.destroy(); for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) @@ -445,8 +539,10 @@ void QRhiD3D12::destroy() cbvSrvUavPool.destroy(); for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) { - cmdAllocators[i]->Release(); - cmdAllocators[i] = nullptr; + if (cmdAllocators[i]) { + cmdAllocators[i]->Release(); + cmdAllocators[i] = nullptr; + } } if (fullFenceEvent) { @@ -565,7 +661,7 @@ bool QRhiD3D12::isFeatureSupported(QRhi::Feature feature) const return false; #endif case QRhi::Timestamps: - return false; // ### + return true; case QRhi::Instancing: return true; case QRhi::CustomInstanceStepRate: @@ -638,6 +734,14 @@ bool QRhiD3D12::isFeatureSupported(QRhi::Feature feature) const return true; case QRhi::ThreeDimensionalTextureMipmaps: return false; // we generate mipmaps ourselves with compute and this is not implemented + case QRhi::MultiView: + return caps.multiView; + case QRhi::TextureViewFormat: + return caps.textureViewFormat; + case QRhi::ResolveDepthStencil: + // there is no Multisample Resolve support for depth/stencil formats + // https://learn.microsoft.com/en-us/windows/win32/direct3ddxgi/hardware-support-for-direct3d-12-1-formats + return false; } return false; } @@ -794,15 +898,18 @@ void QRhiD3D12::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline } cbD->cmdList->IASetPrimitiveTopology(psD->topology); + + if (psD->viewInstanceMask) + cbD->cmdList->SetViewInstanceMask(psD->viewInstanceMask); } } -void QRhiD3D12::visitUniformBuffer(QD3D12Stage s, - const QRhiShaderResourceBinding::Data::UniformBufferData &d, - int, - int binding, - int dynamicOffsetCount, - const QRhiCommandBuffer::DynamicOffset *dynamicOffsets) +void QD3D12CommandBuffer::visitUniformBuffer(QD3D12Stage s, + const QRhiShaderResourceBinding::Data::UniformBufferData &d, + int, + int binding, + int dynamicOffsetCount, + const QRhiCommandBuffer::DynamicOffset *dynamicOffsets) { QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, d.buf); quint32 offset = d.offset; @@ -815,29 +922,30 @@ void QRhiD3D12::visitUniformBuffer(QD3D12Stage s, } } } - visitorData.cbufs[s].append({ bufD->handles[currentFrameSlot], offset }); + QRHI_RES_RHI(QRhiD3D12); + visitorData.cbufs[s].append({ bufD->handles[rhiD->currentFrameSlot], offset }); } -void QRhiD3D12::visitTexture(QD3D12Stage s, - const QRhiShaderResourceBinding::TextureAndSampler &d, - int) +void QD3D12CommandBuffer::visitTexture(QD3D12Stage s, + const QRhiShaderResourceBinding::TextureAndSampler &d, + int) { QD3D12Texture *texD = QRHI_RES(QD3D12Texture, d.tex); visitorData.srvs[s].append(texD->srv); } -void QRhiD3D12::visitSampler(QD3D12Stage s, - const QRhiShaderResourceBinding::TextureAndSampler &d, - int) +void QD3D12CommandBuffer::visitSampler(QD3D12Stage s, + const QRhiShaderResourceBinding::TextureAndSampler &d, + int) { QD3D12Sampler *samplerD = QRHI_RES(QD3D12Sampler, d.sampler); visitorData.samplers[s].append(samplerD->lookupOrCreateShaderVisibleDescriptor()); } -void QRhiD3D12::visitStorageBuffer(QD3D12Stage s, - const QRhiShaderResourceBinding::Data::StorageBufferData &d, - QD3D12ShaderResourceVisitor::StorageOp, - int) +void QD3D12CommandBuffer::visitStorageBuffer(QD3D12Stage s, + const QRhiShaderResourceBinding::Data::StorageBufferData &d, + QD3D12ShaderResourceVisitor::StorageOp, + int) { QD3D12Buffer *bufD = QRHI_RES(QD3D12Buffer, d.buf); // SPIRV-Cross generated HLSL uses RWByteAddressBuffer @@ -850,17 +958,17 @@ void QRhiD3D12::visitStorageBuffer(QD3D12Stage s, visitorData.uavs[s].append({ bufD->handles[0], uavDesc }); } -void QRhiD3D12::visitStorageImage(QD3D12Stage s, - const QRhiShaderResourceBinding::Data::StorageImageData &d, - QD3D12ShaderResourceVisitor::StorageOp, - int) +void QD3D12CommandBuffer::visitStorageImage(QD3D12Stage s, + const QRhiShaderResourceBinding::Data::StorageImageData &d, + QD3D12ShaderResourceVisitor::StorageOp, + int) { QD3D12Texture *texD = QRHI_RES(QD3D12Texture, d.tex); const bool isCube = texD->m_flags.testFlag(QRhiTexture::CubeMap); const bool isArray = texD->m_flags.testFlag(QRhiTexture::TextureArray); const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional); D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = texD->dxgiFormat; + uavDesc.Format = texD->rtFormat; if (isCube) { uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; uavDesc.Texture2DArray.MipSlice = UINT(d.level); @@ -899,8 +1007,8 @@ void QRhiD3D12::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind QD3D12ShaderResourceBindings *srbD = QRHI_RES(QD3D12ShaderResourceBindings, srb); - for (int i = 0, ie = srbD->sortedBindings.size(); i != ie; ++i) { - const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->sortedBindings[i]); + for (int i = 0, ie = srbD->m_bindings.size(); i != ie; ++i) { + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->m_bindings[i]); switch (b->type) { case QRhiShaderResourceBinding::UniformBuffer: { @@ -1012,14 +1120,15 @@ void QRhiD3D12::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind QD3D12ShaderResourceVisitor visitor(srbD, stageData, gfxPsD ? 5 : 1); + QD3D12CommandBuffer::VisitorData &visitorData(cbD->visitorData); visitorData = {}; using namespace std::placeholders; - visitor.uniformBuffer = std::bind(&QRhiD3D12::visitUniformBuffer, this, _1, _2, _3, _4, dynamicOffsetCount, dynamicOffsets); - visitor.texture = std::bind(&QRhiD3D12::visitTexture, this, _1, _2, _3); - visitor.sampler = std::bind(&QRhiD3D12::visitSampler, this, _1, _2, _3); - visitor.storageBuffer = std::bind(&QRhiD3D12::visitStorageBuffer, this, _1, _2, _3, _4); - visitor.storageImage = std::bind(&QRhiD3D12::visitStorageImage, this, _1, _2, _3, _4); + visitor.uniformBuffer = std::bind(&QD3D12CommandBuffer::visitUniformBuffer, cbD, _1, _2, _3, _4, dynamicOffsetCount, dynamicOffsets); + visitor.texture = std::bind(&QD3D12CommandBuffer::visitTexture, cbD, _1, _2, _3); + visitor.sampler = std::bind(&QD3D12CommandBuffer::visitSampler, cbD, _1, _2, _3); + visitor.storageBuffer = std::bind(&QD3D12CommandBuffer::visitStorageBuffer, cbD, _1, _2, _3, _4); + visitor.storageImage = std::bind(&QD3D12CommandBuffer::visitStorageImage, cbD, _1, _2, _3, _4); visitor.visit(); @@ -1375,8 +1484,24 @@ void QRhiD3D12::endExternal(QRhiCommandBuffer *cb) double QRhiD3D12::lastCompletedGpuTime(QRhiCommandBuffer *cb) { - Q_UNUSED(cb); - return 0; + QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb); + return cbD->lastGpuTime; +} + +static void calculateGpuTime(QD3D12CommandBuffer *cbD, + int timestampPairStartIndex, + const quint8 *readbackBufPtr, + quint64 timestampTicksPerSecond) +{ + const size_t byteOffset = timestampPairStartIndex * sizeof(quint64); + const quint64 *p = reinterpret_cast<const quint64 *>(readbackBufPtr + byteOffset); + const quint64 startTime = *p++; + const quint64 endTime = *p; + if (startTime < endTime) { + const quint64 ticks = endTime - startTime; + const double timeSec = ticks / double(timestampTicksPerSecond); + cbD->lastGpuTime = timeSec; + } } QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) @@ -1400,7 +1525,7 @@ QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF // be in flight anymore). With Qt Quick this situation cannot happen anyway // by design (one QRhi per window). for (QD3D12SwapChain *sc : std::as_const(swapchains)) - sc->waitCommandCompletionForFrameSlot(sc->currentFrameSlot); + sc->waitCommandCompletionForFrameSlot(currentFrameSlot); // note: swapChainD->currentFrameSlot, not sc's HRESULT hr = cmdAllocators[currentFrameSlot]->Reset(); if (FAILED(hr)) { @@ -1422,6 +1547,16 @@ QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF swapChainD->rtWrapper.d.dsv = swapChainD->ds ? swapChainD->ds->dsv.cpuHandle : D3D12_CPU_DESCRIPTOR_HANDLE { 0 }; + if (swapChainD->stereo) { + swapChainD->rtWrapperRight.d.rtv[0] = swapChainD->sampleDesc.Count > 1 + ? swapChainD->msaaRtvs[swapChainD->currentBackBufferIndex].cpuHandle + : swapChainD->rtvsRight[swapChainD->currentBackBufferIndex].cpuHandle; + + swapChainD->rtWrapperRight.d.dsv = + swapChainD->ds ? swapChainD->ds->dsv.cpuHandle : D3D12_CPU_DESCRIPTOR_HANDLE{ 0 }; + } + + // Time to release things that are marked for currentFrameSlot since due to // the wait above we know that the previous commands on the GPU for this // slot must have finished already. @@ -1439,6 +1574,20 @@ QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF finishActiveReadbacks(); // last, in case the readback-completed callback issues rhi calls + if (timestampQueryHeap.isValid() && timestampTicksPerSecond) { + // Read the timestamps for the previous frame for this slot. (the + // ResolveQuery() should have completed by now due to the wait above) + const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT; + calculateGpuTime(cbD, + timestampPairStartIndex, + timestampReadbackArea.mem.p, + timestampTicksPerSecond); + // Write the start timestamp for this frame for this slot. + cbD->cmdList->EndQuery(timestampQueryHeap.heap, + D3D12_QUERY_TYPE_TIMESTAMP, + timestampPairStartIndex); + } + return QRhi::FrameOpSuccess; } @@ -1463,7 +1612,20 @@ QRhi::FrameOpResult QRhiD3D12::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame barrierGen.addTransitionBarrier(backBufferResourceHandle, D3D12_RESOURCE_STATE_PRESENT); barrierGen.enqueueBufferedTransitionBarriers(cbD); - ID3D12GraphicsCommandList *cmdList = cbD->cmdList; + if (timestampQueryHeap.isValid()) { + const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT; + cbD->cmdList->EndQuery(timestampQueryHeap.heap, + D3D12_QUERY_TYPE_TIMESTAMP, + timestampPairStartIndex + 1); + cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap, + D3D12_QUERY_TYPE_TIMESTAMP, + timestampPairStartIndex, + 2, + timestampReadbackArea.mem.buffer, + timestampPairStartIndex * sizeof(quint64)); + } + + ID3D12GraphicsCommandList1 *cmdList = cbD->cmdList; HRESULT hr = cmdList->Close(); if (FAILED(hr)) { qWarning("Failed to close command list: %s", @@ -1481,6 +1643,10 @@ QRhi::FrameOpResult QRhiD3D12::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame { presentFlags |= DXGI_PRESENT_ALLOW_TEARING; } + if (!swapChainD->swapChain) { + qWarning("Failed to present, no swapchain"); + return QRhi::FrameOpError; + } HRESULT hr = swapChainD->swapChain->Present(swapChainD->swapInterval, presentFlags); if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) { qWarning("Device loss detected in Present()"); @@ -1547,6 +1713,12 @@ QRhi::FrameOpResult QRhiD3D12::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi: bindShaderVisibleHeaps(cbD); + if (timestampQueryHeap.isValid() && timestampTicksPerSecond) { + cbD->cmdList->EndQuery(timestampQueryHeap.heap, + D3D12_QUERY_TYPE_TIMESTAMP, + currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT); + } + offscreenActive = true; *cb = cbD; @@ -1560,7 +1732,20 @@ QRhi::FrameOpResult QRhiD3D12::endOffscreenFrame(QRhi::EndFrameFlags flags) offscreenActive = false; QD3D12CommandBuffer *cbD = offscreenCb[currentFrameSlot]; - ID3D12GraphicsCommandList *cmdList = cbD->cmdList; + if (timestampQueryHeap.isValid()) { + const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT; + cbD->cmdList->EndQuery(timestampQueryHeap.heap, + D3D12_QUERY_TYPE_TIMESTAMP, + timestampPairStartIndex + 1); + cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap, + D3D12_QUERY_TYPE_TIMESTAMP, + timestampPairStartIndex, + 2, + timestampReadbackArea.mem.buffer, + timestampPairStartIndex * sizeof(quint64)); + } + + ID3D12GraphicsCommandList1 *cmdList = cbD->cmdList; HRESULT hr = cmdList->Close(); if (FAILED(hr)) { qWarning("Failed to close command list: %s", @@ -1580,6 +1765,14 @@ QRhi::FrameOpResult QRhiD3D12::endOffscreenFrame(QRhi::EndFrameFlags flags) // previous) frame is safe since we waited for completion above. finishActiveReadbacks(true); + // the timestamp query results should be available too, given the wait + if (timestampQueryHeap.isValid()) { + calculateGpuTime(cbD, + currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT, + timestampReadbackArea.mem.p, + timestampTicksPerSecond); + } + return QRhi::FrameOpSuccess; } @@ -1601,7 +1794,7 @@ QRhi::FrameOpResult QRhiD3D12::finish() Q_ASSERT(cbD->recordingPass == QD3D12CommandBuffer::NoPass); - ID3D12GraphicsCommandList *cmdList = cbD->cmdList; + ID3D12GraphicsCommandList1 *cmdList = cbD->cmdList; HRESULT hr = cmdList->Close(); if (FAILED(hr)) { qWarning("Failed to close command list: %s", @@ -1786,15 +1979,19 @@ void QRhiD3D12::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resource barrierGen.addTransitionBarrier(dstTexD->handle, D3D12_RESOURCE_STATE_RESOLVE_DEST); barrierGen.enqueueBufferedTransitionBarriers(cbD); - const UINT srcSubresource = calcSubresource(0, UINT(colorAtt.layer()), 1); - const UINT dstSubresource = calcSubresource(UINT(colorAtt.resolveLevel()), - UINT(colorAtt.resolveLayer()), - dstTexD->mipLevelCount); - cbD->cmdList->ResolveSubresource(dstRes->resource, dstSubresource, - srcRes->resource, srcSubresource, - dstTexD->dxgiFormat); + const UINT resolveCount = colorAtt.multiViewCount() >= 2 ? colorAtt.multiViewCount() : 1; + for (UINT resolveIdx = 0; resolveIdx < resolveCount; ++resolveIdx) { + const UINT srcSubresource = calcSubresource(0, UINT(colorAtt.layer()) + resolveIdx, 1); + const UINT dstSubresource = calcSubresource(UINT(colorAtt.resolveLevel()), + UINT(colorAtt.resolveLayer()) + resolveIdx, + dstTexD->mipLevelCount); + cbD->cmdList->ResolveSubresource(dstRes->resource, dstSubresource, + srcRes->resource, srcSubresource, + dstTexD->dxgiFormat); + } } - + if (rtTex->m_desc.depthResolveTexture()) + qWarning("Resolving multisample depth-stencil buffers is not supported with D3D"); } cbD->recordingPass = QD3D12CommandBuffer::NoPass; @@ -2041,6 +2238,36 @@ void QD3D12CpuDescriptorPool::release(const QD3D12Descriptor &descriptor, quint3 quint64(descriptor.cpuHandle.ptr)); } +bool QD3D12QueryHeap::create(ID3D12Device *device, + quint32 queryCount, + D3D12_QUERY_HEAP_TYPE heapType) +{ + capacity = queryCount; + + D3D12_QUERY_HEAP_DESC heapDesc = {}; + heapDesc.Type = heapType; + heapDesc.Count = capacity; + + HRESULT hr = device->CreateQueryHeap(&heapDesc, __uuidof(ID3D12QueryHeap), reinterpret_cast<void **>(&heap)); + if (FAILED(hr)) { + qWarning("Failed to create query heap: %s", qPrintable(QSystemError::windowsComString(hr))); + heap = nullptr; + capacity = 0; + return false; + } + + return true; +} + +void QD3D12QueryHeap::destroy() +{ + if (heap) { + heap->Release(); + heap = nullptr; + } + capacity = 0; +} + bool QD3D12StagingArea::create(QRhiD3D12 *rhi, quint32 capacity, D3D12_HEAP_TYPE heapType) { Q_ASSERT(heapType == D3D12_HEAP_TYPE_UPLOAD || heapType == D3D12_HEAP_TYPE_READBACK); @@ -2381,8 +2608,8 @@ static inline QPair<int, int> mapBinding(int binding, const QShader::NativeResou void QD3D12ShaderResourceVisitor::visit() { - for (int bindingIdx = 0, bindingCount = srb->sortedBindings.count(); bindingIdx != bindingCount; ++bindingIdx) { - const QRhiShaderResourceBinding &b(srb->sortedBindings[bindingIdx]); + for (int bindingIdx = 0, bindingCount = srb->m_bindings.count(); bindingIdx != bindingCount; ++bindingIdx) { + const QRhiShaderResourceBinding &b(srb->m_bindings[bindingIdx]); const QRhiShaderResourceBinding::Data *bd = QRhiImplementation::shaderResourceBindingData(b); for (int stageIdx = 0; stageIdx < stageCount; ++stageIdx) { @@ -2533,6 +2760,7 @@ bool QD3D12MipmapGenerator::create(QRhiD3D12 *rhiD) // b0 rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams[0].Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC; // t0 descriptorRanges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; @@ -2879,24 +3107,18 @@ void QRhiD3D12::waitGpu() } } -DXGI_SAMPLE_DESC QRhiD3D12::effectiveSampleCount(int sampleCount, DXGI_FORMAT format) const +DXGI_SAMPLE_DESC QRhiD3D12::effectiveSampleDesc(int sampleCount, DXGI_FORMAT format) const { DXGI_SAMPLE_DESC desc; desc.Count = 1; desc.Quality = 0; - // Stay compatible with QSurfaceFormat and friends where samples == 0 means the same as 1. - int s = qBound(1, sampleCount, 64); - - if (!supportedSampleCounts().contains(s)) { - qWarning("Attempted to set unsupported sample count %d", sampleCount); - return desc; - } + const int s = effectiveSampleCount(sampleCount); if (s > 1) { D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msaaInfo = {}; msaaInfo.Format = format; - msaaInfo.SampleCount = s; + msaaInfo.SampleCount = UINT(s); if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msaaInfo, sizeof(msaaInfo)))) { if (msaaInfo.NumQualityLevels > 0) { desc.Count = UINT(s); @@ -2910,7 +3132,7 @@ DXGI_SAMPLE_DESC QRhiD3D12::effectiveSampleCount(int sampleCount, DXGI_FORMAT fo return desc; } -bool QRhiD3D12::startCommandListForCurrentFrameSlot(ID3D12GraphicsCommandList **cmdList) +bool QRhiD3D12::startCommandListForCurrentFrameSlot(ID3D12GraphicsCommandList1 **cmdList) { ID3D12CommandAllocator *cmdAlloc = cmdAllocators[currentFrameSlot]; if (!*cmdList) { @@ -2918,7 +3140,7 @@ bool QRhiD3D12::startCommandListForCurrentFrameSlot(ID3D12GraphicsCommandList ** D3D12_COMMAND_LIST_TYPE_DIRECT, cmdAlloc, nullptr, - __uuidof(ID3D12GraphicsCommandList), + __uuidof(ID3D12GraphicsCommandList1), reinterpret_cast<void **>(cmdList)); if (FAILED(hr)) { qWarning("Failed to create command list: %s", qPrintable(QSystemError::windowsComString(hr))); @@ -3076,18 +3298,42 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd for (int layer = 0, maxLayer = u.subresDesc.size(); layer < maxLayer; ++layer) { for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) { - const UINT subresource = calcSubresource(UINT(level), is3D ? 0u : UINT(layer), texD->mipLevelCount); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout; - UINT64 totalBytes = 0; - D3D12_RESOURCE_DESC desc = res->desc; - if (is3D) { - desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - desc.DepthOrArraySize = 1; + D3D12_SUBRESOURCE_FOOTPRINT footprint = {}; + footprint.Format = res->desc.Format; + footprint.Depth = 1; + quint32 totalBytes = 0; + + const QSize subresSize = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize) + : subresDesc.sourceSize(); + const QPoint srcPos = subresDesc.sourceTopLeft(); + QPoint dstPos = subresDesc.destinationTopLeft(); + + if (!subresDesc.image().isNull()) { + const QImage img = subresDesc.image(); + const int bpl = img.bytesPerLine(); + footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + totalBytes = footprint.RowPitch * img.height(); + } else if (!subresDesc.data().isEmpty() && isCompressedFormat(texD->m_format)) { + QSize blockDim; + quint32 bpl = 0; + compressedFormatInfo(texD->m_format, subresSize, &bpl, nullptr, &blockDim); + footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height(); + totalBytes = footprint.RowPitch * rowCount; + } else if (!subresDesc.data().isEmpty()) { + quint32 bpl = 0; + if (subresDesc.dataStride()) + bpl = subresDesc.dataStride(); + else + textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr); + footprint.RowPitch = aligned<UINT>(bpl, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + totalBytes = footprint.RowPitch * subresSize.height(); + } else { + qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level); + continue; } - dev->GetCopyableFootprints(&desc, subresource, 1, 0, - &layout, nullptr, nullptr, &totalBytes); - const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(quint32(totalBytes), 1); + const quint32 allocSize = QD3D12StagingArea::allocSizeForArray(totalBytes, 1); QD3D12StagingArea::Allocation stagingAlloc; if (smallStagingAreas[currentFrameSlot].remainingCapacity() >= allocSize) stagingAlloc = smallStagingAreas[currentFrameSlot].get(allocSize); @@ -3104,32 +3350,29 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd } } - const UINT requiredBytesPerLine = layout.Footprint.RowPitch; // multiple of 256 - const QSize subresSize = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize) - : subresDesc.sourceSize(); - const QPoint srcPos = subresDesc.sourceTopLeft(); - QPoint dstPos = subresDesc.destinationTopLeft(); - D3D12_TEXTURE_COPY_LOCATION dst; dst.pResource = res->resource; dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst.SubresourceIndex = subresource; + dst.SubresourceIndex = calcSubresource(UINT(level), is3D ? 0u : UINT(layer), texD->mipLevelCount); D3D12_TEXTURE_COPY_LOCATION src; src.pResource = stagingAlloc.buffer; src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src.PlacedFootprint.Offset = stagingAlloc.bufferOffset; - src.PlacedFootprint.Footprint = layout.Footprint; D3D12_BOX srcBox; // back, right, bottom are exclusive if (!subresDesc.image().isNull()) { - QImage img = subresDesc.image(); + const QImage img = subresDesc.image(); const int bpc = qMax(1, img.depth() / 8); const int bpl = img.bytesPerLine(); QSize size = subresDesc.sourceSize().isEmpty() ? img.size() : subresDesc.sourceSize(); size.setWidth(qMin(size.width(), img.width() - srcPos.x())); size.setHeight(qMin(size.height(), img.height() - srcPos.y())); + + footprint.Width = size.width(); + footprint.Height = size.height(); + srcBox.left = 0; srcBox.top = 0; srcBox.right = UINT(size.width()); @@ -3140,7 +3383,7 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd const uchar *imgPtr = img.constBits(); const quint32 lineBytes = size.width() * bpc; for (int y = 0, h = size.height(); y < h; ++y) { - memcpy(stagingAlloc.p + y * requiredBytesPerLine, + memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + srcPos.x() * bpc + (y + srcPos.y()) * bpl, lineBytes); } @@ -3157,15 +3400,19 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd // width and height must be multiples of the block width and height srcBox.right = aligned(subresSize.width(), blockDim.width()); srcBox.bottom = aligned(subresSize.height(), blockDim.height()); + srcBox.front = 0; srcBox.back = 1; - const quint32 copyBytes = qMin(bpl, requiredBytesPerLine); + footprint.Width = aligned(subresSize.width(), blockDim.width()); + footprint.Height = aligned(subresSize.height(), blockDim.height()); + + const quint32 copyBytes = qMin(bpl, footprint.RowPitch); const QByteArray imgData = subresDesc.data(); const char *imgPtr = imgData.constData(); const int rowCount = aligned(subresSize.height(), blockDim.height()) / blockDim.height(); for (int y = 0; y < rowCount; ++y) - memcpy(stagingAlloc.p + y * requiredBytesPerLine, imgPtr + y * bpl, copyBytes); + memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes); } else if (!subresDesc.data().isEmpty()) { srcBox.left = 0; srcBox.top = 0; @@ -3174,24 +3421,24 @@ void QRhiD3D12::enqueueResourceUpdates(QD3D12CommandBuffer *cbD, QRhiResourceUpd srcBox.front = 0; srcBox.back = 1; + footprint.Width = subresSize.width(); + footprint.Height = subresSize.height(); + quint32 bpl = 0; if (subresDesc.dataStride()) bpl = subresDesc.dataStride(); else textureFormatInfo(texD->m_format, subresSize, &bpl, nullptr, nullptr); - const quint32 copyBytes = qMin(bpl, requiredBytesPerLine); + const quint32 copyBytes = qMin(bpl, footprint.RowPitch); const QByteArray data = subresDesc.data(); const char *imgPtr = data.constData(); for (int y = 0, h = subresSize.height(); y < h; ++y) - memcpy(stagingAlloc.p + y * requiredBytesPerLine, imgPtr + y * bpl, copyBytes); - } else { - qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level); - if (ownStagingArea.has_value()) - ownStagingArea->destroyWithDeferredRelease(&releaseQueue); - continue; + memcpy(stagingAlloc.p + y * footprint.RowPitch, imgPtr + y * bpl, copyBytes); } + src.PlacedFootprint.Footprint = footprint; + cbD->cmdList->CopyTextureRegion(&dst, UINT(dstPos.x()), UINT(dstPos.y()), @@ -3742,7 +3989,7 @@ bool QD3D12RenderBuffer::create() case QRhiRenderBuffer::Color: { dxgiFormat = toD3DTextureFormat(backingFormat(), {}); - sampleDesc = rhiD->effectiveSampleCount(m_sampleCount, dxgiFormat); + sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat); D3D12_RESOURCE_DESC resourceDesc = {}; resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; resourceDesc.Width = UINT64(m_pixelSize.width()); @@ -3783,7 +4030,7 @@ bool QD3D12RenderBuffer::create() case QRhiRenderBuffer::DepthStencil: { dxgiFormat = DS_FORMAT; - sampleDesc = rhiD->effectiveSampleCount(m_sampleCount, dxgiFormat); + sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat); D3D12_RESOURCE_DESC resourceDesc = {}; resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; resourceDesc.Width = UINT64(m_pixelSize.width()); @@ -3936,10 +4183,30 @@ bool QD3D12Texture::prepareCreate(QSize *adjustedSize) const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1) : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize); - QRHI_RES_RHI(QRhiD3D12); dxgiFormat = toD3DTextureFormat(m_format, m_flags); + if (isDepth) { + srvFormat = toD3DDepthTextureSRVFormat(m_format); + rtFormat = toD3DDepthTextureDSVFormat(m_format); + } else { + srvFormat = dxgiFormat; + rtFormat = dxgiFormat; + } + if (m_writeViewFormat.format != UnknownFormat) { + if (isDepth) + rtFormat = toD3DDepthTextureDSVFormat(m_writeViewFormat.format); + else + rtFormat = toD3DTextureFormat(m_writeViewFormat.format, m_writeViewFormat.srgb ? sRGB : Flags()); + } + if (m_readViewFormat.format != UnknownFormat) { + if (isDepth) + srvFormat = toD3DDepthTextureSRVFormat(m_readViewFormat.format); + else + srvFormat = toD3DTextureFormat(m_readViewFormat.format, m_readViewFormat.srgb ? sRGB : Flags()); + } + + QRHI_RES_RHI(QRhiD3D12); mipLevelCount = uint(hasMipMaps ? rhiD->q->mipLevelsForSize(size) : 1); - sampleDesc = rhiD->effectiveSampleCount(m_sampleCount, dxgiFormat); + sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, dxgiFormat); if (sampleDesc.Count > 1) { if (isCube) { qWarning("Cubemap texture cannot be multisample"); @@ -3996,14 +4263,13 @@ bool QD3D12Texture::prepareCreate(QSize *adjustedSize) bool QD3D12Texture::finishCreate() { QRHI_RES_RHI(QRhiD3D12); - const bool isDepth = isDepthTextureFormat(m_format); const bool isCube = m_flags.testFlag(CubeMap); const bool is3D = m_flags.testFlag(ThreeDimensional); const bool isArray = m_flags.testFlag(TextureArray); const bool is1D = m_flags.testFlag(OneDimensional); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = isDepth ? toD3DDepthTextureSRVFormat(m_format) : dxgiFormat; + srvDesc.Format = srvFormat; srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; if (isCube) { @@ -4093,7 +4359,7 @@ bool QD3D12Texture::create() bool needsOptimizedClearValueSpecified = false; UINT resourceFlags = 0; - if (m_flags.testFlag(RenderTarget)) { + if (m_flags.testFlag(RenderTarget) || sampleDesc.Count > 1) { if (isDepth) resourceFlags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; else @@ -4362,7 +4628,7 @@ QRhiRenderPassDescriptor *QD3D12TextureRenderTarget::newCompatibleRenderPassDesc QD3D12Texture *texD = QRHI_RES(QD3D12Texture, it->texture()); QD3D12RenderBuffer *rbD = QRHI_RES(QD3D12RenderBuffer, it->renderBuffer()); if (texD) - rpD->colorFormat[rpD->colorAttachmentCount] = texD->dxgiFormat; + rpD->colorFormat[rpD->colorAttachmentCount] = texD->rtFormat; else if (rbD) rpD->colorFormat[rpD->colorAttachmentCount] = rbD->dxgiFormat; rpD->colorAttachmentCount += 1; @@ -4410,19 +4676,21 @@ bool QD3D12TextureRenderTarget::create() qWarning("Could not look up texture handle for render target"); return false; } + const bool isMultiView = it->multiViewCount() >= 2; + UINT layerCount = isMultiView ? UINT(it->multiViewCount()) : 1; D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; - rtvDesc.Format = toD3DTextureFormat(texD->format(), texD->flags()); + rtvDesc.Format = texD->rtFormat; if (texD->flags().testFlag(QRhiTexture::CubeMap)) { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; rtvDesc.Texture2DArray.MipSlice = UINT(colorAtt.level()); rtvDesc.Texture2DArray.FirstArraySlice = UINT(colorAtt.layer()); - rtvDesc.Texture2DArray.ArraySize = 1; + rtvDesc.Texture2DArray.ArraySize = layerCount; } else if (texD->flags().testFlag(QRhiTexture::OneDimensional)) { if (texD->flags().testFlag(QRhiTexture::TextureArray)) { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY; rtvDesc.Texture1DArray.MipSlice = UINT(colorAtt.level()); rtvDesc.Texture1DArray.FirstArraySlice = UINT(colorAtt.layer()); - rtvDesc.Texture1DArray.ArraySize = 1; + rtvDesc.Texture1DArray.ArraySize = layerCount; } else { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; rtvDesc.Texture1D.MipSlice = UINT(colorAtt.level()); @@ -4431,18 +4699,18 @@ bool QD3D12TextureRenderTarget::create() if (texD->sampleDesc.Count > 1) { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; rtvDesc.Texture2DMSArray.FirstArraySlice = UINT(colorAtt.layer()); - rtvDesc.Texture2DMSArray.ArraySize = 1; + rtvDesc.Texture2DMSArray.ArraySize = layerCount; } else { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; rtvDesc.Texture2DArray.MipSlice = UINT(colorAtt.level()); rtvDesc.Texture2DArray.FirstArraySlice = UINT(colorAtt.layer()); - rtvDesc.Texture2DArray.ArraySize = 1; + rtvDesc.Texture2DArray.ArraySize = layerCount; } } else if (texD->flags().testFlag(QRhiTexture::ThreeDimensional)) { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; rtvDesc.Texture3D.MipSlice = UINT(colorAtt.level()); rtvDesc.Texture3D.FirstWSlice = UINT(colorAtt.layer()); - rtvDesc.Texture3D.WSize = 1; + rtvDesc.Texture3D.WSize = layerCount; } else { if (texD->sampleDesc.Count > 1) { rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; @@ -4485,9 +4753,30 @@ bool QD3D12TextureRenderTarget::create() return false; } D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; - dsvDesc.Format = toD3DDepthTextureDSVFormat(depthTexD->format()); + dsvDesc.Format = depthTexD->rtFormat; dsvDesc.ViewDimension = depthTexD->sampleDesc.Count > 1 ? D3D12_DSV_DIMENSION_TEXTURE2DMS : D3D12_DSV_DIMENSION_TEXTURE2D; + if (depthTexD->flags().testFlag(QRhiTexture::TextureArray)) { + if (depthTexD->sampleDesc.Count > 1) { + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY; + if (depthTexD->arrayRangeStart() >= 0 && depthTexD->arrayRangeLength() >= 0) { + dsvDesc.Texture2DMSArray.FirstArraySlice = UINT(depthTexD->arrayRangeStart()); + dsvDesc.Texture2DMSArray.ArraySize = UINT(depthTexD->arrayRangeLength()); + } else { + dsvDesc.Texture2DMSArray.FirstArraySlice = 0; + dsvDesc.Texture2DMSArray.ArraySize = UINT(qMax(0, depthTexD->arraySize())); + } + } else { + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY; + if (depthTexD->arrayRangeStart() >= 0 && depthTexD->arrayRangeLength() >= 0) { + dsvDesc.Texture2DArray.FirstArraySlice = UINT(depthTexD->arrayRangeStart()); + dsvDesc.Texture2DArray.ArraySize = UINT(depthTexD->arrayRangeLength()); + } else { + dsvDesc.Texture2DArray.FirstArraySlice = 0; + dsvDesc.Texture2DArray.ArraySize = UINT(qMax(0, depthTexD->arraySize())); + } + } + } dsv = rhiD->dsvPool.allocate(1); if (!dsv.isValid()) { qWarning("Failed to allocate DSV for texture render target"); @@ -4554,8 +4843,6 @@ QD3D12ShaderResourceBindings::~QD3D12ShaderResourceBindings() void QD3D12ShaderResourceBindings::destroy() { - sortedBindings.clear(); - QRHI_RES_RHI(QRhiD3D12); if (rhiD) rhiD->unregisterResource(this); @@ -4563,20 +4850,14 @@ void QD3D12ShaderResourceBindings::destroy() bool QD3D12ShaderResourceBindings::create() { - if (!sortedBindings.isEmpty()) - destroy(); - QRHI_RES_RHI(QRhiD3D12); if (!rhiD->sanityCheckShaderResourceBindings(this)) return false; rhiD->updateLayoutDesc(this); - std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings)); - std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan); - hasDynamicOffset = false; - for (const QRhiShaderResourceBinding &b : sortedBindings) { + for (const QRhiShaderResourceBinding &b : std::as_const(m_bindings)) { const QRhiShaderResourceBinding::Data *bd = QRhiImplementation::shaderResourceBindingData(b); if (bd->type == QRhiShaderResourceBinding::UniformBuffer && bd->u.ubuf.hasDynamicOffset) { hasDynamicOffset = true; @@ -4599,11 +4880,7 @@ bool QD3D12ShaderResourceBindings::create() void QD3D12ShaderResourceBindings::updateResources(UpdateFlags flags) { - sortedBindings.clear(); - std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings)); - if (!flags.testFlag(BindingsAreSorted)) - std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan); - + Q_UNUSED(flags); generation += 1; } @@ -4621,6 +4898,7 @@ void QD3D12ShaderResourceBindings::visitUniformBuffer(QD3D12Stage s, rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParam.ShaderVisibility = qd3d12_stageToVisibility(s); rootParam.Descriptor.ShaderRegister = shaderRegister; + rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC; visitorData.cbParams[s].append(rootParam); } @@ -4812,21 +5090,14 @@ QD3D12ObjectHandle QD3D12ShaderResourceBindings::createRootSignature(const QD3D1 return QD3D12RootSignature::addToPool(&rhiD->rootSignaturePool, rootSig); } -// For now we mirror exactly what's done in the D3D11 backend, meaning we use -// the old shader compiler (so like fxc, not dxc) to generate shader model 5.0 -// output. Some day this should be moved to the new compiler and DXIL. - -static pD3DCompile resolveD3DCompile() -{ - for (const wchar_t *libraryName : {L"D3DCompiler_47", L"D3DCompiler_43"}) { - QSystemLibrary library(libraryName); - if (library.load()) { - if (auto symbol = library.resolve("D3DCompile")) - return reinterpret_cast<pD3DCompile>(symbol); - } - } - return nullptr; -} +// For shader model < 6.0 we do the same as the D3D11 backend: use the old +// compiler (D3DCompile) to generate DXBC, just as qsb does (when -c is passed) +// by invoking fxc, not dxc. For SM >= 6.0 we have to use the new compiler and +// work with DXIL. And that involves IDxcCompiler and needs the presence of +// dxcompiler.dll and dxil.dll at runtime. Plus there's a chance we have +// ancient SDK headers when not using MSVC. So this is heavily optional, +// meaning support for dxc can be disabled both at build time (no dxcapi.h) and +// at run time (no DLLs). static inline void makeHlslTargetString(char target[7], const char stage[3], int version) { @@ -4841,9 +5112,139 @@ static inline void makeHlslTargetString(char target[7], const char stage[3], int target[6] = '\0'; } +enum class HlslCompileFlag +{ + WithDebugInfo = 0x01 +}; + +static QByteArray legacyCompile(const QShaderCode &hlslSource, const char *target, int flags, QString *error) +{ + static const pD3DCompile d3dCompile = QRhiD3D::resolveD3DCompile(); + if (!d3dCompile) { + qWarning("Unable to resolve function D3DCompile()"); + return QByteArray(); + } + + ID3DBlob *bytecode = nullptr; + ID3DBlob *errors = nullptr; + UINT d3dCompileFlags = 0; + if (flags & int(HlslCompileFlag::WithDebugInfo)) + d3dCompileFlags |= D3DCOMPILE_DEBUG; + + HRESULT hr = d3dCompile(hlslSource.shader().constData(), SIZE_T(hlslSource.shader().size()), + nullptr, nullptr, nullptr, + hlslSource.entryPoint().constData(), target, d3dCompileFlags, 0, &bytecode, &errors); + if (FAILED(hr) || !bytecode) { + qWarning("HLSL shader compilation failed: 0x%x", uint(hr)); + if (errors) { + *error = QString::fromUtf8(static_cast<const char *>(errors->GetBufferPointer()), + int(errors->GetBufferSize())); + errors->Release(); + } + return QByteArray(); + } + + QByteArray result; + result.resize(int(bytecode->GetBufferSize())); + memcpy(result.data(), bytecode->GetBufferPointer(), size_t(result.size())); + bytecode->Release(); + return result; +} + +#ifdef QRHI_D3D12_HAS_DXC + +#ifndef DXC_CP_UTF8 +#define DXC_CP_UTF8 65001 +#endif + +#ifndef DXC_ARG_DEBUG +#define DXC_ARG_DEBUG L"-Zi" +#endif + +static QByteArray dxcCompile(const QShaderCode &hlslSource, const char *target, int flags, QString *error) +{ + static std::pair<IDxcCompiler *, IDxcLibrary *> dxc = QRhiD3D::createDxcCompiler(); + IDxcCompiler *compiler = dxc.first; + if (!compiler) { + qWarning("Unable to instantiate IDxcCompiler. Likely no dxcompiler.dll and dxil.dll present. " + "Use windeployqt or try https://github.com/microsoft/DirectXShaderCompiler/releases"); + return QByteArray(); + } + IDxcLibrary *library = dxc.second; + if (!library) + return QByteArray(); + + IDxcBlobEncoding *sourceBlob = nullptr; + HRESULT hr = library->CreateBlobWithEncodingOnHeapCopy(hlslSource.shader().constData(), + UINT32(hlslSource.shader().size()), + DXC_CP_UTF8, + &sourceBlob); + if (FAILED(hr)) { + qWarning("Failed to create source blob for dxc: 0x%x (%s)", + uint(hr), + qPrintable(QSystemError::windowsComString(hr))); + return QByteArray(); + } + + const QString entryPointStr = QString::fromLatin1(hlslSource.entryPoint()); + const QString targetStr = QString::fromLatin1(target); + + QVarLengthArray<LPCWSTR, 4> argPtrs; + QString debugArg; + if (flags & int(HlslCompileFlag::WithDebugInfo)) { + debugArg = QString::fromUtf16(reinterpret_cast<const char16_t *>(DXC_ARG_DEBUG)); + argPtrs.append(reinterpret_cast<LPCWSTR>(debugArg.utf16())); + } + + IDxcOperationResult *result = nullptr; + hr = compiler->Compile(sourceBlob, + nullptr, + reinterpret_cast<LPCWSTR>(entryPointStr.utf16()), + reinterpret_cast<LPCWSTR>(targetStr.utf16()), + argPtrs.data(), argPtrs.count(), + nullptr, 0, + nullptr, + &result); + sourceBlob->Release(); + if (SUCCEEDED(hr)) + result->GetStatus(&hr); + if (FAILED(hr)) { + qWarning("HLSL shader compilation failed: 0x%x (%s)", + uint(hr), + qPrintable(QSystemError::windowsComString(hr))); + if (result) { + IDxcBlobEncoding *errorsBlob = nullptr; + if (SUCCEEDED(result->GetErrorBuffer(&errorsBlob))) { + if (errorsBlob) { + *error = QString::fromUtf8(static_cast<const char *>(errorsBlob->GetBufferPointer()), + int(errorsBlob->GetBufferSize())); + errorsBlob->Release(); + } + } + } + return QByteArray(); + } + + IDxcBlob *bytecode = nullptr; + if FAILED(result->GetResult(&bytecode)) { + qWarning("No result from IDxcCompiler: 0x%x (%s)", + uint(hr), + qPrintable(QSystemError::windowsComString(hr))); + return QByteArray(); + } + + QByteArray ba; + ba.resize(int(bytecode->GetBufferSize())); + memcpy(ba.data(), bytecode->GetBufferPointer(), size_t(ba.size())); + bytecode->Release(); + return ba; +} + +#endif // QRHI_D3D12_HAS_DXC + static QByteArray compileHlslShaderSource(const QShader &shader, QShader::Variant shaderVariant, - UINT flags, + int flags, QString *error, QShaderKey *usedShaderKey) { @@ -4900,33 +5301,17 @@ static QByteArray compileHlslShaderSource(const QShader &shader, break; } - static const pD3DCompile d3dCompile = resolveD3DCompile(); - if (!d3dCompile) { - qWarning("Unable to resolve function D3DCompile()"); - return QByteArray(); - } - - ID3DBlob *bytecode = nullptr; - ID3DBlob *errors = nullptr; - HRESULT hr = d3dCompile(hlslSource.shader().constData(), SIZE_T(hlslSource.shader().size()), - nullptr, nullptr, nullptr, - hlslSource.entryPoint().constData(), target, flags, 0, &bytecode, &errors); - if (FAILED(hr) || !bytecode) { - qWarning("HLSL shader compilation failed: 0x%x", uint(hr)); - if (errors) { - *error = QString::fromUtf8(static_cast<const char *>(errors->GetBufferPointer()), - int(errors->GetBufferSize())); - errors->Release(); - } - return QByteArray(); + if (key.sourceVersion().version() >= 60) { +#ifdef QRHI_D3D12_HAS_DXC + return dxcCompile(hlslSource, target, flags, error); +#else + qWarning("Attempted to runtime-compile HLSL source code for shader model >= 6.0 " + "but the Qt build has no support for DXC. " + "Rebuild Qt with a recent Windows SDK or switch to an MSVC build."); +#endif } - QByteArray result; - result.resize(int(bytecode->GetBufferSize())); - memcpy(result.data(), bytecode->GetBufferPointer(), size_t(result.size())); - bytecode->Release(); - - return result; + return legacyCompile(hlslSource, target, flags, error); } static inline UINT8 toD3DColorWriteMask(QRhiGraphicsPipeline::ColorMask c) @@ -5161,6 +5546,22 @@ static inline DXGI_FORMAT toD3DAttributeFormat(QRhiVertexInputAttribute::Format return DXGI_FORMAT_R16G16_FLOAT; case QRhiVertexInputAttribute::Half: return DXGI_FORMAT_R16_FLOAT; + case QRhiVertexInputAttribute::UShort4: + // Note: D3D does not support UShort3. Pass through UShort3 as UShort4. + case QRhiVertexInputAttribute::UShort3: + return DXGI_FORMAT_R16G16B16A16_UINT; + case QRhiVertexInputAttribute::UShort2: + return DXGI_FORMAT_R16G16_UINT; + case QRhiVertexInputAttribute::UShort: + return DXGI_FORMAT_R16_UINT; + case QRhiVertexInputAttribute::SShort4: + // Note: D3D does not support SShort3. Pass through SShort3 as SShort4. + case QRhiVertexInputAttribute::SShort3: + return DXGI_FORMAT_R16G16B16A16_SINT; + case QRhiVertexInputAttribute::SShort2: + return DXGI_FORMAT_R16G16_SINT; + case QRhiVertexInputAttribute::SShort: + return DXGI_FORMAT_R16_SINT; } Q_UNREACHABLE_RETURN(DXGI_FORMAT_R32G32B32A32_FLOAT); } @@ -5216,16 +5617,16 @@ bool QD3D12GraphicsPipeline::create() } else { QString error; QShaderKey shaderKey; - UINT compileFlags = 0; + int compileFlags = 0; if (m_flags.testFlag(CompileShadersWithDebugInfo)) - compileFlags |= D3DCOMPILE_DEBUG; + compileFlags |= int(HlslCompileFlag::WithDebugInfo); const QByteArray bytecode = compileHlslShaderSource(shaderStage.shader(), shaderStage.shaderVariant(), compileFlags, &error, &shaderKey); if (bytecode.isEmpty()) { - qWarning("HLSL compute shader compilation failed: %s", qPrintable(error)); + qWarning("HLSL graphics shader compilation failed: %s", qPrintable(error)); return false; } @@ -5253,32 +5654,94 @@ bool QD3D12GraphicsPipeline::create() } QD3D12RenderPassDescriptor *rpD = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc); - const DXGI_SAMPLE_DESC sampleDesc = rhiD->effectiveSampleCount(m_sampleCount, DXGI_FORMAT(rpD->colorFormat[0])); + const DXGI_SAMPLE_DESC sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, DXGI_FORMAT(rpD->colorFormat[0])); + + struct { + QD3D12PipelineStateSubObject<ID3D12RootSignature *, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> rootSig; + QD3D12PipelineStateSubObject<D3D12_INPUT_LAYOUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT> inputLayout; + QD3D12PipelineStateSubObject<D3D12_PRIMITIVE_TOPOLOGY_TYPE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY> primitiveTopology; + QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS> VS; + QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS> HS; + QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS> DS; + QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS> GS; + QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS> PS; + QD3D12PipelineStateSubObject<D3D12_RASTERIZER_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER> rasterizerState; + QD3D12PipelineStateSubObject<D3D12_DEPTH_STENCIL_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL> depthStencilState; + QD3D12PipelineStateSubObject<D3D12_BLEND_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND> blendState; + QD3D12PipelineStateSubObject<D3D12_RT_FORMAT_ARRAY, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS> rtFormats; + QD3D12PipelineStateSubObject<DXGI_FORMAT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT> dsFormat; + QD3D12PipelineStateSubObject<DXGI_SAMPLE_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC> sampleDesc; + QD3D12PipelineStateSubObject<UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK> sampleMask; + QD3D12PipelineStateSubObject<D3D12_VIEW_INSTANCING_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING> viewInstancingDesc; + } stream; + + stream.rootSig.object = rootSig; + + QVarLengthArray<D3D12_INPUT_ELEMENT_DESC, 4> inputDescs; + QByteArrayList matrixSliceSemantics; + if (!shaderBytecode[VS].isEmpty()) { + for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes(); + it != itEnd; ++it) + { + D3D12_INPUT_ELEMENT_DESC desc = {}; + // The output from SPIRV-Cross uses TEXCOORD<location> as the + // semantic, except for matrices that are unrolled into consecutive + // vec2/3/4s attributes and need TEXCOORD<location>_ as + // SemanticName and row/column index as SemanticIndex. + const int matrixSlice = it->matrixSlice(); + if (matrixSlice < 0) { + desc.SemanticName = "TEXCOORD"; + desc.SemanticIndex = UINT(it->location()); + } else { + QByteArray sem; + sem.resize(16); + qsnprintf(sem.data(), sem.size(), "TEXCOORD%d_", it->location() - matrixSlice); + matrixSliceSemantics.append(sem); + desc.SemanticName = matrixSliceSemantics.last().constData(); + desc.SemanticIndex = UINT(matrixSlice); + } + desc.Format = toD3DAttributeFormat(it->format()); + desc.InputSlot = UINT(it->binding()); + desc.AlignedByteOffset = it->offset(); + const QRhiVertexInputBinding *inputBinding = m_vertexInputLayout.bindingAt(it->binding()); + if (inputBinding->classification() == QRhiVertexInputBinding::PerInstance) { + desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; + desc.InstanceDataStepRate = inputBinding->instanceStepRate(); + } else { + desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + } + inputDescs.append(desc); + } + } + + stream.inputLayout.object.NumElements = inputDescs.count(); + stream.inputLayout.object.pInputElementDescs = inputDescs.isEmpty() ? nullptr : inputDescs.constData(); + + stream.primitiveTopology.object = toD3DTopologyType(m_topology); + topology = toD3DTopology(m_topology, m_patchControlPointCount); - D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; - psoDesc.pRootSignature = rootSig; for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { const int d3dStage = qd3d12_stage(shaderStage.type()); switch (d3dStage) { case VS: - psoDesc.VS.pShaderBytecode = shaderBytecode[d3dStage].constData(); - psoDesc.VS.BytecodeLength = shaderBytecode[d3dStage].size(); + stream.VS.object.pShaderBytecode = shaderBytecode[d3dStage].constData(); + stream.VS.object.BytecodeLength = shaderBytecode[d3dStage].size(); break; case HS: - psoDesc.HS.pShaderBytecode = shaderBytecode[d3dStage].constData(); - psoDesc.HS.BytecodeLength = shaderBytecode[d3dStage].size(); + stream.HS.object.pShaderBytecode = shaderBytecode[d3dStage].constData(); + stream.HS.object.BytecodeLength = shaderBytecode[d3dStage].size(); break; case DS: - psoDesc.DS.pShaderBytecode = shaderBytecode[d3dStage].constData(); - psoDesc.DS.BytecodeLength = shaderBytecode[d3dStage].size(); + stream.DS.object.pShaderBytecode = shaderBytecode[d3dStage].constData(); + stream.DS.object.BytecodeLength = shaderBytecode[d3dStage].size(); break; case GS: - psoDesc.GS.pShaderBytecode = shaderBytecode[d3dStage].constData(); - psoDesc.GS.BytecodeLength = shaderBytecode[d3dStage].size(); + stream.GS.object.pShaderBytecode = shaderBytecode[d3dStage].constData(); + stream.GS.object.BytecodeLength = shaderBytecode[d3dStage].size(); break; case PS: - psoDesc.PS.pShaderBytecode = shaderBytecode[d3dStage].constData(); - psoDesc.PS.BytecodeLength = shaderBytecode[d3dStage].size(); + stream.PS.object.pShaderBytecode = shaderBytecode[d3dStage].constData(); + stream.PS.object.BytecodeLength = shaderBytecode[d3dStage].size(); break; default: Q_UNREACHABLE(); @@ -5286,7 +5749,32 @@ bool QD3D12GraphicsPipeline::create() } } - psoDesc.BlendState.IndependentBlendEnable = m_targetBlends.count() > 1; + stream.rasterizerState.object.FillMode = toD3DFillMode(m_polygonMode); + stream.rasterizerState.object.CullMode = toD3DCullMode(m_cullMode); + stream.rasterizerState.object.FrontCounterClockwise = m_frontFace == CCW; + stream.rasterizerState.object.DepthBias = m_depthBias; + stream.rasterizerState.object.SlopeScaledDepthBias = m_slopeScaledDepthBias; + stream.rasterizerState.object.DepthClipEnable = TRUE; + stream.rasterizerState.object.MultisampleEnable = sampleDesc.Count > 1; + + stream.depthStencilState.object.DepthEnable = m_depthTest; + stream.depthStencilState.object.DepthWriteMask = m_depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + stream.depthStencilState.object.DepthFunc = toD3DCompareOp(m_depthOp); + stream.depthStencilState.object.StencilEnable = m_stencilTest; + if (m_stencilTest) { + stream.depthStencilState.object.StencilReadMask = UINT8(m_stencilReadMask); + stream.depthStencilState.object.StencilWriteMask = UINT8(m_stencilWriteMask); + stream.depthStencilState.object.FrontFace.StencilFailOp = toD3DStencilOp(m_stencilFront.failOp); + stream.depthStencilState.object.FrontFace.StencilDepthFailOp = toD3DStencilOp(m_stencilFront.depthFailOp); + stream.depthStencilState.object.FrontFace.StencilPassOp = toD3DStencilOp(m_stencilFront.passOp); + stream.depthStencilState.object.FrontFace.StencilFunc = toD3DCompareOp(m_stencilFront.compareOp); + stream.depthStencilState.object.BackFace.StencilFailOp = toD3DStencilOp(m_stencilBack.failOp); + stream.depthStencilState.object.BackFace.StencilDepthFailOp = toD3DStencilOp(m_stencilBack.depthFailOp); + stream.depthStencilState.object.BackFace.StencilPassOp = toD3DStencilOp(m_stencilBack.passOp); + stream.depthStencilState.object.BackFace.StencilFunc = toD3DCompareOp(m_stencilBack.compareOp); + } + + stream.blendState.object.IndependentBlendEnable = m_targetBlends.count() > 1; for (int i = 0, ie = m_targetBlends.count(); i != ie; ++i) { const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[i]); D3D12_RENDER_TARGET_BLEND_DESC blend = {}; @@ -5298,95 +5786,40 @@ bool QD3D12GraphicsPipeline::create() blend.DestBlendAlpha = toD3DBlendFactor(b.dstAlpha, false); blend.BlendOpAlpha = toD3DBlendOp(b.opAlpha); blend.RenderTargetWriteMask = toD3DColorWriteMask(b.colorWrite); - psoDesc.BlendState.RenderTarget[i] = blend; + stream.blendState.object.RenderTarget[i] = blend; } if (m_targetBlends.isEmpty()) { D3D12_RENDER_TARGET_BLEND_DESC blend = {}; blend.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - psoDesc.BlendState.RenderTarget[0] = blend; + stream.blendState.object.RenderTarget[0] = blend; } - psoDesc.SampleMask = 0xFFFFFFFF; + stream.rtFormats.object.NumRenderTargets = rpD->colorAttachmentCount; + for (int i = 0; i < rpD->colorAttachmentCount; ++i) + stream.rtFormats.object.RTFormats[i] = DXGI_FORMAT(rpD->colorFormat[i]); - psoDesc.RasterizerState.FillMode = toD3DFillMode(m_polygonMode); - psoDesc.RasterizerState.CullMode = toD3DCullMode(m_cullMode); - psoDesc.RasterizerState.FrontCounterClockwise = m_frontFace == CCW; - psoDesc.RasterizerState.DepthBias = m_depthBias; - psoDesc.RasterizerState.SlopeScaledDepthBias = m_slopeScaledDepthBias; - psoDesc.RasterizerState.DepthClipEnable = TRUE; - psoDesc.RasterizerState.MultisampleEnable = sampleDesc.Count > 1; + stream.dsFormat.object = rpD->hasDepthStencil ? DXGI_FORMAT(rpD->dsFormat) : DXGI_FORMAT_UNKNOWN; - psoDesc.DepthStencilState.DepthEnable = m_depthTest; - psoDesc.DepthStencilState.DepthWriteMask = m_depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; - psoDesc.DepthStencilState.DepthFunc = toD3DCompareOp(m_depthOp); - psoDesc.DepthStencilState.StencilEnable = m_stencilTest; - if (m_stencilTest) { - psoDesc.DepthStencilState.StencilReadMask = UINT8(m_stencilReadMask); - psoDesc.DepthStencilState.StencilWriteMask = UINT8(m_stencilWriteMask); - psoDesc.DepthStencilState.FrontFace.StencilFailOp = toD3DStencilOp(m_stencilFront.failOp); - psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = toD3DStencilOp(m_stencilFront.depthFailOp); - psoDesc.DepthStencilState.FrontFace.StencilPassOp = toD3DStencilOp(m_stencilFront.passOp); - psoDesc.DepthStencilState.FrontFace.StencilFunc = toD3DCompareOp(m_stencilFront.compareOp); - psoDesc.DepthStencilState.BackFace.StencilFailOp = toD3DStencilOp(m_stencilBack.failOp); - psoDesc.DepthStencilState.BackFace.StencilDepthFailOp = toD3DStencilOp(m_stencilBack.depthFailOp); - psoDesc.DepthStencilState.BackFace.StencilPassOp = toD3DStencilOp(m_stencilBack.passOp); - psoDesc.DepthStencilState.BackFace.StencilFunc = toD3DCompareOp(m_stencilBack.compareOp); - } + stream.sampleDesc.object = sampleDesc; - QVarLengthArray<D3D12_INPUT_ELEMENT_DESC, 4> inputDescs; - QByteArrayList matrixSliceSemantics; - if (!shaderBytecode[VS].isEmpty()) { - for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes(); - it != itEnd; ++it) - { - D3D12_INPUT_ELEMENT_DESC desc = {}; - // The output from SPIRV-Cross uses TEXCOORD<location> as the - // semantic, except for matrices that are unrolled into consecutive - // vec2/3/4s attributes and need TEXCOORD<location>_ as - // SemanticName and row/column index as SemanticIndex. - const int matrixSlice = it->matrixSlice(); - if (matrixSlice < 0) { - desc.SemanticName = "TEXCOORD"; - desc.SemanticIndex = UINT(it->location()); - } else { - QByteArray sem; - sem.resize(16); - qsnprintf(sem.data(), sem.size(), "TEXCOORD%d_", it->location() - matrixSlice); - matrixSliceSemantics.append(sem); - desc.SemanticName = matrixSliceSemantics.last().constData(); - desc.SemanticIndex = UINT(matrixSlice); - } - desc.Format = toD3DAttributeFormat(it->format()); - desc.InputSlot = UINT(it->binding()); - desc.AlignedByteOffset = it->offset(); - const QRhiVertexInputBinding *inputBinding = m_vertexInputLayout.bindingAt(it->binding()); - if (inputBinding->classification() == QRhiVertexInputBinding::PerInstance) { - desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; - desc.InstanceDataStepRate = inputBinding->instanceStepRate(); - } else { - desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - } - inputDescs.append(desc); + stream.sampleMask.object = 0xFFFFFFFF; + + viewInstanceMask = 0; + const bool isMultiView = m_multiViewCount >= 2; + stream.viewInstancingDesc.object.ViewInstanceCount = isMultiView ? m_multiViewCount : 0; + QVarLengthArray<D3D12_VIEW_INSTANCE_LOCATION, 4> viewInstanceLocations; + if (isMultiView) { + for (int i = 0; i < m_multiViewCount; ++i) { + viewInstanceMask |= (1 << i); + viewInstanceLocations.append({ 0, UINT(i) }); } - } - if (!inputDescs.isEmpty()) { - psoDesc.InputLayout.pInputElementDescs = inputDescs.constData(); - psoDesc.InputLayout.NumElements = inputDescs.count(); + stream.viewInstancingDesc.object.pViewInstanceLocations = viewInstanceLocations.constData(); } - psoDesc.PrimitiveTopologyType = toD3DTopologyType(m_topology); - topology = toD3DTopology(m_topology, m_patchControlPointCount); - - psoDesc.NumRenderTargets = rpD->colorAttachmentCount; - for (int i = 0; i < rpD->colorAttachmentCount; ++i) - psoDesc.RTVFormats[i] = DXGI_FORMAT(rpD->colorFormat[i]); - psoDesc.DSVFormat = rpD->hasDepthStencil ? DXGI_FORMAT(rpD->dsFormat) : DXGI_FORMAT_UNKNOWN; - psoDesc.SampleDesc = sampleDesc; + const D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(stream), &stream }; ID3D12PipelineState *pso = nullptr; - HRESULT hr = rhiD->dev->CreateGraphicsPipelineState(&psoDesc, - __uuidof(ID3D12PipelineState), - reinterpret_cast<void **>(&pso)); + HRESULT hr = rhiD->dev->CreatePipelineState(&streamDesc, __uuidof(ID3D12PipelineState), reinterpret_cast<void **>(&pso)); if (FAILED(hr)) { qWarning("Failed to create graphics pipeline state: %s", qPrintable(QSystemError::windowsComString(hr))); @@ -5450,9 +5883,9 @@ bool QD3D12ComputePipeline::create() } else { QString error; QShaderKey shaderKey; - UINT compileFlags = 0; + int compileFlags = 0; if (m_flags.testFlag(CompileShadersWithDebugInfo)) - compileFlags |= D3DCOMPILE_DEBUG; + compileFlags |= int(HlslCompileFlag::WithDebugInfo); const QByteArray bytecode = compileHlslShaderSource(m_shaderStage.shader(), m_shaderStage.shaderVariant(), compileFlags, @@ -5485,14 +5918,16 @@ bool QD3D12ComputePipeline::create() return false; } - D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; - psoDesc.pRootSignature = rootSig; - psoDesc.CS.pShaderBytecode = shaderBytecode.constData(); - psoDesc.CS.BytecodeLength = shaderBytecode.size(); + struct { + QD3D12PipelineStateSubObject<ID3D12RootSignature *, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> rootSig; + QD3D12PipelineStateSubObject<D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS> CS; + } stream; + stream.rootSig.object = rootSig; + stream.CS.object.pShaderBytecode = shaderBytecode.constData(); + stream.CS.object.BytecodeLength = shaderBytecode.size(); + const D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(stream), &stream }; ID3D12PipelineState *pso = nullptr; - HRESULT hr = rhiD->dev->CreateComputePipelineState(&psoDesc, - __uuidof(ID3D12PipelineState), - reinterpret_cast<void **>(&pso)); + HRESULT hr = rhiD->dev->CreatePipelineState(&streamDesc, __uuidof(ID3D12PipelineState), reinterpret_cast<void **>(&pso)); if (FAILED(hr)) { qWarning("Failed to create compute pipeline state: %s", qPrintable(QSystemError::windowsComString(hr))); @@ -5644,6 +6079,7 @@ int QD3D12SwapChainRenderTarget::sampleCount() const QD3D12SwapChain::QD3D12SwapChain(QRhiImplementation *rhi) : QRhiSwapChain(rhi), rtWrapper(rhi, this), + rtWrapperRight(rhi, this), cbWrapper(rhi) { } @@ -5700,6 +6136,8 @@ void QD3D12SwapChain::releaseBuffers() for (UINT i = 0; i < BUFFER_COUNT; ++i) { rhiD->resourcePool.remove(colorBuffers[i]); rhiD->rtvPool.release(rtvs[i], 1); + if (stereo) + rhiD->rtvPool.release(rtvsRight[i], 1); if (!msaaBuffers[i].isNull()) rhiD->resourcePool.remove(msaaBuffers[i]); if (msaaRtvs[i].isValid()) @@ -5734,48 +6172,15 @@ QRhiRenderTarget *QD3D12SwapChain::currentFrameRenderTarget() return &rtWrapper; } -QSize QD3D12SwapChain::surfacePixelSize() +QRhiRenderTarget *QD3D12SwapChain::currentFrameRenderTarget(StereoTargetBuffer targetBuffer) { - Q_ASSERT(m_window); - return m_window->size() * m_window->devicePixelRatio(); + return !stereo || targetBuffer == StereoTargetBuffer::LeftBuffer ? &rtWrapper : &rtWrapperRight; } -static bool output6ForWindow(QWindow *w, IDXGIAdapter1 *adapter, IDXGIOutput6 **result) -{ - bool ok = false; - QRect wr = w->geometry(); - wr = QRect(wr.topLeft() * w->devicePixelRatio(), wr.size() * w->devicePixelRatio()); - const QPoint center = wr.center(); - IDXGIOutput *currentOutput = nullptr; - IDXGIOutput *output = nullptr; - for (UINT i = 0; adapter->EnumOutputs(i, &output) != DXGI_ERROR_NOT_FOUND; ++i) { - DXGI_OUTPUT_DESC desc; - output->GetDesc(&desc); - const RECT r = desc.DesktopCoordinates; - const QRect dr(QPoint(r.left, r.top), QPoint(r.right - 1, r.bottom - 1)); - if (dr.contains(center)) { - currentOutput = output; - break; - } else { - output->Release(); - } - } - if (currentOutput) { - ok = SUCCEEDED(currentOutput->QueryInterface(__uuidof(IDXGIOutput6), reinterpret_cast<void **>(result))); - currentOutput->Release(); - } - return ok; -} - -static bool outputDesc1ForWindow(QWindow *w, IDXGIAdapter1 *adapter, DXGI_OUTPUT_DESC1 *result) +QSize QD3D12SwapChain::surfacePixelSize() { - bool ok = false; - IDXGIOutput6 *out6 = nullptr; - if (output6ForWindow(w, adapter, &out6)) { - ok = SUCCEEDED(out6->GetDesc1(result)); - out6->Release(); - } - return ok; + Q_ASSERT(m_window); + return m_window->size() * m_window->devicePixelRatio(); } bool QD3D12SwapChain::isFormatSupported(Format f) @@ -5790,8 +6195,10 @@ bool QD3D12SwapChain::isFormatSupported(Format f) QRHI_RES_RHI(QRhiD3D12); DXGI_OUTPUT_DESC1 desc1; - if (outputDesc1ForWindow(m_window, rhiD->activeAdapter, &desc1)) - return desc1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; + if (QRhiD3D::outputDesc1ForWindow(m_window, rhiD->activeAdapter, &desc1)) { + if (desc1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020) + return f == QRhiSwapChain::HDRExtendedSrgbLinear || f == QRhiSwapChain::HDR10; + } return false; } @@ -5799,14 +6206,16 @@ bool QD3D12SwapChain::isFormatSupported(Format f) QRhiSwapChainHdrInfo QD3D12SwapChain::hdrInfo() { QRhiSwapChainHdrInfo info = QRhiSwapChain::hdrInfo(); - if (m_format != QRhiSwapChain::SDR && m_window) { + // Must use m_window, not window, given this may be called before createOrResize(). + if (m_window) { QRHI_RES_RHI(QRhiD3D12); DXGI_OUTPUT_DESC1 hdrOutputDesc; - if (outputDesc1ForWindow(m_window, rhiD->activeAdapter, &hdrOutputDesc)) { - info.isHardCodedDefaults = false; + if (QRhiD3D::outputDesc1ForWindow(m_window, rhiD->activeAdapter, &hdrOutputDesc)) { info.limitsType = QRhiSwapChainHdrInfo::LuminanceInNits; info.limits.luminanceInNits.minLuminance = hdrOutputDesc.MinLuminance; info.limits.luminanceInNits.maxLuminance = hdrOutputDesc.MaxLuminance; + info.luminanceBehavior = QRhiSwapChainHdrInfo::SceneReferred; // 1.0 = 80 nits + info.sdrWhiteLevel = QRhiD3D::sdrWhiteLevelInNits(hdrOutputDesc); } } return info; @@ -5829,25 +6238,19 @@ QRhiRenderPassDescriptor *QD3D12SwapChain::newCompatibleRenderPassDescriptor() return rpD; } -static const DXGI_FORMAT DEFAULT_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM; -static const DXGI_FORMAT DEFAULT_SRGB_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; - bool QRhiD3D12::ensureDirectCompositionDevice() { if (dcompDevice) return true; qCDebug(QRHI_LOG_INFO, "Creating Direct Composition device (needed for semi-transparent windows)"); - - HRESULT hr = DCompositionCreateDevice(nullptr, __uuidof(IDCompositionDevice), reinterpret_cast<void **>(&dcompDevice)); - if (FAILED(hr)) { - qWarning("Failed to Direct Composition device: %s", qPrintable(QSystemError::windowsComString(hr))); - return false; - } - - return true; + dcompDevice = QRhiD3D::createDirectCompositionDevice(); + return dcompDevice ? true : false; } +static const DXGI_FORMAT DEFAULT_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM; +static const DXGI_FORMAT DEFAULT_SRGB_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + void QD3D12SwapChain::chooseFormats() { colorFormat = DEFAULT_FORMAT; @@ -5855,7 +6258,7 @@ void QD3D12SwapChain::chooseFormats() hdrColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; // SDR DXGI_OUTPUT_DESC1 hdrOutputDesc; QRHI_RES_RHI(QRhiD3D12); - if (outputDesc1ForWindow(m_window, rhiD->activeAdapter, &hdrOutputDesc) && m_format != SDR) { + if (QRhiD3D::outputDesc1ForWindow(m_window, rhiD->activeAdapter, &hdrOutputDesc) && m_format != SDR) { // https://docs.microsoft.com/en-us/windows/win32/direct3darticles/high-dynamic-range if (hdrOutputDesc.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020) { switch (m_format) { @@ -5880,7 +6283,7 @@ void QD3D12SwapChain::chooseFormats() "(or Use HDR is Off in the Display Settings), ignoring HDR format request"); } } - sampleDesc = rhiD->effectiveSampleCount(m_sampleCount, colorFormat); + sampleDesc = rhiD->effectiveSampleDesc(m_sampleCount, colorFormat); } bool QD3D12SwapChain::createOrResize() @@ -5905,13 +6308,14 @@ bool QD3D12SwapChain::createOrResize() HWND hwnd = reinterpret_cast<HWND>(window->winId()); HRESULT hr; QRHI_RES_RHI(QRhiD3D12); + stereo = m_window->format().stereo() && rhiD->dxgiFactory->IsWindowedStereoEnabled(); if (m_flags.testFlag(SurfaceHasPreMulAlpha) || m_flags.testFlag(SurfaceHasNonPreMulAlpha)) { if (rhiD->ensureDirectCompositionDevice()) { if (!dcompTarget) { - hr = rhiD->dcompDevice->CreateTargetForHwnd(hwnd, true, &dcompTarget); + hr = rhiD->dcompDevice->CreateTargetForHwnd(hwnd, false, &dcompTarget); if (FAILED(hr)) { - qWarning("Failed to create Direct Compsition target for the window: %s", + qWarning("Failed to create Direct Composition target for the window: %s", qPrintable(QSystemError::windowsComString(hr))); } } @@ -5947,6 +6351,7 @@ bool QD3D12SwapChain::createOrResize() desc.Flags = swapChainFlags; desc.Scaling = DXGI_SCALING_NONE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + desc.Stereo = stereo; if (dcompVisual) { // With DirectComposition setting AlphaMode to STRAIGHT fails the @@ -6001,13 +6406,19 @@ bool QD3D12SwapChain::createOrResize() qWarning("Failed to set content for Direct Composition visual: %s", qPrintable(QSystemError::windowsComString(hr))); } + } else { + // disable Alt+Enter; not relevant when using DirectComposition + rhiD->dxgiFactory->MakeWindowAssociation(hwnd, DXGI_MWA_NO_WINDOW_CHANGES); } } if (FAILED(hr)) { - qWarning("Failed to create D3D12 swapchain: %s", qPrintable(QSystemError::windowsComString(hr))); + qWarning("Failed to create D3D12 swapchain: %s" + " (Width=%u Height=%u Format=%u SampleCount=%u BufferCount=%u Scaling=%u SwapEffect=%u Stereo=%u)", + qPrintable(QSystemError::windowsComString(hr)), + desc.Width, desc.Height, UINT(desc.Format), desc.SampleDesc.Count, + desc.BufferCount, UINT(desc.Scaling), UINT(desc.SwapEffect), UINT(desc.Stereo)); return false; } - rhiD->dxgiFactory->MakeWindowAssociation(hwnd, DXGI_MWA_NO_WINDOW_CHANGES); for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i) { hr = rhiD->dev->CreateFence(0, @@ -6054,6 +6465,16 @@ bool QD3D12SwapChain::createOrResize() rtvDesc.Format = srgbAdjustedColorFormat; rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rhiD->dev->CreateRenderTargetView(colorBuffer, &rtvDesc, rtvs[i].cpuHandle); + + if (stereo) { + rtvsRight[i] = rhiD->rtvPool.allocate(1); + D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = srgbAdjustedColorFormat; + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + rtvDesc.Texture2DArray.ArraySize = 1; + rtvDesc.Texture2DArray.FirstArraySlice = 1; + rhiD->dev->CreateRenderTargetView(colorBuffer, &rtvDesc, rtvsRight[i].cpuHandle); + } } if (m_depthStencil && m_depthStencil->sampleCount() != m_sampleCount) { @@ -6126,6 +6547,15 @@ bool QD3D12SwapChain::createOrResize() rtD->d.colorAttCount = 1; rtD->d.dsAttCount = m_depthStencil ? 1 : 0; + rtWrapperRight.setRenderPassDescriptor(m_renderPassDesc); + QD3D12SwapChainRenderTarget *rtDr = QRHI_RES(QD3D12SwapChainRenderTarget, &rtWrapperRight); + rtDr->d.rp = QRHI_RES(QD3D12RenderPassDescriptor, m_renderPassDesc); + rtDr->d.pixelSize = pixelSize; + rtDr->d.dpr = float(window->devicePixelRatio()); + rtDr->d.sampleCount = int(sampleDesc.Count); + rtDr->d.colorAttCount = 1; + rtDr->d.dsAttCount = m_depthStencil ? 1 : 0; + if (needsRegistration) { rhiD->swapchains.insert(this); rhiD->registerResource(this); @@ -6135,3 +6565,5 @@ bool QD3D12SwapChain::createOrResize() } QT_END_NAMESPACE + +#endif // __ID3D12Device2_INTERFACE_DEFINED__ |