/**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtQuick module of the Qt Toolkit. ** ** $QT_BEGIN_LICENSE:LGPL$ ** Commercial License Usage ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in ** a written agreement between you and The Qt Company. For licensing terms ** and conditions see https://www.qt.io/terms-conditions. For further ** information use the contact form at https://www.qt.io/contact-us. ** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser ** General Public License version 3 as published by the Free Software ** Foundation and appearing in the file LICENSE.LGPL3 included in the ** packaging of this file. Please review the following information to ** ensure the GNU Lesser General Public License version 3 requirements ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. ** ** GNU General Public License Usage ** Alternatively, this file may be used under the terms of the GNU ** General Public License version 2.0 or (at your option) the GNU General ** Public license version 3 or any later version approved by the KDE Free ** Qt Foundation. The licenses are as published by the Free Software ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 ** included in the packaging of this file. Please review the following ** information to ensure the GNU General Public License requirements will ** be met: https://www.gnu.org/licenses/gpl-2.0.html and ** https://www.gnu.org/licenses/gpl-3.0.html. ** ** $QT_END_LICENSE$ ** ****************************************************************************/ #include "qsgd3d12engine_p.h" #include "qsgd3d12engine_p_p.h" #include "cs_mipmapgen.hlslh" #include #include #include #include #include // Comment out to disable DeviceLossTester functionality in order to reduce // code size and improve startup perf a tiny bit. #define DEVLOSS_TEST #ifdef DEVLOSS_TEST #include "cs_tdr.hlslh" #endif #ifdef Q_OS_WINRT #include #include #include #include #endif #include QT_BEGIN_NAMESPACE // NOTE: Avoid categorized logging. It is slow. #define DECLARE_DEBUG_VAR(variable) \ static bool debug_ ## variable() \ { static bool value = qgetenv("QSG_RENDERER_DEBUG").contains(QT_STRINGIFY(variable)); return value; } DECLARE_DEBUG_VAR(render) DECLARE_DEBUG_VAR(descheap) DECLARE_DEBUG_VAR(buffer) DECLARE_DEBUG_VAR(texture) // Except for system info on startup. Q_LOGGING_CATEGORY(QSG_LOG_INFO_GENERAL, "qt.scenegraph.general") // Any changes to the defaults below must be reflected in adaptations.qdoc as // well and proven by qmlbench or similar. static const int DEFAULT_SWAP_CHAIN_BUFFER_COUNT = 3; static const int DEFAULT_FRAME_IN_FLIGHT_COUNT = 2; static const int DEFAULT_WAITABLE_SWAP_CHAIN_MAX_LATENCY = 0; static const int MAX_DRAW_CALLS_PER_LIST = 4096; static const int MAX_CACHED_ROOTSIG = 16; static const int MAX_CACHED_PSO = 64; static const int GPU_CBVSRVUAV_DESCRIPTORS = 512; static const DXGI_FORMAT RT_COLOR_FORMAT = DXGI_FORMAT_R8G8B8A8_UNORM; static const int BUCKETS_PER_HEAP = 8; // must match freeMap static const int DESCRIPTORS_PER_BUCKET = 32; // the bit map (freeMap) is quint32 static const int MAX_DESCRIPTORS_PER_HEAP = BUCKETS_PER_HEAP * DESCRIPTORS_PER_BUCKET; static QString comErrorMessage(HRESULT hr) { #ifndef Q_OS_WINRT const _com_error comError(hr); #else const _com_error comError(hr, nullptr); #endif QString result = QLatin1String("Error 0x") + QString::number(ulong(hr), 16); if (const wchar_t *msg = comError.ErrorMessage()) result += QLatin1String(": ") + QString::fromWCharArray(msg); return result; } D3D12_CPU_DESCRIPTOR_HANDLE QSGD3D12CPUDescriptorHeapManager::allocate(D3D12_DESCRIPTOR_HEAP_TYPE type) { D3D12_CPU_DESCRIPTOR_HANDLE h = {}; for (Heap &heap : m_heaps) { if (heap.type == type) { for (int bucket = 0; bucket < _countof(heap.freeMap); ++bucket) if (heap.freeMap[bucket]) { uint freePos = qCountTrailingZeroBits(heap.freeMap[bucket]); heap.freeMap[bucket] &= ~(1UL << freePos); if (Q_UNLIKELY(debug_descheap())) qDebug("descriptor handle heap %p type %x reserve in bucket %d index %d", &heap, type, bucket, freePos); freePos += bucket * DESCRIPTORS_PER_BUCKET; h = heap.start; h.ptr += freePos * heap.handleSize; return h; } } } Heap heap; heap.type = type; heap.handleSize = m_handleSizes[type]; D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; heapDesc.NumDescriptors = MAX_DESCRIPTORS_PER_HEAP; heapDesc.Type = type; // The heaps created here are _never_ shader-visible. HRESULT hr = m_device->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap.heap)); if (FAILED(hr)) { qWarning("Failed to create heap with type 0x%x: %s", type, qPrintable(comErrorMessage(hr))); return h; } heap.start = heap.heap->GetCPUDescriptorHandleForHeapStart(); if (Q_UNLIKELY(debug_descheap())) qDebug("new descriptor heap, type %x, start %llu", type, heap.start.ptr); heap.freeMap[0] = 0xFFFFFFFE; for (int i = 1; i < _countof(heap.freeMap); ++i) heap.freeMap[i] = 0xFFFFFFFF; h = heap.start; m_heaps.append(heap); return h; } void QSGD3D12CPUDescriptorHeapManager::release(D3D12_CPU_DESCRIPTOR_HANDLE handle, D3D12_DESCRIPTOR_HEAP_TYPE type) { for (Heap &heap : m_heaps) { if (heap.type == type && handle.ptr >= heap.start.ptr && handle.ptr < heap.start.ptr + heap.handleSize * MAX_DESCRIPTORS_PER_HEAP) { unsigned long pos = (handle.ptr - heap.start.ptr) / heap.handleSize; const int bucket = pos / DESCRIPTORS_PER_BUCKET; const int indexInBucket = pos - bucket * DESCRIPTORS_PER_BUCKET; heap.freeMap[bucket] |= 1UL << indexInBucket; if (Q_UNLIKELY(debug_descheap())) qDebug("free descriptor handle heap %p type %x bucket %d index %d", &heap, type, bucket, indexInBucket); return; } } qWarning("QSGD3D12CPUDescriptorHeapManager: Attempted to release untracked descriptor handle %llu of type %d", handle.ptr, type); } void QSGD3D12CPUDescriptorHeapManager::initialize(ID3D12Device *device) { m_device = device; for (int i = 0; i < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES; ++i) m_handleSizes[i] = m_device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE(i)); } void QSGD3D12CPUDescriptorHeapManager::releaseResources() { for (Heap &heap : m_heaps) heap.heap = nullptr; m_heaps.clear(); m_device = nullptr; } // One device per process, one everything else (engine) per window. Q_GLOBAL_STATIC(QSGD3D12DeviceManager, deviceManager) static void getHardwareAdapter(IDXGIFactory1 *factory, IDXGIAdapter1 **outAdapter) { const D3D_FEATURE_LEVEL fl = D3D_FEATURE_LEVEL_11_0; ComPtr adapter; DXGI_ADAPTER_DESC1 desc; for (int adapterIndex = 0; factory->EnumAdapters1(adapterIndex, &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) { DXGI_ADAPTER_DESC1 desc; adapter->GetDesc1(&desc); const QString name = QString::fromUtf16((char16_t *) desc.Description); qCDebug(QSG_LOG_INFO_GENERAL, "Adapter %d: '%s' (flags 0x%x)", adapterIndex, qPrintable(name), desc.Flags); } if (qEnvironmentVariableIsSet("QT_D3D_ADAPTER_INDEX")) { const int adapterIndex = qEnvironmentVariableIntValue("QT_D3D_ADAPTER_INDEX"); if (SUCCEEDED(factory->EnumAdapters1(adapterIndex, &adapter))) { adapter->GetDesc1(&desc); const QString name = QString::fromUtf16((char16_t *) desc.Description); HRESULT hr = D3D12CreateDevice(adapter.Get(), fl, _uuidof(ID3D12Device), nullptr); if (SUCCEEDED(hr)) { qCDebug(QSG_LOG_INFO_GENERAL, "Using requested adapter '%s'", qPrintable(name)); *outAdapter = adapter.Detach(); return; } else { qWarning("Failed to create device for requested adapter '%s': %s", qPrintable(name), qPrintable(comErrorMessage(hr))); } } } for (int adapterIndex = 0; factory->EnumAdapters1(adapterIndex, &adapter) != DXGI_ERROR_NOT_FOUND; ++adapterIndex) { adapter->GetDesc1(&desc); if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) continue; if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), fl, _uuidof(ID3D12Device), nullptr))) { const QString name = QString::fromUtf16((char16_t *) desc.Description); qCDebug(QSG_LOG_INFO_GENERAL, "Using adapter '%s'", qPrintable(name)); break; } } *outAdapter = adapter.Detach(); } ID3D12Device *QSGD3D12DeviceManager::ref() { ensureCreated(); m_ref.ref(); return m_device.Get(); } void QSGD3D12DeviceManager::unref() { if (!m_ref.deref()) { if (Q_UNLIKELY(debug_render())) qDebug("destroying d3d device"); m_device = nullptr; m_factory = nullptr; } } void QSGD3D12DeviceManager::deviceLossDetected() { for (DeviceLossObserver *observer : qAsConst(m_observers)) observer->deviceLost(); // Nothing else to do here. All windows are expected to release their // resources and call unref() in response immediately. } IDXGIFactory4 *QSGD3D12DeviceManager::dxgi() { ensureCreated(); return m_factory.Get(); } void QSGD3D12DeviceManager::ensureCreated() { if (m_device) return; HRESULT hr = CreateDXGIFactory2(0, IID_PPV_ARGS(&m_factory)); if (FAILED(hr)) { qWarning("Failed to create DXGI: %s", qPrintable(comErrorMessage(hr))); return; } ComPtr adapter; getHardwareAdapter(m_factory.Get(), &adapter); bool warp = true; if (adapter) { HRESULT hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); if (SUCCEEDED(hr)) warp = false; else qWarning("Failed to create device: %s", qPrintable(comErrorMessage(hr))); } if (warp) { qCDebug(QSG_LOG_INFO_GENERAL, "Using WARP"); m_factory->EnumWarpAdapter(IID_PPV_ARGS(&adapter)); HRESULT hr = D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)); if (FAILED(hr)) { qWarning("Failed to create WARP device: %s", qPrintable(comErrorMessage(hr))); return; } } ComPtr adapter3; if (SUCCEEDED(adapter.As(&adapter3))) { DXGI_QUERY_VIDEO_MEMORY_INFO vidMemInfo; if (SUCCEEDED(adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &vidMemInfo))) { qCDebug(QSG_LOG_INFO_GENERAL, "Video memory info: LOCAL: Budget %llu KB CurrentUsage %llu KB AvailableForReservation %llu KB CurrentReservation %llu KB", vidMemInfo.Budget / 1024, vidMemInfo.CurrentUsage / 1024, vidMemInfo.AvailableForReservation / 1024, vidMemInfo.CurrentReservation / 1024); } if (SUCCEEDED(adapter3->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &vidMemInfo))) { qCDebug(QSG_LOG_INFO_GENERAL, "Video memory info: NON-LOCAL: Budget %llu KB CurrentUsage %llu KB AvailableForReservation %llu KB CurrentReservation %llu KB", vidMemInfo.Budget / 1024, vidMemInfo.CurrentUsage / 1024, vidMemInfo.AvailableForReservation / 1024, vidMemInfo.CurrentReservation / 1024); } } } void QSGD3D12DeviceManager::registerDeviceLossObserver(DeviceLossObserver *observer) { if (!m_observers.contains(observer)) m_observers.append(observer); } QSGD3D12Engine::QSGD3D12Engine() { d = new QSGD3D12EnginePrivate; } QSGD3D12Engine::~QSGD3D12Engine() { d->waitGPU(); d->releaseResources(); delete d; } bool QSGD3D12Engine::attachToWindow(WId window, const QSize &size, float dpr, int surfaceFormatSamples, bool alpha) { if (d->isInitialized()) { qWarning("QSGD3D12Engine: Cannot attach active engine to window"); return false; } d->initialize(window, size, dpr, surfaceFormatSamples, alpha); return d->isInitialized(); } void QSGD3D12Engine::releaseResources() { d->releaseResources(); } bool QSGD3D12Engine::hasResources() const { // An explicit releaseResources() or a device loss results in initialized == false. return d->isInitialized(); } void QSGD3D12Engine::setWindowSize(const QSize &size, float dpr) { d->setWindowSize(size, dpr); } WId QSGD3D12Engine::window() const { return d->currentWindow(); } QSize QSGD3D12Engine::windowSize() const { return d->currentWindowSize(); } float QSGD3D12Engine::windowDevicePixelRatio() const { return d->currentWindowDpr(); } uint QSGD3D12Engine::windowSamples() const { return d->currentWindowSamples(); } void QSGD3D12Engine::beginFrame() { d->beginFrame(); } void QSGD3D12Engine::endFrame() { d->endFrame(); } void QSGD3D12Engine::beginLayer() { d->beginLayer(); } void QSGD3D12Engine::endLayer() { d->endLayer(); } void QSGD3D12Engine::invalidateCachedFrameState() { d->invalidateCachedFrameState(); } void QSGD3D12Engine::restoreFrameState(bool minimal) { d->restoreFrameState(minimal); } void QSGD3D12Engine::finalizePipeline(const QSGD3D12PipelineState &pipelineState) { d->finalizePipeline(pipelineState); } uint QSGD3D12Engine::genBuffer() { return d->genBuffer(); } void QSGD3D12Engine::releaseBuffer(uint id) { d->releaseBuffer(id); } void QSGD3D12Engine::resetBuffer(uint id, const quint8 *data, int size) { d->resetBuffer(id, data, size); } void QSGD3D12Engine::markBufferDirty(uint id, int offset, int size) { d->markBufferDirty(id, offset, size); } void QSGD3D12Engine::queueViewport(const QRect &rect) { d->queueViewport(rect); } void QSGD3D12Engine::queueScissor(const QRect &rect) { d->queueScissor(rect); } void QSGD3D12Engine::queueSetRenderTarget(uint id) { d->queueSetRenderTarget(id); } void QSGD3D12Engine::queueClearRenderTarget(const QColor &color) { d->queueClearRenderTarget(color); } void QSGD3D12Engine::queueClearDepthStencil(float depthValue, quint8 stencilValue, ClearFlags which) { d->queueClearDepthStencil(depthValue, stencilValue, which); } void QSGD3D12Engine::queueSetBlendFactor(const QVector4D &factor) { d->queueSetBlendFactor(factor); } void QSGD3D12Engine::queueSetStencilRef(quint32 ref) { d->queueSetStencilRef(ref); } void QSGD3D12Engine::queueDraw(const DrawParams ¶ms) { d->queueDraw(params); } void QSGD3D12Engine::present() { d->present(); } void QSGD3D12Engine::waitGPU() { d->waitGPU(); } uint QSGD3D12Engine::genTexture() { return d->genTexture(); } void QSGD3D12Engine::createTexture(uint id, const QSize &size, QImage::Format format, TextureCreateFlags flags) { d->createTexture(id, size, format, flags); } void QSGD3D12Engine::queueTextureResize(uint id, const QSize &size) { d->queueTextureResize(id, size); } void QSGD3D12Engine::queueTextureUpload(uint id, const QImage &image, const QPoint &dstPos, TextureUploadFlags flags) { d->queueTextureUpload(id, QVector() << image, QVector() << dstPos, flags); } void QSGD3D12Engine::queueTextureUpload(uint id, const QVector &images, const QVector &dstPos, TextureUploadFlags flags) { d->queueTextureUpload(id, images, dstPos, flags); } void QSGD3D12Engine::releaseTexture(uint id) { d->releaseTexture(id); } void QSGD3D12Engine::useTexture(uint id) { d->useTexture(id); } uint QSGD3D12Engine::genRenderTarget() { return d->genRenderTarget(); } void QSGD3D12Engine::createRenderTarget(uint id, const QSize &size, const QVector4D &clearColor, uint samples) { d->createRenderTarget(id, size, clearColor, samples); } void QSGD3D12Engine::releaseRenderTarget(uint id) { d->releaseRenderTarget(id); } void QSGD3D12Engine::useRenderTargetAsTexture(uint id) { d->useRenderTargetAsTexture(id); } uint QSGD3D12Engine::activeRenderTarget() const { return d->activeRenderTarget(); } QImage QSGD3D12Engine::executeAndWaitReadbackRenderTarget(uint id) { return d->executeAndWaitReadbackRenderTarget(id); } void QSGD3D12Engine::simulateDeviceLoss() { d->simulateDeviceLoss(); } void *QSGD3D12Engine::getResource(QQuickWindow *, QSGRendererInterface::Resource resource) const { return d->getResource(resource); } static inline quint32 alignedSize(quint32 size, quint32 byteAlign) { return (size + byteAlign - 1) & ~(byteAlign - 1); } quint32 QSGD3D12Engine::alignedConstantBufferSize(quint32 size) { return alignedSize(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); } QSGD3D12Format QSGD3D12Engine::toDXGIFormat(QSGGeometry::Type sgtype, int tupleSize, int *size) { QSGD3D12Format format = FmtUnknown; static const QSGD3D12Format formatMap_ub[] = { FmtUnknown, FmtUNormByte, FmtUNormByte2, FmtUnknown, FmtUNormByte4 }; static const QSGD3D12Format formatMap_f[] = { FmtUnknown, FmtFloat, FmtFloat2, FmtFloat3, FmtFloat4 }; switch (sgtype) { case QSGGeometry::UnsignedByteType: format = formatMap_ub[tupleSize]; if (size) *size = tupleSize; break; case QSGGeometry::FloatType: format = formatMap_f[tupleSize]; if (size) *size = sizeof(float) * tupleSize; break; case QSGGeometry::UnsignedShortType: format = FmtUnsignedShort; if (size) *size = sizeof(ushort) * tupleSize; break; case QSGGeometry::UnsignedIntType: format = FmtUnsignedInt; if (size) *size = sizeof(uint) * tupleSize; break; case QSGGeometry::ByteType: case QSGGeometry::IntType: case QSGGeometry::ShortType: qWarning("no mapping for GL type 0x%x", sgtype); break; default: qWarning("unknown GL type 0x%x", sgtype); break; } return format; } int QSGD3D12Engine::mipMapLevels(const QSize &size) { return ceil(log2(qMax(size.width(), size.height()))) + 1; } inline static bool isPowerOfTwo(int x) { // Assumption: x >= 1 return x == (x & -x); } QSize QSGD3D12Engine::mipMapAdjustedSourceSize(const QSize &size) { if (size.isEmpty()) return size; QSize adjustedSize = size; // ### for now only power-of-two sizes are mipmap-capable if (!isPowerOfTwo(size.width())) adjustedSize.setWidth(qNextPowerOfTwo(size.width())); if (!isPowerOfTwo(size.height())) adjustedSize.setHeight(qNextPowerOfTwo(size.height())); return adjustedSize; } void QSGD3D12EnginePrivate::releaseResources() { if (!initialized) return; mipmapper.releaseResources(); devLossTest.releaseResources(); frameCommandList = nullptr; copyCommandList = nullptr; copyCommandAllocator = nullptr; for (int i = 0; i < frameInFlightCount; ++i) { frameCommandAllocator[i] = nullptr; pframeData[i].gpuCbvSrvUavHeap = nullptr; delete frameFence[i]; } defaultDS = nullptr; for (int i = 0; i < swapChainBufferCount; ++i) { backBufferRT[i] = nullptr; defaultRT[i] = nullptr; } psoCache.clear(); rootSigCache.clear(); buffers.clear(); textures.clear(); renderTargets.clear(); cpuDescHeapManager.releaseResources(); commandQueue = nullptr; copyCommandQueue = nullptr; #ifndef Q_OS_WINRT dcompTarget = nullptr; dcompVisual = nullptr; dcompDevice = nullptr; #endif swapChain = nullptr; delete presentFence; textureUploadFence = nullptr; deviceManager()->unref(); initialized = false; // 'window' must be kept, may just be a device loss } void QSGD3D12EnginePrivate::initialize(WId w, const QSize &size, float dpr, int surfaceFormatSamples, bool alpha) { if (initialized) return; window = w; windowSize = size; windowDpr = dpr; windowSamples = qMax(1, surfaceFormatSamples); // may be -1 or 0, whereas windowSamples is uint and >= 1 windowAlpha = alpha; swapChainBufferCount = qMin(qEnvironmentVariableIntValue("QT_D3D_BUFFER_COUNT"), MAX_SWAP_CHAIN_BUFFER_COUNT); if (swapChainBufferCount < 2) swapChainBufferCount = DEFAULT_SWAP_CHAIN_BUFFER_COUNT; frameInFlightCount = qMin(qEnvironmentVariableIntValue("QT_D3D_FRAME_COUNT"), MAX_FRAME_IN_FLIGHT_COUNT); if (frameInFlightCount < 1) frameInFlightCount = DEFAULT_FRAME_IN_FLIGHT_COUNT; static const char *latReqEnvVar = "QT_D3D_WAITABLE_SWAP_CHAIN_MAX_LATENCY"; if (!qEnvironmentVariableIsSet(latReqEnvVar)) waitableSwapChainMaxLatency = DEFAULT_WAITABLE_SWAP_CHAIN_MAX_LATENCY; else waitableSwapChainMaxLatency = qBound(0, qEnvironmentVariableIntValue(latReqEnvVar), 16); if (qEnvironmentVariableIsSet("QSG_INFO")) const_cast(QSG_LOG_INFO_GENERAL()).setEnabled(QtDebugMsg, true); qCDebug(QSG_LOG_INFO_GENERAL, "d3d12 engine init. swap chain buffer count %d, max frames prepared without blocking %d", swapChainBufferCount, frameInFlightCount); if (waitableSwapChainMaxLatency) qCDebug(QSG_LOG_INFO_GENERAL, "Swap chain frame latency waitable object enabled. Frame latency is %d", waitableSwapChainMaxLatency); const bool debugLayer = qEnvironmentVariableIntValue("QT_D3D_DEBUG") != 0; if (debugLayer) { qCDebug(QSG_LOG_INFO_GENERAL, "Enabling debug layer"); #if !defined(Q_OS_WINRT) || !defined(NDEBUG) ComPtr debugController; if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) debugController->EnableDebugLayer(); #else qCDebug(QSG_LOG_INFO_GENERAL, "Using DebugInterface will not allow certification to pass"); #endif } QSGD3D12DeviceManager *dev = deviceManager(); device = dev->ref(); dev->registerDeviceLossObserver(this); if (debugLayer) { ComPtr infoQueue; if (SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(&infoQueue)))) { infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, true); infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, true); const bool breakOnWarning = qEnvironmentVariableIntValue("QT_D3D_DEBUG_BREAK_ON_WARNING") != 0; infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, breakOnWarning); D3D12_INFO_QUEUE_FILTER filter = {}; D3D12_MESSAGE_ID suppressedMessages[] = { // When using a render target other than the default one we // have no way to know the custom clear color, if there is one. D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE }; filter.DenyList.NumIDs = _countof(suppressedMessages); filter.DenyList.pIDList = suppressedMessages; // setting the filter would enable Info messages which we don't need D3D12_MESSAGE_SEVERITY infoSev = D3D12_MESSAGE_SEVERITY_INFO; filter.DenyList.NumSeverities = 1; filter.DenyList.pSeverityList = &infoSev; infoQueue->PushStorageFilter(&filter); } } D3D12_COMMAND_QUEUE_DESC queueDesc = {}; queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; if (FAILED(device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue)))) { qWarning("Failed to create command queue"); return; } queueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY; if (FAILED(device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(©CommandQueue)))) { qWarning("Failed to create copy command queue"); return; } #ifndef Q_OS_WINRT HWND hwnd = reinterpret_cast(w); if (windowAlpha) { // Go through DirectComposition for semi-transparent windows since the // traditional approaches won't fly with flip model swapchains. HRESULT hr = DCompositionCreateDevice(nullptr, IID_PPV_ARGS(&dcompDevice)); if (SUCCEEDED(hr)) { hr = dcompDevice->CreateTargetForHwnd(hwnd, true, &dcompTarget); if (SUCCEEDED(hr)) { hr = dcompDevice->CreateVisual(&dcompVisual); if (FAILED(hr)) { qWarning("Failed to create DirectComposition visual: %s", qPrintable(comErrorMessage(hr))); windowAlpha = false; } } else { qWarning("Failed to create DirectComposition target: %s", qPrintable(comErrorMessage(hr))); windowAlpha = false; } } else { qWarning("Failed to create DirectComposition device: %s", qPrintable(comErrorMessage(hr))); windowAlpha = false; } } if (windowAlpha) { DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; swapChainDesc.Width = windowSize.width() * windowDpr; swapChainDesc.Height = windowSize.height() * windowDpr; swapChainDesc.Format = RT_COLOR_FORMAT; swapChainDesc.SampleDesc.Count = 1; swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swapChainDesc.BufferCount = swapChainBufferCount; swapChainDesc.Scaling = DXGI_SCALING_STRETCH; swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED; if (waitableSwapChainMaxLatency) swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; ComPtr baseSwapChain; HRESULT hr = dev->dxgi()->CreateSwapChainForComposition(commandQueue.Get(), &swapChainDesc, nullptr, &baseSwapChain); if (SUCCEEDED(hr)) { if (SUCCEEDED(baseSwapChain.As(&swapChain))) { hr = dcompVisual->SetContent(swapChain.Get()); if (SUCCEEDED(hr)) { hr = dcompTarget->SetRoot(dcompVisual.Get()); if (FAILED(hr)) { qWarning("SetRoot failed for DirectComposition target: %s", qPrintable(comErrorMessage(hr))); windowAlpha = false; } } else { qWarning("SetContent failed for DirectComposition visual: %s", qPrintable(comErrorMessage(hr))); windowAlpha = false; } } else { qWarning("Failed to cast swap chain"); windowAlpha = false; } } else { qWarning("Failed to create swap chain for composition: 0x%x", hr); windowAlpha = false; } } if (!windowAlpha) { DXGI_SWAP_CHAIN_DESC swapChainDesc = {}; swapChainDesc.BufferCount = swapChainBufferCount; swapChainDesc.BufferDesc.Width = windowSize.width() * windowDpr; swapChainDesc.BufferDesc.Height = windowSize.height() * windowDpr; swapChainDesc.BufferDesc.Format = RT_COLOR_FORMAT; swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; // D3D12 requires the flip model swapChainDesc.OutputWindow = hwnd; swapChainDesc.SampleDesc.Count = 1; // Flip does not support MSAA so no choice here swapChainDesc.Windowed = TRUE; if (waitableSwapChainMaxLatency) swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; ComPtr baseSwapChain; HRESULT hr = dev->dxgi()->CreateSwapChain(commandQueue.Get(), &swapChainDesc, &baseSwapChain); if (FAILED(hr)) { qWarning("Failed to create swap chain: %s", qPrintable(comErrorMessage(hr))); return; } hr = baseSwapChain.As(&swapChain); if (FAILED(hr)) { qWarning("Failed to cast swap chain: %s", qPrintable(comErrorMessage(hr))); return; } } dev->dxgi()->MakeWindowAssociation(hwnd, DXGI_MWA_NO_ALT_ENTER); #else DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; swapChainDesc.Width = windowSize.width() * windowDpr; swapChainDesc.Height = windowSize.height() * windowDpr; swapChainDesc.Format = RT_COLOR_FORMAT; swapChainDesc.SampleDesc.Count = 1; swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; swapChainDesc.BufferCount = swapChainBufferCount; swapChainDesc.Scaling = DXGI_SCALING_STRETCH; swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED; if (waitableSwapChainMaxLatency) swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; ComPtr baseSwapChain; HRESULT hr = dev->dxgi()->CreateSwapChainForComposition(commandQueue.Get(), &swapChainDesc, nullptr, &baseSwapChain); if (FAILED(hr)) { qWarning("Failed to create swap chain for composition: 0x%x", hr); return; } if (FAILED(baseSwapChain.As(&swapChain))) { qWarning("Failed to cast swap chain"); return; } // The winrt platform plugin returns an ISwapChainPanel* from winId(). ComPtr swapChainPanel = reinterpret_cast(window); ComPtr swapChainPanelNative; if (FAILED(swapChainPanel.As(&swapChainPanelNative))) { qWarning("Failed to cast swap chain panel to native"); return; } hr = QEventDispatcherWinRT::runOnXamlThread([this, &swapChainPanelNative]() { return swapChainPanelNative->SetSwapChain(swapChain.Get()); }); if (FAILED(hr)) { qWarning("Failed to set swap chain on panel: 0x%x", hr); return; } #endif if (waitableSwapChainMaxLatency) { if (FAILED(swapChain->SetMaximumFrameLatency(waitableSwapChainMaxLatency))) qWarning("Failed to set maximum frame latency to %d", waitableSwapChainMaxLatency); swapEvent = swapChain->GetFrameLatencyWaitableObject(); } for (int i = 0; i < frameInFlightCount; ++i) { if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&frameCommandAllocator[i])))) { qWarning("Failed to create command allocator"); return; } } if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(©CommandAllocator)))) { qWarning("Failed to create copy command allocator"); return; } for (int i = 0; i < frameInFlightCount; ++i) { if (!createCbvSrvUavHeap(i, GPU_CBVSRVUAV_DESCRIPTORS)) return; } cpuDescHeapManager.initialize(device); setupDefaultRenderTargets(); if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, frameCommandAllocator[0].Get(), nullptr, IID_PPV_ARGS(&frameCommandList)))) { qWarning("Failed to create command list"); return; } // created in recording state, close it for now frameCommandList->Close(); if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COPY, copyCommandAllocator.Get(), nullptr, IID_PPV_ARGS(©CommandList)))) { qWarning("Failed to create copy command list"); return; } copyCommandList->Close(); frameIndex = 0; presentFence = createCPUWaitableFence(); for (int i = 0; i < frameInFlightCount; ++i) frameFence[i] = createCPUWaitableFence(); if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&textureUploadFence)))) { qWarning("Failed to create fence"); return; } psoCache.setMaxCost(MAX_CACHED_PSO); rootSigCache.setMaxCost(MAX_CACHED_ROOTSIG); if (!mipmapper.initialize(this)) return; if (!devLossTest.initialize(this)) return; currentRenderTarget = 0; initialized = true; } bool QSGD3D12EnginePrivate::createCbvSrvUavHeap(int pframeIndex, int descriptorCount) { D3D12_DESCRIPTOR_HEAP_DESC gpuDescHeapDesc = {}; gpuDescHeapDesc.NumDescriptors = descriptorCount; gpuDescHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; gpuDescHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; if (FAILED(device->CreateDescriptorHeap(&gpuDescHeapDesc, IID_PPV_ARGS(&pframeData[pframeIndex].gpuCbvSrvUavHeap)))) { qWarning("Failed to create shader-visible CBV-SRV-UAV heap"); return false; } pframeData[pframeIndex].gpuCbvSrvUavHeapSize = descriptorCount; return true; } DXGI_SAMPLE_DESC QSGD3D12EnginePrivate::makeSampleDesc(DXGI_FORMAT format, uint samples) { DXGI_SAMPLE_DESC sampleDesc; sampleDesc.Count = 1; sampleDesc.Quality = 0; if (samples > 1) { D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msaaInfo = {}; msaaInfo.Format = format; msaaInfo.SampleCount = samples; if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msaaInfo, sizeof(msaaInfo)))) { if (msaaInfo.NumQualityLevels > 0) { sampleDesc.Count = samples; sampleDesc.Quality = msaaInfo.NumQualityLevels - 1; } else { qWarning("No quality levels for multisampling with sample count %d", samples); } } else { qWarning("Failed to query multisample quality levels for sample count %d", samples); } } return sampleDesc; } ID3D12Resource *QSGD3D12EnginePrivate::createColorBuffer(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size, const QVector4D &clearColor, uint samples) { D3D12_CLEAR_VALUE clearValue = {}; clearValue.Format = RT_COLOR_FORMAT; clearValue.Color[0] = clearColor.x(); clearValue.Color[1] = clearColor.y(); clearValue.Color[2] = clearColor.z(); clearValue.Color[3] = clearColor.w(); D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; D3D12_RESOURCE_DESC rtDesc = {}; rtDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; rtDesc.Width = size.width(); rtDesc.Height = size.height(); rtDesc.DepthOrArraySize = 1; rtDesc.MipLevels = 1; rtDesc.Format = RT_COLOR_FORMAT; rtDesc.SampleDesc = makeSampleDesc(rtDesc.Format, samples); rtDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; ID3D12Resource *resource = nullptr; const D3D12_RESOURCE_STATES initialState = samples <= 1 ? D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE : D3D12_RESOURCE_STATE_RENDER_TARGET; if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &rtDesc, initialState, &clearValue, IID_PPV_ARGS(&resource)))) { qWarning("Failed to create offscreen render target of size %dx%d", size.width(), size.height()); return nullptr; } device->CreateRenderTargetView(resource, nullptr, viewHandle); return resource; } ID3D12Resource *QSGD3D12EnginePrivate::createDepthStencil(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size, uint samples) { D3D12_CLEAR_VALUE depthClearValue = {}; depthClearValue.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; depthClearValue.DepthStencil.Depth = 1.0f; depthClearValue.DepthStencil.Stencil = 0; D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_DEFAULT; D3D12_RESOURCE_DESC bufDesc = {}; bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; bufDesc.Width = size.width(); bufDesc.Height = size.height(); bufDesc.DepthOrArraySize = 1; bufDesc.MipLevels = 1; bufDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; bufDesc.SampleDesc = makeSampleDesc(bufDesc.Format, samples); bufDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; bufDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; ID3D12Resource *resource = nullptr; if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &bufDesc, D3D12_RESOURCE_STATE_DEPTH_WRITE, &depthClearValue, IID_PPV_ARGS(&resource)))) { qWarning("Failed to create depth-stencil buffer of size %dx%d", size.width(), size.height()); return nullptr; } D3D12_DEPTH_STENCIL_VIEW_DESC depthStencilDesc = {}; depthStencilDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT; depthStencilDesc.ViewDimension = bufDesc.SampleDesc.Count <= 1 ? D3D12_DSV_DIMENSION_TEXTURE2D : D3D12_DSV_DIMENSION_TEXTURE2DMS; device->CreateDepthStencilView(resource, &depthStencilDesc, viewHandle); return resource; } void QSGD3D12EnginePrivate::setupDefaultRenderTargets() { for (int i = 0; i < swapChainBufferCount; ++i) { if (FAILED(swapChain->GetBuffer(i, IID_PPV_ARGS(&backBufferRT[i])))) { qWarning("Failed to get buffer %d from swap chain", i); return; } defaultRTV[i] = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); if (windowSamples == 1) { defaultRT[i] = backBufferRT[i]; device->CreateRenderTargetView(defaultRT[i].Get(), nullptr, defaultRTV[i]); } else { const QSize size(windowSize.width() * windowDpr, windowSize.height() * windowDpr); // Not optimal if the user called setClearColor, but there's so // much we can do. The debug layer warning is suppressed so we're good to go. const QColor cc(Qt::white); const QVector4D clearColor(cc.redF(), cc.greenF(), cc.blueF(), cc.alphaF()); ID3D12Resource *msaaRT = createColorBuffer(defaultRTV[i], size, clearColor, windowSamples); if (msaaRT) defaultRT[i].Attach(msaaRT); } } defaultDSV = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); const QSize size(windowSize.width() * windowDpr, windowSize.height() * windowDpr); ID3D12Resource *ds = createDepthStencil(defaultDSV, size, windowSamples); if (ds) defaultDS.Attach(ds); presentFrameIndex = 0; } void QSGD3D12EnginePrivate::setWindowSize(const QSize &size, float dpr) { if (!initialized || (windowSize == size && windowDpr == dpr)) return; waitGPU(); windowSize = size; windowDpr = dpr; if (Q_UNLIKELY(debug_render())) qDebug() << "resize" << size << dpr; // Clear these, otherwise resizing will fail. defaultDS = nullptr; cpuDescHeapManager.release(defaultDSV, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); for (int i = 0; i < swapChainBufferCount; ++i) { backBufferRT[i] = nullptr; defaultRT[i] = nullptr; cpuDescHeapManager.release(defaultRTV[i], D3D12_DESCRIPTOR_HEAP_TYPE_RTV); } const int w = windowSize.width() * windowDpr; const int h = windowSize.height() * windowDpr; HRESULT hr = swapChain->ResizeBuffers(swapChainBufferCount, w, h, RT_COLOR_FORMAT, waitableSwapChainMaxLatency ? DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT : 0); if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) { deviceManager()->deviceLossDetected(); return; } else if (FAILED(hr)) { qWarning("Failed to resize buffers: %s", qPrintable(comErrorMessage(hr))); return; } setupDefaultRenderTargets(); } void QSGD3D12EnginePrivate::deviceLost() { qWarning("D3D device lost, will attempt to reinitialize"); // Release all resources. This is important because otherwise reinitialization may fail. releaseResources(); // Now in uninitialized state (but 'window' is still valid). Will recreate // all the resources on the next beginFrame(). } QSGD3D12CPUWaitableFence *QSGD3D12EnginePrivate::createCPUWaitableFence() const { QSGD3D12CPUWaitableFence *f = new QSGD3D12CPUWaitableFence; HRESULT hr = device->CreateFence(f->value, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&f->fence)); if (FAILED(hr)) { qWarning("Failed to create fence: %s", qPrintable(comErrorMessage(hr))); return f; } f->event = CreateEvent(nullptr, FALSE, FALSE, nullptr); return f; } void QSGD3D12EnginePrivate::waitForGPU(QSGD3D12CPUWaitableFence *f) const { const UINT64 newValue = f->value.fetchAndAddAcquire(1) + 1; commandQueue->Signal(f->fence.Get(), newValue); if (f->fence->GetCompletedValue() < newValue) { HRESULT hr = f->fence->SetEventOnCompletion(newValue, f->event); if (FAILED(hr)) { qWarning("SetEventOnCompletion failed: %s", qPrintable(comErrorMessage(hr))); return; } WaitForSingleObject(f->event, INFINITE); } } void QSGD3D12EnginePrivate::transitionResource(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList, D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after) const { D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Transition.pResource = resource; barrier.Transition.StateBefore = before; barrier.Transition.StateAfter = after; barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; commandList->ResourceBarrier(1, &barrier); } void QSGD3D12EnginePrivate::resolveMultisampledTarget(ID3D12Resource *msaa, ID3D12Resource *resolve, D3D12_RESOURCE_STATES resolveUsage, ID3D12GraphicsCommandList *commandList) const { D3D12_RESOURCE_BARRIER barriers[2]; for (int i = 0; i < _countof(barriers); ++i) { barriers[i].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barriers[i].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barriers[i].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; } barriers[0].Transition.pResource = msaa; barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_RESOLVE_SOURCE; barriers[1].Transition.pResource = resolve; barriers[1].Transition.StateBefore = resolveUsage; barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_RESOLVE_DEST; commandList->ResourceBarrier(2, barriers); commandList->ResolveSubresource(resolve, 0, msaa, 0, RT_COLOR_FORMAT); barriers[0].Transition.pResource = msaa; barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_RESOLVE_SOURCE; barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; barriers[1].Transition.pResource = resolve; barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_RESOLVE_DEST; barriers[1].Transition.StateAfter = resolveUsage; commandList->ResourceBarrier(2, barriers); } void QSGD3D12EnginePrivate::uavBarrier(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList) const { D3D12_RESOURCE_BARRIER barrier = {}; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.UAV.pResource = resource; commandList->ResourceBarrier(1, &barrier); } ID3D12Resource *QSGD3D12EnginePrivate::createBuffer(int size) { ID3D12Resource *buf; D3D12_HEAP_PROPERTIES uploadHeapProp = {}; uploadHeapProp.Type = D3D12_HEAP_TYPE_UPLOAD; D3D12_RESOURCE_DESC bufDesc = {}; bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; bufDesc.Width = size; bufDesc.Height = 1; bufDesc.DepthOrArraySize = 1; bufDesc.MipLevels = 1; bufDesc.Format = DXGI_FORMAT_UNKNOWN; bufDesc.SampleDesc.Count = 1; bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; HRESULT hr = device->CreateCommittedResource(&uploadHeapProp, D3D12_HEAP_FLAG_NONE, &bufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&buf)); if (FAILED(hr)) qWarning("Failed to create buffer resource: %s", qPrintable(comErrorMessage(hr))); return buf; } void QSGD3D12EnginePrivate::ensureBuffer(Buffer *buf) { Buffer::InFlightData &bfd(buf->d[currentPFrameIndex]); // Only enlarge, never shrink const bool newBufferNeeded = bfd.buffer ? (buf->cpuDataRef.size > bfd.resourceSize) : true; if (newBufferNeeded) { // Round it up and overallocate a little bit so that a subsequent // buffer contents rebuild with a slightly larger total size does // not lead to creating a new buffer. const quint32 sz = alignedSize(buf->cpuDataRef.size, 4096); if (Q_UNLIKELY(debug_buffer())) qDebug("new buffer[pf=%d] of size %d (actual data size %d)", currentPFrameIndex, sz, buf->cpuDataRef.size); bfd.buffer.Attach(createBuffer(sz)); bfd.resourceSize = sz; } // Cache the actual data size in the per-in-flight-frame data as well. bfd.dataSize = buf->cpuDataRef.size; } void QSGD3D12EnginePrivate::updateBuffer(Buffer *buf) { if (buf->cpuDataRef.dirty.isEmpty()) return; Buffer::InFlightData &bfd(buf->d[currentPFrameIndex]); quint8 *p = nullptr; const D3D12_RANGE readRange = { 0, 0 }; if (FAILED(bfd.buffer->Map(0, &readRange, reinterpret_cast(&p)))) { qWarning("Map failed for buffer of size %d", buf->cpuDataRef.size); return; } for (const auto &r : qAsConst(buf->cpuDataRef.dirty)) { if (Q_UNLIKELY(debug_buffer())) qDebug("%p o %d s %d", buf, r.first, r.second); memcpy(p + r.first, buf->cpuDataRef.p + r.first, r.second); } bfd.buffer->Unmap(0, nullptr); buf->cpuDataRef.dirty.clear(); } void QSGD3D12EnginePrivate::ensureDevice() { if (!initialized && window) initialize(window, windowSize, windowDpr, windowSamples, windowAlpha); } void QSGD3D12EnginePrivate::beginFrame() { if (inFrame && !activeLayers) qFatal("beginFrame called again without an endFrame, frame index was %d", frameIndex); if (Q_UNLIKELY(debug_render())) qDebug() << "***** begin frame, logical" << frameIndex << "present" << presentFrameIndex << "layer" << activeLayers; if (inFrame && activeLayers) { if (Q_UNLIKELY(debug_render())) qDebug("frame %d already in progress", frameIndex); if (!currentLayerDepth) { // There are layers and the real frame preparation starts now. Prepare for present. beginFrameDraw(); } return; } inFrame = true; // The device may have been lost. This is the point to attempt to start // again from scratch. Except when it is not. Operations that can happen // out of frame (e.g. textures, render targets) may trigger reinit earlier // than beginFrame. ensureDevice(); // Wait for a buffer to be available for Present, if the waitable event is in use. if (waitableSwapChainMaxLatency) WaitForSingleObject(swapEvent, INFINITE); // Block if needed. With 2 frames in flight frame N waits for frame N - 2, but not N - 1, to finish. currentPFrameIndex = frameIndex % frameInFlightCount; if (frameIndex >= frameInFlightCount) { ID3D12Fence *fence = frameFence[currentPFrameIndex]->fence.Get(); HANDLE event = frameFence[currentPFrameIndex]->event; // Frame fence values start from 1, hence the +1. const quint64 inFlightFenceValue = frameIndex - frameInFlightCount + 1; if (fence->GetCompletedValue() < inFlightFenceValue) { fence->SetEventOnCompletion(inFlightFenceValue, event); WaitForSingleObject(event, INFINITE); } frameCommandAllocator[currentPFrameIndex]->Reset(); } PersistentFrameData &pfd(pframeData[currentPFrameIndex]); pfd.cbvSrvUavNextFreeDescriptorIndex = 0; for (Buffer &b : buffers) { if (b.entryInUse()) b.d[currentPFrameIndex].dirty.clear(); } if (frameIndex >= frameInFlightCount - 1) { // Now sync the buffer changes from the previous, potentially still in // flight, frames. This is done by taking the ranges dirtied in those // frames and adding them to the global CPU-side buffer's dirty list, // as if this frame changed those ranges. (however, dirty ranges // inherited this way are not added to this frame's persistent // per-frame dirty list because the next frame after this one should // inherit this frame's genuine changes only, the rest will come from // the earlier ones) for (int delta = frameInFlightCount - 1; delta >= 1; --delta) { const int prevPFrameIndex = (frameIndex - delta) % frameInFlightCount; PersistentFrameData &prevFrameData(pframeData[prevPFrameIndex]); for (uint id : qAsConst(prevFrameData.buffersUsedInFrame)) { Buffer &b(buffers[id - 1]); if (b.d[currentPFrameIndex].buffer && b.d[currentPFrameIndex].dataSize == b.cpuDataRef.size) { if (Q_UNLIKELY(debug_buffer())) qDebug() << "frame" << frameIndex << "takes dirty" << b.d[prevPFrameIndex].dirty << "from frame" << frameIndex - delta << "for buffer" << id; for (const auto &range : qAsConst(b.d[prevPFrameIndex].dirty)) addDirtyRange(&b.cpuDataRef.dirty, range.first, range.second, b.cpuDataRef.size); } else { if (Q_UNLIKELY(debug_buffer())) qDebug() << "frame" << frameIndex << "makes all dirty from frame" << frameIndex - delta << "for buffer" << id; addDirtyRange(&b.cpuDataRef.dirty, 0, b.cpuDataRef.size, b.cpuDataRef.size); } } } } if (frameIndex >= frameInFlightCount) { // Do some texture upload bookkeeping. const quint64 finishedFrameIndex = frameIndex - frameInFlightCount; // we know since we just blocked for this // pfd conveniently refers to the same slot that was used by that frame if (!pfd.pendingTextureUploads.isEmpty()) { if (Q_UNLIKELY(debug_texture())) qDebug("Removing texture upload data for frame %d", finishedFrameIndex); for (uint id : qAsConst(pfd.pendingTextureUploads)) { const int idx = id - 1; Texture &t(textures[idx]); // fenceValue is 0 when the previous frame cleared it, skip in // this case. Skip also when fenceValue > the value it was when // adding the last GPU wait - this is the case when more // uploads were queued for the same texture in the meantime. if (t.fenceValue && t.fenceValue == t.lastWaitFenceValue) { t.fenceValue = 0; t.lastWaitFenceValue = 0; t.stagingBuffers.clear(); t.stagingHeaps.clear(); if (Q_UNLIKELY(debug_texture())) qDebug("Cleaned staging data for texture %u", id); } } pfd.pendingTextureUploads.clear(); if (!pfd.pendingTextureMipMap.isEmpty()) { if (Q_UNLIKELY(debug_texture())) qDebug() << "cleaning mipmap generation data for " << pfd.pendingTextureMipMap; // no special cleanup is needed as mipmap generation uses the frame's resources pfd.pendingTextureMipMap.clear(); } bool hasPending = false; for (int delta = 1; delta < frameInFlightCount; ++delta) { const PersistentFrameData &prevFrameData(pframeData[(frameIndex - delta) % frameInFlightCount]); if (!prevFrameData.pendingTextureUploads.isEmpty()) { hasPending = true; break; } } if (!hasPending) { if (Q_UNLIKELY(debug_texture())) qDebug("no more pending textures"); copyCommandAllocator->Reset(); } } // Do the deferred deletes. if (!pfd.deleteQueue.isEmpty()) { for (PersistentFrameData::DeleteQueueEntry &e : pfd.deleteQueue) { e.res = nullptr; e.descHeap = nullptr; if (e.cpuDescriptorPtr) { D3D12_CPU_DESCRIPTOR_HANDLE h = { e.cpuDescriptorPtr }; cpuDescHeapManager.release(h, e.descHeapType); } } pfd.deleteQueue.clear(); } // Deferred deletes issued outside a begin-endFrame go to the next // frame's out-of-frame delete queue as these cannot be executed in the // next beginFrame, only in next + frameInFlightCount. Move to the // normal queue if this is the next beginFrame. if (!pfd.outOfFrameDeleteQueue.isEmpty()) { pfd.deleteQueue = pfd.outOfFrameDeleteQueue; pfd.outOfFrameDeleteQueue.clear(); } // Mark released texture, buffer, etc. slots free. if (!pfd.pendingReleases.isEmpty()) { for (const auto &pr : qAsConst(pfd.pendingReleases)) { Q_ASSERT(pr.id); if (pr.type == PersistentFrameData::PendingRelease::TypeTexture) { Texture &t(textures[pr.id - 1]); Q_ASSERT(t.entryInUse()); t.flags &= ~RenderTarget::EntryInUse; // createTexture() can now reuse this entry t.texture = nullptr; } else if (pr.type == PersistentFrameData::PendingRelease::TypeBuffer) { Buffer &b(buffers[pr.id - 1]); Q_ASSERT(b.entryInUse()); b.flags &= ~Buffer::EntryInUse; for (int i = 0; i < frameInFlightCount; ++i) b.d[i].buffer = nullptr; } else { qFatal("Corrupt pending release list, type %d", pr.type); } } pfd.pendingReleases.clear(); } if (!pfd.outOfFramePendingReleases.isEmpty()) { pfd.pendingReleases = pfd.outOfFramePendingReleases; pfd.outOfFramePendingReleases.clear(); } } pfd.buffersUsedInFrame.clear(); beginDrawCalls(); // Prepare for present if this is a frame without layers. if (!activeLayers) beginFrameDraw(); } void QSGD3D12EnginePrivate::beginDrawCalls() { frameCommandList->Reset(frameCommandAllocator[frameIndex % frameInFlightCount].Get(), nullptr); commandList = frameCommandList.Get(); invalidateCachedFrameState(); } void QSGD3D12EnginePrivate::invalidateCachedFrameState() { tframeData.drawingMode = QSGGeometry::DrawingMode(-1); tframeData.currentIndexBuffer = 0; tframeData.activeTextureCount = 0; tframeData.drawCount = 0; tframeData.lastPso = nullptr; tframeData.lastRootSig = nullptr; tframeData.descHeapSet = false; } void QSGD3D12EnginePrivate::restoreFrameState(bool minimal) { queueSetRenderTarget(currentRenderTarget); if (!minimal) { queueViewport(tframeData.viewport); queueScissor(tframeData.scissor); queueSetBlendFactor(tframeData.blendFactor); queueSetStencilRef(tframeData.stencilRef); } finalizePipeline(tframeData.pipelineState); } void QSGD3D12EnginePrivate::beginFrameDraw() { if (windowSamples == 1) transitionResource(defaultRT[presentFrameIndex % swapChainBufferCount].Get(), commandList, D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET); } void QSGD3D12EnginePrivate::endFrame() { if (!inFrame) qFatal("endFrame called without beginFrame, frame index %d", frameIndex); if (Q_UNLIKELY(debug_render())) qDebug("***** end frame"); endDrawCalls(true); commandQueue->Signal(frameFence[frameIndex % frameInFlightCount]->fence.Get(), frameIndex + 1); ++frameIndex; inFrame = false; } void QSGD3D12EnginePrivate::endDrawCalls(bool lastInFrame) { PersistentFrameData &pfd(pframeData[currentPFrameIndex]); // Now is the time to sync all the changed areas in the buffers. if (Q_UNLIKELY(debug_buffer())) qDebug() << "buffers used in drawcall set" << pfd.buffersUsedInDrawCallSet; for (uint id : qAsConst(pfd.buffersUsedInDrawCallSet)) updateBuffer(&buffers[id - 1]); pfd.buffersUsedInFrame += pfd.buffersUsedInDrawCallSet; pfd.buffersUsedInDrawCallSet.clear(); // Add a wait on the 3D queue for the relevant texture uploads on the copy queue. if (!pfd.pendingTextureUploads.isEmpty()) { quint64 topFenceValue = 0; for (uint id : qAsConst(pfd.pendingTextureUploads)) { const int idx = id - 1; Texture &t(textures[idx]); Q_ASSERT(t.fenceValue); // skip if already added a Wait in the previous frame if (t.lastWaitFenceValue == t.fenceValue) continue; t.lastWaitFenceValue = t.fenceValue; if (t.fenceValue > topFenceValue) topFenceValue = t.fenceValue; if (t.mipmap()) pfd.pendingTextureMipMap.insert(id); } if (topFenceValue) { if (Q_UNLIKELY(debug_texture())) qDebug("added wait for texture fence %llu", topFenceValue); commandQueue->Wait(textureUploadFence.Get(), topFenceValue); // Generate mipmaps after the wait, when necessary. if (!pfd.pendingTextureMipMap.isEmpty()) { if (Q_UNLIKELY(debug_texture())) qDebug() << "starting mipmap generation for" << pfd.pendingTextureMipMap; for (uint id : qAsConst(pfd.pendingTextureMipMap)) mipmapper.queueGenerate(textures[id - 1]); } } } if (lastInFrame) { // Resolve and transition the backbuffer for present, if needed. const int idx = presentFrameIndex % swapChainBufferCount; if (windowSamples == 1) { transitionResource(defaultRT[idx].Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT); } else { if (Q_UNLIKELY(debug_render())) { const D3D12_RESOURCE_DESC desc = defaultRT[idx]->GetDesc(); qDebug("added resolve for multisampled render target (count %d, quality %d)", desc.SampleDesc.Count, desc.SampleDesc.Quality); } resolveMultisampledTarget(defaultRT[idx].Get(), backBufferRT[idx].Get(), D3D12_RESOURCE_STATE_PRESENT, commandList); } if (activeLayers) { if (Q_UNLIKELY(debug_render())) qDebug("this frame had %d layers", activeLayers); activeLayers = 0; } } // Go! HRESULT hr = frameCommandList->Close(); if (FAILED(hr)) { qWarning("Failed to close command list: %s", qPrintable(comErrorMessage(hr))); if (hr == E_INVALIDARG) qWarning("Invalid arguments. Some of the commands in the list is invalid in some way."); } ID3D12CommandList *commandLists[] = { frameCommandList.Get() }; commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists); commandList = nullptr; } void QSGD3D12EnginePrivate::beginLayer() { if (inFrame && !activeLayers) qFatal("Layer rendering cannot be started while a frame is active"); if (Q_UNLIKELY(debug_render())) qDebug("===== beginLayer active %d depth %d (inFrame=%d)", activeLayers, currentLayerDepth, inFrame); ++activeLayers; ++currentLayerDepth; // Do an early beginFrame. With multiple layers this results in // beginLayer - beginFrame - endLayer - beginLayer - beginFrame - endLayer - ... - (*) beginFrame - endFrame // where (*) denotes the start of the preparation of the actual, non-layer frame. if (activeLayers == 1) beginFrame(); } void QSGD3D12EnginePrivate::endLayer() { if (!inFrame || !activeLayers || !currentLayerDepth) qFatal("Mismatched endLayer"); if (Q_UNLIKELY(debug_render())) qDebug("===== endLayer active %d depth %d", activeLayers, currentLayerDepth); --currentLayerDepth; // Do not touch activeLayers. It remains valid until endFrame. } // Root signature: // [0] CBV - always present // [1] table with one SRV per texture (must be a table since root descriptor SRVs cannot be textures) - optional // one static sampler per texture - optional // // SRVs can be created freely via QSGD3D12CPUDescriptorHeapManager and stored // in QSGD3D12TextureView. The engine will copy them onto a dedicated, // shader-visible CBV-SRV-UAV heap in the correct order. void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipelineState) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } tframeData.pipelineState = pipelineState; RootSigCacheEntry *cachedRootSig = rootSigCache[pipelineState.shaders.rootSig]; if (!cachedRootSig) { if (Q_UNLIKELY(debug_render())) qDebug("NEW ROOTSIG"); cachedRootSig = new RootSigCacheEntry; D3D12_ROOT_PARAMETER rootParams[4]; int rootParamCount = 0; rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParams[0].Descriptor.ShaderRegister = 0; // b0 rootParams[0].Descriptor.RegisterSpace = 0; ++rootParamCount; D3D12_DESCRIPTOR_RANGE tvDescRange; if (pipelineState.shaders.rootSig.textureViewCount > 0) { rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1; tvDescRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; tvDescRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount; tvDescRange.BaseShaderRegister = 0; // t0, t1, ... tvDescRange.RegisterSpace = 0; tvDescRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; rootParams[rootParamCount].DescriptorTable.pDescriptorRanges = &tvDescRange; ++rootParamCount; } Q_ASSERT(rootParamCount <= _countof(rootParams)); D3D12_ROOT_SIGNATURE_DESC desc; desc.NumParameters = rootParamCount; desc.pParameters = rootParams; // Mixing up samplers and resource views in QSGD3D12TextureView means // that the number of static samplers has to match the number of // textures. This is not really ideal in general but works for Quick's use cases. // The shaders can still choose to declare and use fewer samplers, if they want to. desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount; D3D12_STATIC_SAMPLER_DESC staticSamplers[8]; int sdIdx = 0; Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers)); for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) { const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]); D3D12_STATIC_SAMPLER_DESC sd = {}; sd.Filter = D3D12_FILTER(tv.filter); sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz); sd.AddressV = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeVert); sd.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; sd.MinLOD = 0.0f; sd.MaxLOD = D3D12_FLOAT32_MAX; sd.ShaderRegister = sdIdx; // t0, t1, ... sd.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; staticSamplers[sdIdx++] = sd; } desc.pStaticSamplers = staticSamplers; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; ComPtr signature; ComPtr error; if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) { QByteArray msg(static_cast(error->GetBufferPointer()), error->GetBufferSize()); qWarning("Failed to serialize root signature: %s", qPrintable(msg)); return; } if (FAILED(device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&cachedRootSig->rootSig)))) { qWarning("Failed to create root signature"); return; } rootSigCache.insert(pipelineState.shaders.rootSig, cachedRootSig); } PSOCacheEntry *cachedPso = psoCache[pipelineState]; if (!cachedPso) { if (Q_UNLIKELY(debug_render())) qDebug("NEW PSO"); cachedPso = new PSOCacheEntry; D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS]; int ieIdx = 0; for (int i = 0; i < pipelineState.inputElementCount; ++i) { const QSGD3D12InputElement &ie(pipelineState.inputElements[i]); D3D12_INPUT_ELEMENT_DESC ieDesc = {}; ieDesc.SemanticName = ie.semanticName; ieDesc.SemanticIndex = ie.semanticIndex; ieDesc.Format = DXGI_FORMAT(ie.format); ieDesc.InputSlot = ie.slot; ieDesc.AlignedByteOffset = ie.offset; ieDesc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; if (Q_UNLIKELY(debug_render())) qDebug("input [%d]: %s %d 0x%x %d", ieIdx, ie.semanticName, ie.offset, ie.format, ie.slot); inputElements[ieIdx++] = ieDesc; } psoDesc.InputLayout = { inputElements, UINT(ieIdx) }; psoDesc.pRootSignature = cachedRootSig->rootSig.Get(); D3D12_SHADER_BYTECODE vshader; vshader.pShaderBytecode = pipelineState.shaders.vs; vshader.BytecodeLength = pipelineState.shaders.vsSize; D3D12_SHADER_BYTECODE pshader; pshader.pShaderBytecode = pipelineState.shaders.ps; pshader.BytecodeLength = pipelineState.shaders.psSize; psoDesc.VS = vshader; psoDesc.PS = pshader; D3D12_RASTERIZER_DESC rastDesc = {}; rastDesc.FillMode = D3D12_FILL_MODE_SOLID; rastDesc.CullMode = D3D12_CULL_MODE(pipelineState.cullMode); rastDesc.FrontCounterClockwise = pipelineState.frontCCW; rastDesc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; rastDesc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; rastDesc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; rastDesc.DepthClipEnable = TRUE; psoDesc.RasterizerState = rastDesc; D3D12_BLEND_DESC blendDesc = {}; if (pipelineState.blend == QSGD3D12PipelineState::BlendNone) { D3D12_RENDER_TARGET_BLEND_DESC noBlendDesc = {}; noBlendDesc.RenderTargetWriteMask = pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0; blendDesc.RenderTarget[0] = noBlendDesc; } else if (pipelineState.blend == QSGD3D12PipelineState::BlendPremul) { const D3D12_RENDER_TARGET_BLEND_DESC premulBlendDesc = { TRUE, FALSE, D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, D3D12_LOGIC_OP_NOOP, UINT8(pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0) }; blendDesc.RenderTarget[0] = premulBlendDesc; } else if (pipelineState.blend == QSGD3D12PipelineState::BlendColor) { const D3D12_RENDER_TARGET_BLEND_DESC colorBlendDesc = { TRUE, FALSE, D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_SRC_COLOR, D3D12_BLEND_OP_ADD, D3D12_BLEND_BLEND_FACTOR, D3D12_BLEND_INV_SRC_ALPHA, D3D12_BLEND_OP_ADD, D3D12_LOGIC_OP_NOOP, UINT8(pipelineState.colorWrite ? D3D12_COLOR_WRITE_ENABLE_ALL : 0) }; blendDesc.RenderTarget[0] = colorBlendDesc; } psoDesc.BlendState = blendDesc; psoDesc.DepthStencilState.DepthEnable = pipelineState.depthEnable; psoDesc.DepthStencilState.DepthWriteMask = pipelineState.depthWrite ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC(pipelineState.depthFunc); psoDesc.DepthStencilState.StencilEnable = pipelineState.stencilEnable; psoDesc.DepthStencilState.StencilReadMask = psoDesc.DepthStencilState.StencilWriteMask = 0xFF; D3D12_DEPTH_STENCILOP_DESC stencilOpDesc = { D3D12_STENCIL_OP(pipelineState.stencilFailOp), D3D12_STENCIL_OP(pipelineState.stencilDepthFailOp), D3D12_STENCIL_OP(pipelineState.stencilPassOp), D3D12_COMPARISON_FUNC(pipelineState.stencilFunc) }; psoDesc.DepthStencilState.FrontFace = psoDesc.DepthStencilState.BackFace = stencilOpDesc; psoDesc.SampleMask = UINT_MAX; psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE(pipelineState.topologyType); psoDesc.NumRenderTargets = 1; psoDesc.RTVFormats[0] = RT_COLOR_FORMAT; psoDesc.DSVFormat = DXGI_FORMAT_D24_UNORM_S8_UINT; psoDesc.SampleDesc = defaultRT[0]->GetDesc().SampleDesc; HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&cachedPso->pso)); if (FAILED(hr)) { qWarning("Failed to create graphics pipeline state: %s", qPrintable(comErrorMessage(hr))); return; } psoCache.insert(pipelineState, cachedPso); } if (cachedPso->pso.Get() != tframeData.lastPso) { tframeData.lastPso = cachedPso->pso.Get(); commandList->SetPipelineState(tframeData.lastPso); } if (cachedRootSig->rootSig.Get() != tframeData.lastRootSig) { tframeData.lastRootSig = cachedRootSig->rootSig.Get(); commandList->SetGraphicsRootSignature(tframeData.lastRootSig); } if (pipelineState.shaders.rootSig.textureViewCount > 0) setDescriptorHeaps(); } void QSGD3D12EnginePrivate::setDescriptorHeaps(bool force) { if (force || !tframeData.descHeapSet) { tframeData.descHeapSet = true; ID3D12DescriptorHeap *heaps[] = { pframeData[currentPFrameIndex].gpuCbvSrvUavHeap.Get() }; commandList->SetDescriptorHeaps(_countof(heaps), heaps); } } void QSGD3D12EnginePrivate::queueViewport(const QRect &rect) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } tframeData.viewport = rect; const D3D12_VIEWPORT viewport = { float(rect.x()), float(rect.y()), float(rect.width()), float(rect.height()), 0, 1 }; commandList->RSSetViewports(1, &viewport); } void QSGD3D12EnginePrivate::queueScissor(const QRect &rect) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } tframeData.scissor = rect; const D3D12_RECT scissorRect = { rect.x(), rect.y(), rect.x() + rect.width(), rect.y() + rect.height() }; commandList->RSSetScissorRects(1, &scissorRect); } void QSGD3D12EnginePrivate::queueSetRenderTarget(uint id) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle; D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle; if (!id) { rtvHandle = defaultRTV[presentFrameIndex % swapChainBufferCount]; dsvHandle = defaultDSV; } else { const int idx = id - 1; Q_ASSERT(idx < renderTargets.count() && renderTargets[idx].entryInUse()); RenderTarget &rt(renderTargets[idx]); rtvHandle = rt.rtv; dsvHandle = rt.dsv; if (!(rt.flags & RenderTarget::NeedsReadBarrier)) { rt.flags |= RenderTarget::NeedsReadBarrier; if (!(rt.flags & RenderTarget::Multisample)) transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET); } } commandList->OMSetRenderTargets(1, &rtvHandle, FALSE, &dsvHandle); currentRenderTarget = id; } void QSGD3D12EnginePrivate::queueClearRenderTarget(const QColor &color) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } const float clearColor[] = { float(color.redF()), float(color.blueF()), float(color.greenF()), float(color.alphaF()) }; D3D12_CPU_DESCRIPTOR_HANDLE rtv = !currentRenderTarget ? defaultRTV[presentFrameIndex % swapChainBufferCount] : renderTargets[currentRenderTarget - 1].rtv; commandList->ClearRenderTargetView(rtv, clearColor, 0, nullptr); } void QSGD3D12EnginePrivate::queueClearDepthStencil(float depthValue, quint8 stencilValue, QSGD3D12Engine::ClearFlags which) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } D3D12_CPU_DESCRIPTOR_HANDLE dsv = !currentRenderTarget ? defaultDSV : renderTargets[currentRenderTarget - 1].dsv; commandList->ClearDepthStencilView(dsv, D3D12_CLEAR_FLAGS(int(which)), depthValue, stencilValue, 0, nullptr); } void QSGD3D12EnginePrivate::queueSetBlendFactor(const QVector4D &factor) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } tframeData.blendFactor = factor; const float f[4] = { factor.x(), factor.y(), factor.z(), factor.w() }; commandList->OMSetBlendFactor(f); } void QSGD3D12EnginePrivate::queueSetStencilRef(quint32 ref) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } tframeData.stencilRef = ref; commandList->OMSetStencilRef(ref); } void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams ¶ms) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } const bool skip = tframeData.scissor.isEmpty(); PersistentFrameData &pfd(pframeData[currentPFrameIndex]); pfd.buffersUsedInDrawCallSet.insert(params.vertexBuf); const int vertexBufIdx = params.vertexBuf - 1; Q_ASSERT(params.vertexBuf && vertexBufIdx < buffers.count() && buffers[vertexBufIdx].entryInUse()); pfd.buffersUsedInDrawCallSet.insert(params.constantBuf); const int constantBufIdx = params.constantBuf - 1; Q_ASSERT(params.constantBuf && constantBufIdx < buffers.count() && buffers[constantBufIdx].entryInUse()); int indexBufIdx = -1; if (params.indexBuf) { pfd.buffersUsedInDrawCallSet.insert(params.indexBuf); indexBufIdx = params.indexBuf - 1; Q_ASSERT(indexBufIdx < buffers.count() && buffers[indexBufIdx].entryInUse()); } // Ensure buffers are created but do not copy the data here, leave that to endDrawCalls(). ensureBuffer(&buffers[vertexBufIdx]); ensureBuffer(&buffers[constantBufIdx]); if (indexBufIdx >= 0) ensureBuffer(&buffers[indexBufIdx]); // Set the CBV. if (!skip && params.cboOffset >= 0) { ID3D12Resource *cbuf = buffers[constantBufIdx].d[currentPFrameIndex].buffer.Get(); if (cbuf) commandList->SetGraphicsRootConstantBufferView(0, cbuf->GetGPUVirtualAddress() + params.cboOffset); } // Set up vertex and index buffers. ID3D12Resource *vbuf = buffers[vertexBufIdx].d[currentPFrameIndex].buffer.Get(); ID3D12Resource *ibuf = indexBufIdx >= 0 && params.startIndexIndex >= 0 ? buffers[indexBufIdx].d[currentPFrameIndex].buffer.Get() : nullptr; if (!skip && params.mode != tframeData.drawingMode) { D3D_PRIMITIVE_TOPOLOGY topology; switch (params.mode) { case QSGGeometry::DrawPoints: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; case QSGGeometry::DrawLines: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; case QSGGeometry::DrawLineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case QSGGeometry::DrawTriangles: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case QSGGeometry::DrawTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; default: qFatal("Unsupported drawing mode 0x%x", params.mode); break; } commandList->IASetPrimitiveTopology(topology); tframeData.drawingMode = params.mode; } if (!skip) { D3D12_VERTEX_BUFFER_VIEW vbv; vbv.BufferLocation = vbuf->GetGPUVirtualAddress() + params.vboOffset; vbv.SizeInBytes = params.vboSize; vbv.StrideInBytes = params.vboStride; // must be set after the topology commandList->IASetVertexBuffers(0, 1, &vbv); } if (!skip && params.startIndexIndex >= 0 && ibuf && tframeData.currentIndexBuffer != params.indexBuf) { tframeData.currentIndexBuffer = params.indexBuf; D3D12_INDEX_BUFFER_VIEW ibv; ibv.BufferLocation = ibuf->GetGPUVirtualAddress(); ibv.SizeInBytes = buffers[indexBufIdx].cpuDataRef.size; ibv.Format = DXGI_FORMAT(params.indexFormat); commandList->IASetIndexBuffer(&ibv); } // Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap. Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount); if (tframeData.activeTextureCount > 0) { if (!skip) { ensureGPUDescriptorHeap(tframeData.activeTextureCount); const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart(); dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; for (int i = 0; i < tframeData.activeTextureCount; ++i) { const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]); Q_ASSERT(t.id); const int idx = t.id - 1; const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture; device->CopyDescriptorsSimple(1, dst, isTex ? textures[idx].srv : renderTargets[idx].srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); dst.ptr += stride; } D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart(); gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; commandList->SetGraphicsRootDescriptorTable(1, gpuAddr); pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount; } tframeData.activeTextureCount = 0; } // Add the draw call. if (!skip) { ++tframeData.drawCount; if (params.startIndexIndex >= 0) commandList->DrawIndexedInstanced(params.count, 1, params.startIndexIndex, 0, 0); else commandList->DrawInstanced(params.count, 1, 0, 0); } if (tframeData.drawCount == MAX_DRAW_CALLS_PER_LIST) { if (Q_UNLIKELY(debug_render())) qDebug("Limit of %d draw calls reached, executing command list", MAX_DRAW_CALLS_PER_LIST); // submit the command list endDrawCalls(); // start a new one beginDrawCalls(); // prepare for the upcoming drawcalls restoreFrameState(); } } void QSGD3D12EnginePrivate::ensureGPUDescriptorHeap(int cbvSrvUavDescriptorCount) { PersistentFrameData &pfd(pframeData[currentPFrameIndex]); int newSize = pfd.gpuCbvSrvUavHeapSize; while (pfd.cbvSrvUavNextFreeDescriptorIndex + cbvSrvUavDescriptorCount > newSize) newSize *= 2; if (newSize != pfd.gpuCbvSrvUavHeapSize) { if (Q_UNLIKELY(debug_descheap())) qDebug("Out of space for SRVs, creating new CBV-SRV-UAV descriptor heap with descriptor count %d", newSize); deferredDelete(pfd.gpuCbvSrvUavHeap); createCbvSrvUavHeap(currentPFrameIndex, newSize); setDescriptorHeaps(true); pfd.cbvSrvUavNextFreeDescriptorIndex = 0; } } void QSGD3D12EnginePrivate::present() { if (!initialized) return; if (Q_UNLIKELY(debug_render())) qDebug("--- present with vsync ---"); // This call will not block the CPU unless at least 3 buffers are queued, // unless the waitable frame latency event is enabled. Then the latency of // 3 is changed to whatever value desired, and blocking happens in // beginFrame. If none of these hold, the fence-based wait in beginFrame // throttles. Vsync (interval 1) is always enabled. HRESULT hr = swapChain->Present(1, 0); if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) { deviceManager()->deviceLossDetected(); return; } else if (FAILED(hr)) { qWarning("Present failed: %s", qPrintable(comErrorMessage(hr))); return; } #ifndef Q_OS_WINRT if (dcompDevice) dcompDevice->Commit(); #endif ++presentFrameIndex; } void QSGD3D12EnginePrivate::waitGPU() { if (!initialized) return; if (Q_UNLIKELY(debug_render())) qDebug("--- blocking wait for GPU ---"); waitForGPU(presentFence); } template uint newId(T *tbl) { uint id = 0; for (int i = 0; i < tbl->count(); ++i) { if (!(*tbl)[i].entryInUse()) { id = i + 1; break; } } if (!id) { tbl->resize(tbl->size() + 1); id = tbl->count(); } (*tbl)[id - 1].flags = 0x01; // reset flags and set EntryInUse return id; } template void syncEntryFlags(T *e, int flag, bool b) { if (b) e->flags |= flag; else e->flags &= ~flag; } uint QSGD3D12EnginePrivate::genBuffer() { return newId(&buffers); } void QSGD3D12EnginePrivate::releaseBuffer(uint id) { if (!id || !initialized) return; const int idx = id - 1; Q_ASSERT(idx < buffers.count()); if (Q_UNLIKELY(debug_buffer())) qDebug("releasing buffer %u", id); Buffer &b(buffers[idx]); if (!b.entryInUse()) return; // Do not null out and do not mark the entry reusable yet. // Do that only when the frames potentially in flight have finished for sure. for (int i = 0; i < frameInFlightCount; ++i) { if (b.d[i].buffer) deferredDelete(b.d[i].buffer); } QSet *pendingReleasesSet = inFrame ? &pframeData[currentPFrameIndex].pendingReleases : &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFramePendingReleases; pendingReleasesSet->insert(PersistentFrameData::PendingRelease(PersistentFrameData::PendingRelease::TypeBuffer, id)); } void QSGD3D12EnginePrivate::resetBuffer(uint id, const quint8 *data, int size) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < buffers.count() && buffers[idx].entryInUse()); Buffer &b(buffers[idx]); if (Q_UNLIKELY(debug_buffer())) qDebug("reset buffer %u, size %d", id, size); b.cpuDataRef.p = data; b.cpuDataRef.size = size; b.cpuDataRef.dirty.clear(); b.d[currentPFrameIndex].dirty.clear(); if (size > 0) { const QPair range = qMakePair(0, size); b.cpuDataRef.dirty.append(range); b.d[currentPFrameIndex].dirty.append(range); } } void QSGD3D12EnginePrivate::addDirtyRange(DirtyList *dirty, int offset, int size, int bufferSize) { // Bail out when the dirty list already spans the entire buffer. if (!dirty->isEmpty()) { if (dirty->at(0).first == 0 && dirty->at(0).second == bufferSize) return; } const QPair range = qMakePair(offset, size); if (!dirty->contains(range)) dirty->append(range); } void QSGD3D12EnginePrivate::markBufferDirty(uint id, int offset, int size) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < buffers.count() && buffers[idx].entryInUse()); Buffer &b(buffers[idx]); addDirtyRange(&b.cpuDataRef.dirty, offset, size, b.cpuDataRef.size); addDirtyRange(&b.d[currentPFrameIndex].dirty, offset, size, b.cpuDataRef.size); } uint QSGD3D12EnginePrivate::genTexture() { const uint id = newId(&textures); textures[id - 1].fenceValue = 0; return id; } static inline DXGI_FORMAT textureFormat(QImage::Format format, bool wantsAlpha, bool mipmap, bool force32bit, QImage::Format *imageFormat, int *bytesPerPixel) { DXGI_FORMAT f = DXGI_FORMAT_R8G8B8A8_UNORM; QImage::Format convFormat = format; int bpp = 4; if (!mipmap) { switch (format) { case QImage::Format_Grayscale8: case QImage::Format_Indexed8: case QImage::Format_Alpha8: if (!force32bit) { f = DXGI_FORMAT_R8_UNORM; bpp = 1; } else { convFormat = QImage::Format_RGBA8888; } break; case QImage::Format_RGB32: f = DXGI_FORMAT_B8G8R8A8_UNORM; break; case QImage::Format_ARGB32: f = DXGI_FORMAT_B8G8R8A8_UNORM; convFormat = wantsAlpha ? QImage::Format_ARGB32_Premultiplied : QImage::Format_RGB32; break; case QImage::Format_ARGB32_Premultiplied: f = DXGI_FORMAT_B8G8R8A8_UNORM; convFormat = wantsAlpha ? format : QImage::Format_RGB32; break; default: convFormat = wantsAlpha ? QImage::Format_RGBA8888_Premultiplied : QImage::Format_RGBX8888; break; } } else { // Mipmap generation needs unordered access and BGRA is not an option for that. Stick to RGBA. convFormat = wantsAlpha ? QImage::Format_RGBA8888_Premultiplied : QImage::Format_RGBX8888; } if (imageFormat) *imageFormat = convFormat; if (bytesPerPixel) *bytesPerPixel = bpp; return f; } static inline QImage::Format imageFormatForTexture(DXGI_FORMAT format) { QImage::Format f = QImage::Format_Invalid; switch (format) { case DXGI_FORMAT_R8G8B8A8_UNORM: f = QImage::Format_RGBA8888_Premultiplied; break; case DXGI_FORMAT_B8G8R8A8_UNORM: f = QImage::Format_ARGB32_Premultiplied; break; case DXGI_FORMAT_R8_UNORM: f = QImage::Format_Grayscale8; break; default: break; } return f; } void QSGD3D12EnginePrivate::createTexture(uint id, const QSize &size, QImage::Format format, QSGD3D12Engine::TextureCreateFlags createFlags) { ensureDevice(); Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < textures.count() && textures[idx].entryInUse()); Texture &t(textures[idx]); syncEntryFlags(&t, Texture::Alpha, createFlags & QSGD3D12Engine::TextureWithAlpha); syncEntryFlags(&t, Texture::MipMap, createFlags & QSGD3D12Engine::TextureWithMipMaps); const QSize adjustedSize = !t.mipmap() ? size : QSGD3D12Engine::mipMapAdjustedSourceSize(size); D3D12_HEAP_PROPERTIES defaultHeapProp = {}; defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT; D3D12_RESOURCE_DESC textureDesc = {}; textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; textureDesc.Width = adjustedSize.width(); textureDesc.Height = adjustedSize.height(); textureDesc.DepthOrArraySize = 1; textureDesc.MipLevels = !t.mipmap() ? 1 : QSGD3D12Engine::mipMapLevels(adjustedSize); textureDesc.Format = textureFormat(format, t.alpha(), t.mipmap(), createFlags.testFlag(QSGD3D12Engine::TextureAlways32Bit), nullptr, nullptr); textureDesc.SampleDesc.Count = 1; textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; if (t.mipmap()) textureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture)); if (FAILED(hr)) { qWarning("Failed to create texture resource: %s", qPrintable(comErrorMessage(hr))); return; } t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srvDesc.Format = textureDesc.Format; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Texture2D.MipLevels = textureDesc.MipLevels; device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv); if (t.mipmap()) { // Mipmap generation will need an UAV for each level that needs to be generated. t.mipUAVs.clear(); for (int level = 1; level < textureDesc.MipLevels; ++level) { D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; uavDesc.Format = textureDesc.Format; uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; uavDesc.Texture2D.MipSlice = level; D3D12_CPU_DESCRIPTOR_HANDLE h = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); device->CreateUnorderedAccessView(t.texture.Get(), nullptr, &uavDesc, h); t.mipUAVs.append(h); } } if (Q_UNLIKELY(debug_texture())) qDebug("created texture %u, size %dx%d, miplevels %d", id, adjustedSize.width(), adjustedSize.height(), textureDesc.MipLevels); } void QSGD3D12EnginePrivate::queueTextureResize(uint id, const QSize &size) { Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < textures.count() && textures[idx].entryInUse()); Texture &t(textures[idx]); if (!t.texture) { qWarning("Cannot resize non-created texture %u", id); return; } if (t.mipmap()) { qWarning("Cannot resize mipmapped texture %u", id); return; } if (Q_UNLIKELY(debug_texture())) qDebug("resizing texture %u, size %dx%d", id, size.width(), size.height()); D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc(); textureDesc.Width = size.width(); textureDesc.Height = size.height(); D3D12_HEAP_PROPERTIES defaultHeapProp = {}; defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT; ComPtr oldTexture = t.texture; deferredDelete(t.texture); HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture)); if (FAILED(hr)) { qWarning("Failed to create resized texture resource: %s", qPrintable(comErrorMessage(hr))); return; } deferredDelete(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srvDesc.Format = textureDesc.Format; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; srvDesc.Texture2D.MipLevels = textureDesc.MipLevels; device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv); D3D12_TEXTURE_COPY_LOCATION dstLoc; dstLoc.pResource = t.texture.Get(); dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dstLoc.SubresourceIndex = 0; D3D12_TEXTURE_COPY_LOCATION srcLoc; srcLoc.pResource = oldTexture.Get(); srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; srcLoc.SubresourceIndex = 0; copyCommandList->Reset(copyCommandAllocator.Get(), nullptr); copyCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); copyCommandList->Close(); ID3D12CommandList *commandLists[] = { copyCommandList.Get() }; copyCommandQueue->ExecuteCommandLists(_countof(commandLists), commandLists); t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1; copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue); if (Q_UNLIKELY(debug_texture())) qDebug("submitted old content copy for texture %u on the copy queue, fence %llu", id, t.fenceValue); } void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QVector &images, const QVector &dstPos, QSGD3D12Engine::TextureUploadFlags flags) { Q_ASSERT(id); Q_ASSERT(images.count() == dstPos.count()); if (images.isEmpty()) return; const int idx = id - 1; Q_ASSERT(idx < textures.count() && textures[idx].entryInUse()); Texture &t(textures[idx]); Q_ASSERT(t.texture); // When mipmapping is not in use, image can be smaller than the size passed // to createTexture() and dstPos can specify a non-zero destination position. if (t.mipmap() && (images.count() != 1 || dstPos.count() != 1 || !dstPos[0].isNull())) { qWarning("Mipmapped textures (%u) do not support partial uploads", id); return; } // Make life simpler by disallowing queuing a new mipmapped upload before the previous one finishes. if (t.mipmap() && t.fenceValue) { qWarning("Attempted to queue mipmapped texture upload (%u) while a previous upload is still in progress", id); return; } t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1; if (Q_UNLIKELY(debug_texture())) qDebug("adding upload for texture %u on the copy queue, fence %llu", id, t.fenceValue); D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc(); const QSize adjustedTextureSize(textureDesc.Width, textureDesc.Height); int totalSize = 0; for (const QImage &image : images) { int bytesPerPixel; textureFormat(image.format(), t.alpha(), t.mipmap(), flags.testFlag(QSGD3D12Engine::TextureUploadAlways32Bit), nullptr, &bytesPerPixel); const int w = !t.mipmap() ? image.width() : adjustedTextureSize.width(); const int h = !t.mipmap() ? image.height() : adjustedTextureSize.height(); const int stride = alignedSize(w * bytesPerPixel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); totalSize += alignedSize(h * stride, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); } if (Q_UNLIKELY(debug_texture())) qDebug("%d sub-uploads, heap size %d bytes", images.count(), totalSize); // Instead of individual committed resources for each upload buffer, // allocate only once and use placed resources. D3D12_HEAP_PROPERTIES uploadHeapProp = {}; uploadHeapProp.Type = D3D12_HEAP_TYPE_UPLOAD; D3D12_HEAP_DESC uploadHeapDesc = {}; uploadHeapDesc.SizeInBytes = totalSize; uploadHeapDesc.Properties = uploadHeapProp; uploadHeapDesc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; Texture::StagingHeap sheap; if (FAILED(device->CreateHeap(&uploadHeapDesc, IID_PPV_ARGS(&sheap.heap)))) { qWarning("Failed to create texture upload heap of size %d", totalSize); return; } t.stagingHeaps.append(sheap); copyCommandList->Reset(copyCommandAllocator.Get(), nullptr); int placedOffset = 0; for (int i = 0; i < images.count(); ++i) { QImage::Format convFormat; int bytesPerPixel; textureFormat(images[i].format(), t.alpha(), t.mipmap(), flags.testFlag(QSGD3D12Engine::TextureUploadAlways32Bit), &convFormat, &bytesPerPixel); if (Q_UNLIKELY(debug_texture() && i == 0)) qDebug("source image format %d, target format %d, bpp %d", images[i].format(), convFormat, bytesPerPixel); QImage convImage = images[i].format() == convFormat ? images[i] : images[i].convertToFormat(convFormat); if (t.mipmap() && adjustedTextureSize != convImage.size()) convImage = convImage.scaled(adjustedTextureSize, Qt::IgnoreAspectRatio, Qt::SmoothTransformation); const int stride = alignedSize(convImage.width() * bytesPerPixel, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); D3D12_RESOURCE_DESC bufDesc = {}; bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; bufDesc.Width = stride * convImage.height(); bufDesc.Height = 1; bufDesc.DepthOrArraySize = 1; bufDesc.MipLevels = 1; bufDesc.Format = DXGI_FORMAT_UNKNOWN; bufDesc.SampleDesc.Count = 1; bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; Texture::StagingBuffer sbuf; if (FAILED(device->CreatePlacedResource(sheap.heap.Get(), placedOffset, &bufDesc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(&sbuf.buffer)))) { qWarning("Failed to create texture upload buffer"); return; } quint8 *p = nullptr; const D3D12_RANGE readRange = { 0, 0 }; if (FAILED(sbuf.buffer->Map(0, &readRange, reinterpret_cast(&p)))) { qWarning("Map failed (texture upload buffer)"); return; } for (int y = 0, ye = convImage.height(); y < ye; ++y) { memcpy(p, convImage.constScanLine(y), convImage.width() * bytesPerPixel); p += stride; } sbuf.buffer->Unmap(0, nullptr); D3D12_TEXTURE_COPY_LOCATION dstLoc; dstLoc.pResource = t.texture.Get(); dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dstLoc.SubresourceIndex = 0; D3D12_TEXTURE_COPY_LOCATION srcLoc; srcLoc.pResource = sbuf.buffer.Get(); srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; srcLoc.PlacedFootprint.Offset = 0; srcLoc.PlacedFootprint.Footprint.Format = textureDesc.Format; srcLoc.PlacedFootprint.Footprint.Width = convImage.width(); srcLoc.PlacedFootprint.Footprint.Height = convImage.height(); srcLoc.PlacedFootprint.Footprint.Depth = 1; srcLoc.PlacedFootprint.Footprint.RowPitch = stride; copyCommandList->CopyTextureRegion(&dstLoc, dstPos[i].x(), dstPos[i].y(), 0, &srcLoc, nullptr); t.stagingBuffers.append(sbuf); placedOffset += alignedSize(bufDesc.Width, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); } copyCommandList->Close(); ID3D12CommandList *commandLists[] = { copyCommandList.Get() }; copyCommandQueue->ExecuteCommandLists(_countof(commandLists), commandLists); copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue); } void QSGD3D12EnginePrivate::releaseTexture(uint id) { if (!id || !initialized) return; const int idx = id - 1; Q_ASSERT(idx < textures.count()); if (Q_UNLIKELY(debug_texture())) qDebug("releasing texture %d", id); Texture &t(textures[idx]); if (!t.entryInUse()) return; if (t.texture) { deferredDelete(t.texture); deferredDelete(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); for (D3D12_CPU_DESCRIPTOR_HANDLE h : t.mipUAVs) deferredDelete(h, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } QSet *pendingReleasesSet = inFrame ? &pframeData[currentPFrameIndex].pendingReleases : &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFramePendingReleases; pendingReleasesSet->insert(PersistentFrameData::PendingRelease(PersistentFrameData::PendingRelease::TypeTexture, id)); } void QSGD3D12EnginePrivate::useTexture(uint id) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < textures.count() && textures[idx].entryInUse()); // Within one frame the order of calling this function determines the // texture register (0, 1, ...) so fill up activeTextures accordingly. tframeData.activeTextures[tframeData.activeTextureCount++] = TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id); if (textures[idx].fenceValue) pframeData[currentPFrameIndex].pendingTextureUploads.insert(id); } bool QSGD3D12EnginePrivate::MipMapGen::initialize(QSGD3D12EnginePrivate *enginePriv) { engine = enginePriv; D3D12_STATIC_SAMPLER_DESC sampler = {}; sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; sampler.MinLOD = 0.0f; sampler.MaxLOD = D3D12_FLOAT32_MAX; D3D12_DESCRIPTOR_RANGE descRange[2]; descRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; descRange[0].NumDescriptors = 1; descRange[0].BaseShaderRegister = 0; // t0 descRange[0].RegisterSpace = 0; descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; descRange[1].NumDescriptors = 4; descRange[1].BaseShaderRegister = 0; // u0..u3 descRange[1].RegisterSpace = 0; descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; // Split into two to allow switching between the first and second set of UAVs later. D3D12_ROOT_PARAMETER rootParameters[3]; rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[0].DescriptorTable.NumDescriptorRanges = 1; rootParameters[0].DescriptorTable.pDescriptorRanges = &descRange[0]; rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[1].DescriptorTable.NumDescriptorRanges = 1; rootParameters[1].DescriptorTable.pDescriptorRanges = &descRange[1]; rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; rootParameters[2].Constants.Num32BitValues = 4; // uint2 mip1Size, uint sampleLevel, uint totalMips rootParameters[2].Constants.ShaderRegister = 0; // b0 rootParameters[2].Constants.RegisterSpace = 0; D3D12_ROOT_SIGNATURE_DESC desc = {}; desc.NumParameters = 3; desc.pParameters = rootParameters; desc.NumStaticSamplers = 1; desc.pStaticSamplers = &sampler; ComPtr signature; ComPtr error; if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) { QByteArray msg(static_cast(error->GetBufferPointer()), error->GetBufferSize()); qWarning("Failed to serialize compute root signature: %s", qPrintable(msg)); return false; } if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&rootSig)))) { qWarning("Failed to create compute root signature"); return false; } D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; psoDesc.pRootSignature = rootSig.Get(); psoDesc.CS.pShaderBytecode = g_CS_Generate4MipMaps; psoDesc.CS.BytecodeLength = sizeof(g_CS_Generate4MipMaps); if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)))) { qWarning("Failed to create compute pipeline state"); return false; } return true; } void QSGD3D12EnginePrivate::MipMapGen::releaseResources() { pipelineState = nullptr; rootSig = nullptr; } // The mipmap generator is used to insert commands on the main 3D queue. It is // guaranteed that the queue has a wait for the base texture level upload // before invoking queueGenerate(). There can be any number of invocations // without waiting for earlier ones to finish. finished() is invoked when it is // known for sure that frame containing the upload and mipmap generation has // finished on the GPU. void QSGD3D12EnginePrivate::MipMapGen::queueGenerate(const Texture &t) { D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc(); engine->commandList->SetPipelineState(pipelineState.Get()); engine->commandList->SetComputeRootSignature(rootSig.Get()); // 1 SRV + (miplevels - 1) UAVs const int descriptorCount = 1 + (textureDesc.MipLevels - 1); engine->ensureGPUDescriptorHeap(descriptorCount); // The descriptor heap is set on the command list either because the // ensure() call above resized, or, typically, due to a texture-dependent // draw call earlier. engine->transitionResource(t.texture.Get(), engine->commandList, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); QSGD3D12EnginePrivate::PersistentFrameData &pfd(engine->pframeData[engine->currentPFrameIndex]); const uint stride = engine->cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_CPU_DESCRIPTOR_HANDLE h = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart(); h.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; engine->device->CopyDescriptorsSimple(1, h, t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); h.ptr += stride; for (int level = 1; level < textureDesc.MipLevels; ++level, h.ptr += stride) engine->device->CopyDescriptorsSimple(1, h, t.mipUAVs[level - 1], D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart(); gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; engine->commandList->SetComputeRootDescriptorTable(0, gpuAddr); gpuAddr.ptr += stride; // now points to the first UAV for (int level = 1; level < textureDesc.MipLevels; level += 4, gpuAddr.ptr += stride * 4) { engine->commandList->SetComputeRootDescriptorTable(1, gpuAddr); QSize sz(textureDesc.Width, textureDesc.Height); sz.setWidth(qMax(1, sz.width() >> level)); sz.setHeight(qMax(1, sz.height() >> level)); const quint32 constants[4] = { quint32(sz.width()), quint32(sz.height()), quint32(level - 1), quint32(textureDesc.MipLevels - 1) }; engine->commandList->SetComputeRoot32BitConstants(2, 4, constants, 0); engine->commandList->Dispatch(sz.width(), sz.height(), 1); engine->uavBarrier(t.texture.Get(), engine->commandList); } engine->transitionResource(t.texture.Get(), engine->commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); pfd.cbvSrvUavNextFreeDescriptorIndex += descriptorCount; } void QSGD3D12EnginePrivate::deferredDelete(ComPtr res) { PersistentFrameData::DeleteQueueEntry e; e.res = res; QVector *dq = inFrame ? &pframeData[currentPFrameIndex].deleteQueue : &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue; (*dq) << e; } void QSGD3D12EnginePrivate::deferredDelete(ComPtr dh) { PersistentFrameData::DeleteQueueEntry e; e.descHeap = dh; QVector *dq = inFrame ? &pframeData[currentPFrameIndex].deleteQueue : &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue; (*dq) << e; } void QSGD3D12EnginePrivate::deferredDelete(D3D12_CPU_DESCRIPTOR_HANDLE h, D3D12_DESCRIPTOR_HEAP_TYPE type) { PersistentFrameData::DeleteQueueEntry e; e.cpuDescriptorPtr = h.ptr; e.descHeapType = type; QVector *dq = inFrame ? &pframeData[currentPFrameIndex].deleteQueue : &pframeData[(currentPFrameIndex + 1) % frameInFlightCount].outOfFrameDeleteQueue; (*dq) << e; } uint QSGD3D12EnginePrivate::genRenderTarget() { return newId(&renderTargets); } void QSGD3D12EnginePrivate::createRenderTarget(uint id, const QSize &size, const QVector4D &clearColor, uint samples) { ensureDevice(); Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < renderTargets.count() && renderTargets[idx].entryInUse()); RenderTarget &rt(renderTargets[idx]); rt.rtv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); rt.dsv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); rt.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); ID3D12Resource *res = createColorBuffer(rt.rtv, size, clearColor, samples); if (res) rt.color.Attach(res); ID3D12Resource *dsres = createDepthStencil(rt.dsv, size, samples); if (dsres) rt.ds.Attach(dsres); const bool multisample = rt.color->GetDesc().SampleDesc.Count > 1; syncEntryFlags(&rt, RenderTarget::Multisample, multisample); if (!multisample) { device->CreateShaderResourceView(rt.color.Get(), nullptr, rt.srv); } else { D3D12_HEAP_PROPERTIES defaultHeapProp = {}; defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT; D3D12_RESOURCE_DESC textureDesc = {}; textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; textureDesc.Width = size.width(); textureDesc.Height = size.height(); textureDesc.DepthOrArraySize = 1; textureDesc.MipLevels = 1; textureDesc.Format = RT_COLOR_FORMAT; textureDesc.SampleDesc.Count = 1; textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&rt.colorResolve)); if (FAILED(hr)) { qWarning("Failed to create resolve buffer: %s", qPrintable(comErrorMessage(hr))); return; } device->CreateShaderResourceView(rt.colorResolve.Get(), nullptr, rt.srv); } if (Q_UNLIKELY(debug_render())) qDebug("created new render target %u, size %dx%d, samples %d", id, size.width(), size.height(), samples); } void QSGD3D12EnginePrivate::releaseRenderTarget(uint id) { if (!id || !initialized) return; const int idx = id - 1; Q_ASSERT(idx < renderTargets.count()); RenderTarget &rt(renderTargets[idx]); if (!rt.entryInUse()) return; if (Q_UNLIKELY(debug_render())) qDebug("releasing render target %u", id); if (rt.colorResolve) { deferredDelete(rt.colorResolve); rt.colorResolve = nullptr; } if (rt.color) { deferredDelete(rt.color); rt.color = nullptr; deferredDelete(rt.rtv, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); deferredDelete(rt.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); } if (rt.ds) { deferredDelete(rt.ds); rt.ds = nullptr; deferredDelete(rt.dsv, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); } rt.flags &= ~RenderTarget::EntryInUse; } void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id) { if (!inFrame) { qWarning("%s: Cannot be called outside begin/endFrame", __FUNCTION__); return; } Q_ASSERT(id); const int idx = id - 1; Q_ASSERT(idx < renderTargets.count()); RenderTarget &rt(renderTargets[idx]); Q_ASSERT(rt.entryInUse() && rt.color); if (rt.flags & RenderTarget::NeedsReadBarrier) { rt.flags &= ~RenderTarget::NeedsReadBarrier; if (rt.flags & RenderTarget::Multisample) resolveMultisampledTarget(rt.color.Get(), rt.colorResolve.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, commandList); else transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } tframeData.activeTextures[tframeData.activeTextureCount++] = TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id); } QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id) { // Readback due to QQuickWindow::grabWindow() happens outside // begin-endFrame, but QQuickItemGrabResult leads to rendering a layer // without a real frame afterwards and triggering readback. This has to be // supported as well. if (inFrame && (!activeLayers || currentLayerDepth)) { qWarning("%s: Cannot be called while frame preparation is active", __FUNCTION__); return QImage(); } // Due to the above we insert a fake "real" frame when a layer was just rendered into. if (inFrame) { beginFrame(); endFrame(); } frameCommandList->Reset(frameCommandAllocator[frameIndex % frameInFlightCount].Get(), nullptr); D3D12_RESOURCE_STATES bstate; bool needsBarrier = false; ID3D12Resource *rtRes; if (id == 0) { const int idx = presentFrameIndex % swapChainBufferCount; if (windowSamples > 1) { resolveMultisampledTarget(defaultRT[idx].Get(), backBufferRT[idx].Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, frameCommandList.Get()); } else { bstate = D3D12_RESOURCE_STATE_PRESENT; needsBarrier = true; } rtRes = backBufferRT[idx].Get(); } else { const int idx = id - 1; Q_ASSERT(idx < renderTargets.count()); RenderTarget &rt(renderTargets[idx]); Q_ASSERT(rt.entryInUse() && rt.color); if (rt.flags & RenderTarget::Multisample) { resolveMultisampledTarget(rt.color.Get(), rt.colorResolve.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, frameCommandList.Get()); rtRes = rt.colorResolve.Get(); } else { rtRes = rt.color.Get(); bstate = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; needsBarrier = true; } } ComPtr readbackBuf; D3D12_RESOURCE_DESC rtDesc = rtRes->GetDesc(); UINT64 textureByteSize = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT textureLayout = {}; device->GetCopyableFootprints(&rtDesc, 0, 1, 0, &textureLayout, nullptr, nullptr, &textureByteSize); D3D12_HEAP_PROPERTIES heapProp = {}; heapProp.Type = D3D12_HEAP_TYPE_READBACK; D3D12_RESOURCE_DESC bufDesc = {}; bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; bufDesc.Width = textureByteSize; bufDesc.Height = 1; bufDesc.DepthOrArraySize = 1; bufDesc.MipLevels = 1; bufDesc.Format = DXGI_FORMAT_UNKNOWN; bufDesc.SampleDesc.Count = 1; bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; if (FAILED(device->CreateCommittedResource(&heapProp, D3D12_HEAP_FLAG_NONE, &bufDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&readbackBuf)))) { qWarning("Failed to create committed resource (readback buffer)"); return QImage(); } D3D12_TEXTURE_COPY_LOCATION dstLoc; dstLoc.pResource = readbackBuf.Get(); dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; dstLoc.PlacedFootprint = textureLayout; D3D12_TEXTURE_COPY_LOCATION srcLoc; srcLoc.pResource = rtRes; srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; srcLoc.SubresourceIndex = 0; ID3D12GraphicsCommandList *cl = frameCommandList.Get(); if (needsBarrier) transitionResource(rtRes, cl, bstate, D3D12_RESOURCE_STATE_COPY_SOURCE); cl->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); if (needsBarrier) transitionResource(rtRes, cl, D3D12_RESOURCE_STATE_COPY_SOURCE, bstate); cl->Close(); ID3D12CommandList *commandLists[] = { cl }; commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists); QScopedPointer f(createCPUWaitableFence()); waitForGPU(f.data()); // uh oh QImage::Format fmt = imageFormatForTexture(rtDesc.Format); if (fmt == QImage::Format_Invalid) { qWarning("Could not map render target format %d to a QImage format", rtDesc.Format); return QImage(); } QImage img(rtDesc.Width, rtDesc.Height, fmt); quint8 *p = nullptr; const D3D12_RANGE readRange = { 0, 0 }; if (FAILED(readbackBuf->Map(0, &readRange, reinterpret_cast(&p)))) { qWarning("Mapping the readback buffer failed"); return QImage(); } const int bpp = 4; // ### if (id == 0) { for (UINT y = 0; y < rtDesc.Height; ++y) { quint8 *dst = img.scanLine(y); memcpy(dst, p, rtDesc.Width * bpp); p += textureLayout.Footprint.RowPitch; } } else { for (int y = rtDesc.Height - 1; y >= 0; --y) { quint8 *dst = img.scanLine(y); memcpy(dst, p, rtDesc.Width * bpp); p += textureLayout.Footprint.RowPitch; } } readbackBuf->Unmap(0, nullptr); return img; } void QSGD3D12EnginePrivate::simulateDeviceLoss() { qWarning("QSGD3D12Engine: Triggering device loss via TDR"); devLossTest.killDevice(); } bool QSGD3D12EnginePrivate::DeviceLossTester::initialize(QSGD3D12EnginePrivate *enginePriv) { engine = enginePriv; #ifdef DEVLOSS_TEST D3D12_DESCRIPTOR_RANGE descRange[2]; descRange[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; descRange[0].NumDescriptors = 1; descRange[0].BaseShaderRegister = 0; descRange[0].RegisterSpace = 0; descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; descRange[1].NumDescriptors = 1; descRange[1].BaseShaderRegister = 0; descRange[1].RegisterSpace = 0; descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; D3D12_ROOT_PARAMETER param; param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; param.DescriptorTable.NumDescriptorRanges = 2; param.DescriptorTable.pDescriptorRanges = descRange; D3D12_ROOT_SIGNATURE_DESC desc = {}; desc.NumParameters = 1; desc.pParameters = ¶m; ComPtr signature; ComPtr error; if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) { QByteArray msg(static_cast(error->GetBufferPointer()), error->GetBufferSize()); qWarning("Failed to serialize compute root signature: %s", qPrintable(msg)); return false; } if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&computeRootSignature)))) { qWarning("Failed to create compute root signature"); return false; } D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; psoDesc.pRootSignature = computeRootSignature.Get(); psoDesc.CS.pShaderBytecode = g_timeout; psoDesc.CS.BytecodeLength = sizeof(g_timeout); if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&computeState)))) { qWarning("Failed to create compute pipeline state"); return false; } #endif return true; } void QSGD3D12EnginePrivate::DeviceLossTester::releaseResources() { computeState = nullptr; computeRootSignature = nullptr; } void QSGD3D12EnginePrivate::DeviceLossTester::killDevice() { #ifdef DEVLOSS_TEST ID3D12CommandAllocator *ca = engine->frameCommandAllocator[engine->frameIndex % engine->frameInFlightCount].Get(); ID3D12GraphicsCommandList *cl = engine->frameCommandList.Get(); cl->Reset(ca, computeState.Get()); cl->SetComputeRootSignature(computeRootSignature.Get()); cl->Dispatch(256, 1, 1); cl->Close(); ID3D12CommandList *commandLists[] = { cl }; engine->commandQueue->ExecuteCommandLists(_countof(commandLists), commandLists); engine->waitGPU(); #endif } void *QSGD3D12EnginePrivate::getResource(QSGRendererInterface::Resource resource) const { switch (resource) { case QSGRendererInterface::DeviceResource: return device; case QSGRendererInterface::CommandQueueResource: return commandQueue.Get(); case QSGRendererInterface::CommandListResource: return commandList; default: break; } return nullptr; } QT_END_NAMESPACE