diff options
author | Laszlo Agocs <laszlo.agocs@theqtcompany.com> | 2016-03-11 15:54:25 +0100 |
---|---|---|
committer | Laszlo Agocs <laszlo.agocs@theqtcompany.com> | 2016-03-15 11:55:09 +0000 |
commit | 843f72bd317f195ee31921acf8132074a7c50d4a (patch) | |
tree | 86bb7a4363fe4696864bc6f42de9f209417d0849 /src | |
parent | 9924cee26c8fecf5a2de3e07a117de588557b19b (diff) |
D3D12: Add mipmaps and smooth texture material
The smooth material is requested by antialiasing: true. This is
not the same as setting smooth: true, which changes the filtering.
Mipmap support is only available for power-of-two textures for now.
When mipmaps are requested, images of other sizes are scaled up to the next power-of-two size.
In addition, texture releases are now properly deferred and unnecessary
Wait() calls are avoided.
Change-Id: Ib9d909b1c4e2158564ab8bcfb933c088cf0da239
Reviewed-by: Andy Nichols <andy.nichols@theqtcompany.com>
Diffstat (limited to 'src')
12 files changed, 722 insertions, 134 deletions
diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials.cpp b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials.cpp index 34ca84e90f..d5f6246c5d 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials.cpp +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials.cpp @@ -45,6 +45,8 @@ #include "ps_smoothcolor.hlslh" #include "vs_texture.hlslh" #include "ps_texture.hlslh" +#include "vs_smoothtexture.hlslh" +#include "ps_smoothtexture.hlslh" QT_BEGIN_NAMESPACE @@ -244,4 +246,94 @@ QSGD3D12Material::UpdateResults QSGD3D12TextureMaterial::updatePipeline(const Re return r; } +QSGD3D12SmoothTextureMaterial::QSGD3D12SmoothTextureMaterial() +{ + setFlag(RequiresFullMatrixExceptTranslate, true); + setFlag(Blending, true); +} + +QSGMaterialType QSGD3D12SmoothTextureMaterial::mtype; + +QSGMaterialType *QSGD3D12SmoothTextureMaterial::type() const +{ + return &QSGD3D12SmoothTextureMaterial::mtype; +} + +int QSGD3D12SmoothTextureMaterial::compare(const QSGMaterial *other) const +{ + Q_ASSERT(other && type() == other->type()); + const QSGD3D12SmoothTextureMaterial *o = static_cast<const QSGD3D12SmoothTextureMaterial *>(other); + if (int diff = m_texture->textureId() - o->texture()->textureId()) + return diff; + return int(m_filtering) - int(o->m_filtering); +} + +static const int SMOOTH_TEXTURE_CB_SIZE_0 = 16 * sizeof(float); // float4x4 +static const int SMOOTH_TEXTURE_CB_SIZE_1 = sizeof(float); // float +static const int SMOOTH_TEXTURE_CB_SIZE_2 = 2 * sizeof(float); // float2 +static const int SMOOTH_TEXTURE_CB_SIZE = SMOOTH_TEXTURE_CB_SIZE_0 + SMOOTH_TEXTURE_CB_SIZE_1 + SMOOTH_TEXTURE_CB_SIZE_2; + +int QSGD3D12SmoothTextureMaterial::constantBufferSize() const +{ + return QSGD3D12Engine::alignedConstantBufferSize(SMOOTH_TEXTURE_CB_SIZE); +} + +void QSGD3D12SmoothTextureMaterial::preparePipeline(QSGD3D12ShaderState *shaders) +{ + shaders->vs = g_VS_SmoothTexture; + shaders->vsSize = sizeof(g_VS_SmoothTexture); + 
shaders->ps = g_PS_SmoothTexture; + shaders->psSize = sizeof(g_PS_SmoothTexture); + + shaders->rootSig.textureViews.resize(1); +} + +QSGD3D12Material::UpdateResults QSGD3D12SmoothTextureMaterial::updatePipeline(const RenderState &state, + QSGD3D12ShaderState *shaders, + quint8 *constantBuffer) +{ + QSGD3D12Material::UpdateResults r = 0; + quint8 *p = constantBuffer; + + if (state.isMatrixDirty()) { + memcpy(p, state.combinedMatrix().constData(), SMOOTH_TEXTURE_CB_SIZE_0); + r |= UpdatedConstantBuffer; + } + p += SMOOTH_TEXTURE_CB_SIZE_0; + + if (state.isOpacityDirty()) { + const float opacity = state.opacity(); + memcpy(p, &opacity, SMOOTH_TEXTURE_CB_SIZE_1); + r |= UpdatedConstantBuffer; + } + p += SMOOTH_TEXTURE_CB_SIZE_1; + + if (state.isMatrixDirty()) { + const QRect viewport = state.viewportRect(); + const float v[] = { 2.0f / viewport.width(), 2.0f / viewport.height() }; + memcpy(p, v, SMOOTH_TEXTURE_CB_SIZE_2); + r |= UpdatedConstantBuffer; + } + + Q_ASSERT(m_texture); + m_texture->setFiltering(m_filtering); + m_texture->setMipmapFiltering(m_mipmap_filtering); + m_texture->setHorizontalWrapMode(m_horizontal_wrap); + m_texture->setVerticalWrapMode(m_vertical_wrap); + + QSGD3D12TextureView &tv(shaders->rootSig.textureViews[0]); + if (m_filtering == QSGTexture::Linear) + tv.filter = m_mipmap_filtering == QSGTexture::Linear + ? QSGD3D12TextureView::FilterLinear : QSGD3D12TextureView::FilterMinMagLinearMipNearest; + else + tv.filter = m_mipmap_filtering == QSGTexture::Linear + ? QSGD3D12TextureView::FilterMinMagNearestMipLinear : QSGD3D12TextureView::FilterNearest; + tv.addressModeHoriz = m_horizontal_wrap == QSGTexture::ClampToEdge ? QSGD3D12TextureView::AddressClamp : QSGD3D12TextureView::AddressWrap; + tv.addressModeVert = m_vertical_wrap == QSGTexture::ClampToEdge ? 
QSGD3D12TextureView::AddressClamp : QSGD3D12TextureView::AddressWrap; + + m_texture->bind(); + + return r; +} + QT_END_NAMESPACE diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials_p.h b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials_p.h index 187d8fbd69..5ed65ed289 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials_p.h +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12builtinmaterials_p.h @@ -126,6 +126,45 @@ private: QSGTexture::WrapMode m_vertical_wrap = QSGTexture::ClampToEdge; }; +class QSGD3D12SmoothTextureMaterial : public QSGD3D12Material +{ +public: + QSGD3D12SmoothTextureMaterial(); + + QSGMaterialType *type() const override; + int compare(const QSGMaterial *other) const override; + + virtual int constantBufferSize() const override; + void preparePipeline(QSGD3D12ShaderState *shaders) override; + UpdateResults updatePipeline(const RenderState &state, + QSGD3D12ShaderState *shaders, + quint8 *constantBuffer) override; + + void setTexture(QSGTexture *texture) { m_texture = texture; } + QSGTexture *texture() const { return m_texture; } + + void setMipmapFiltering(QSGTexture::Filtering filter) { m_mipmap_filtering = filter; } + QSGTexture::Filtering mipmapFiltering() const { return m_mipmap_filtering; } + + void setFiltering(QSGTexture::Filtering filter) { m_filtering = filter; } + QSGTexture::Filtering filtering() const { return m_filtering; } + + void setHorizontalWrapMode(QSGTexture::WrapMode hwrap) { m_horizontal_wrap = hwrap; } + QSGTexture::WrapMode horizontalWrapMode() const { return m_horizontal_wrap; } + + void setVerticalWrapMode(QSGTexture::WrapMode vwrap) { m_vertical_wrap = vwrap; } + QSGTexture::WrapMode verticalWrapMode() const { return m_vertical_wrap; } + +private: + static QSGMaterialType mtype; + + QSGTexture *m_texture = nullptr; + QSGTexture::Filtering m_filtering = QSGTexture::Nearest; + QSGTexture::Filtering m_mipmap_filtering = QSGTexture::None; + 
QSGTexture::WrapMode m_horizontal_wrap = QSGTexture::ClampToEdge; + QSGTexture::WrapMode m_vertical_wrap = QSGTexture::ClampToEdge; +}; + QT_END_NAMESPACE #endif // QSGD3D12BUILTINMATERIALS_P_H diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine.cpp b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine.cpp index 5d83361ce5..bb1071cc28 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine.cpp +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine.cpp @@ -39,8 +39,10 @@ #include "qsgd3d12engine_p.h" #include "qsgd3d12engine_p_p.h" +#include "cs_mipmapgen.hlslh" #include <QString> #include <QColor> +#include <qmath.h> #include <QtCore/private/qsimd_p.h> QT_BEGIN_NAMESPACE @@ -58,7 +60,7 @@ static const int MAX_DRAW_CALLS_PER_LIST = 128; static const int MAX_CACHED_ROOTSIG = 16; static const int MAX_CACHED_PSO = 64; -static const int GPU_CBVSRVUAV_DESCRIPTORS = 256; +static const int GPU_CBVSRVUAV_DESCRIPTORS = 512; static const int BUCKETS_PER_HEAP = 8; // must match freeMap static const int DESCRIPTORS_PER_BUCKET = 32; // the bit map (freeMap) is quint32 @@ -392,9 +394,14 @@ void QSGD3D12Engine::waitGPU() d->waitGPU(); } -uint QSGD3D12Engine::createTexture(QImage::Format format, const QSize &size, TextureCreateFlags flags) +uint QSGD3D12Engine::genTexture() { - return d->createTexture(format, size, flags); + return d->genTexture(); +} + +void QSGD3D12Engine::createTextureAsync(uint id, const QImage &image, TextureCreateFlags flags) +{ + return d->createTextureAsync(id, image, flags); } void QSGD3D12Engine::releaseTexture(uint id) @@ -407,11 +414,6 @@ SIZE_T QSGD3D12Engine::textureSRV(uint id) const return d->textureSRV(id); } -void QSGD3D12Engine::queueTextureUpload(uint id, const QImage &image, TextureUploadFlags flags) -{ - return d->queueTextureUpload(id, image, flags); -} - void QSGD3D12Engine::activateTexture(uint id) { d->activateTexture(id); @@ -480,11 +482,40 @@ QSGD3D12Format QSGD3D12Engine::toDXGIFormat(QSGGeometry::Type 
sgtype, int tupleS return format; } +int QSGD3D12Engine::mipMapLevels(const QSize &size) +{ + return ceil(log2(qMax(size.width(), size.height()))) + 1; +} + +inline static bool isPowerOfTwo(int x) +{ + // Assumption: x >= 1 + return x == (x & -x); +} + +QSize QSGD3D12Engine::mipMapAdjustedSourceSize(const QSize &size) +{ + if (size.isEmpty()) + return size; + + QSize adjustedSize = size; + + // ### for now only power-of-two sizes are mipmap-capable + if (!isPowerOfTwo(size.width())) + adjustedSize.setWidth(qNextPowerOfTwo(size.width())); + if (!isPowerOfTwo(size.height())) + adjustedSize.setHeight(qNextPowerOfTwo(size.height())); + + return adjustedSize; +} + void QSGD3D12EnginePrivate::releaseResources() { if (!initialized) return; + mipmapper.releaseResources(); + commandList = nullptr; copyCommandList = nullptr; @@ -636,6 +667,9 @@ void QSGD3D12EnginePrivate::initialize(QWindow *w) psoCache.setMaxCost(MAX_CACHED_PSO); rootSigCache.setMaxCost(MAX_CACHED_ROOTSIG); + if (!mipmapper.initialize(this)) + return; + initialized = true; } @@ -816,6 +850,15 @@ void QSGD3D12EnginePrivate::transitionResource(ID3D12Resource *resource, ID3D12G commandList->ResourceBarrier(1, &barrier); } +void QSGD3D12EnginePrivate::uavBarrier(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList) const +{ + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = resource; + + commandList->ResourceBarrier(1, &barrier); +} + ID3D12Resource *QSGD3D12EnginePrivate::createBuffer(int size) { ID3D12Resource *buf; @@ -977,10 +1020,10 @@ void QSGD3D12EnginePrivate::beginFrame() // Do some texture upload bookkeeping. 
const quint64 finishedFrameIndex = frameIndex - MAX_FRAMES_IN_FLIGHT; // we know since we just blocked for this // pfd conveniently refers to the same slot that was used by that frame - if (!pfd.pendingTextures.isEmpty()) { + if (!pfd.pendingTextureUploads.isEmpty()) { if (Q_UNLIKELY(debug_render())) qDebug("Removing texture upload data for frame %d", finishedFrameIndex); - for (uint id : qAsConst(pfd.pendingTextures)) { + for (uint id : qAsConst(pfd.pendingTextureUploads)) { const int idx = id - 1; Texture &t(textures[idx]); if (t.fenceValue) { // may have been cleared by the previous frame @@ -990,11 +1033,17 @@ void QSGD3D12EnginePrivate::beginFrame() qDebug("Cleaned staging data for texture %u", id); } } - pfd.pendingTextures.clear(); + pfd.pendingTextureUploads.clear(); + if (!pfd.pendingTextureMipMap.isEmpty()) { + if (Q_UNLIKELY(debug_render())) + qDebug() << "cleaning mipmap generation data for " << pfd.pendingTextureMipMap; + // no special cleanup is needed as mipmap generation uses the frame's resources + pfd.pendingTextureMipMap.clear(); + } bool hasPending = false; for (int delta = 1; delta < MAX_FRAMES_IN_FLIGHT; ++delta) { const PersistentFrameData &prevFrameData(pframeData[(frameIndex - delta) % MAX_FRAMES_IN_FLIGHT]); - if (!prevFrameData.pendingTextures.isEmpty()) { + if (!prevFrameData.pendingTextureUploads.isEmpty()) { hasPending = true; break; } @@ -1005,12 +1054,24 @@ void QSGD3D12EnginePrivate::beginFrame() copyCommandAllocator->Reset(); } } + if (!pfd.pendingTextureReleases.isEmpty()) { + for (uint id : qAsConst(pfd.pendingTextureReleases)) { + Texture &t(textures[id - 1]); + t.entryInUse = false; // createTexture() can now reuse this entry + t.texture = nullptr; + } + pfd.pendingTextureReleases.clear(); + } // Do the deferred deletes. 
if (!pfd.deleteQueue.isEmpty()) { for (PersistentFrameData::DeleteQueueEntry &e : pfd.deleteQueue) { e.res = nullptr; - e.dh = nullptr; + e.descHeap = nullptr; + if (e.cpuDescriptorPtr) { + D3D12_CPU_DESCRIPTOR_HANDLE h = { e.cpuDescriptorPtr }; + cpuDescHeapManager.release(h, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + } } pfd.deleteQueue.clear(); } @@ -1057,18 +1118,32 @@ void QSGD3D12EnginePrivate::endDrawCalls(bool needsBackbufferTransition) updateBuffer(&constantData, &pfd.constant, "constant"); // Add a wait on the 3D queue for the relevant texture uploads on the copy queue. - if (!pfd.pendingTextures.isEmpty()) { + if (!pfd.pendingTextureUploads.isEmpty()) { quint64 topFenceValue = 0; - for (uint id : qAsConst(pfd.pendingTextures)) { + for (uint id : qAsConst(pfd.pendingTextureUploads)) { const int idx = id - 1; Texture &t(textures[idx]); Q_ASSERT(t.fenceValue); + if (t.waitAdded) + continue; + t.waitAdded = true; if (t.fenceValue > topFenceValue) topFenceValue = t.fenceValue; + if (t.mipmap) + pfd.pendingTextureMipMap.insert(id); + } + if (topFenceValue) { + if (Q_UNLIKELY(debug_render())) + qDebug("added wait for texture fence %llu", topFenceValue); + commandQueue->Wait(textureUploadFence.Get(), topFenceValue); + // Generate mipmaps after the wait, when necessary. + if (!pfd.pendingTextureMipMap.isEmpty()) { + if (Q_UNLIKELY(debug_render())) + qDebug() << "starting mipmap generation for" << pfd.pendingTextureMipMap; + for (uint id : qAsConst(pfd.pendingTextureMipMap)) + mipmapper.queueGenerate(textures[id - 1]); + } } - if (Q_UNLIKELY(debug_render())) - qDebug("added wait for texture fence %llu", topFenceValue); - commandQueue->Wait(textureUploadFence.Get(), topFenceValue); } // Transition the backbuffer for present, if needed. @@ -1480,16 +1555,8 @@ void QSGD3D12EnginePrivate::queueDraw(QSGGeometry::DrawingMode mode, int count, // Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap. 
Q_ASSERT(tframeData.activeTextures.count() == tframeData.pipelineState.shaders.rootSig.textureViews.count()); if (!tframeData.activeTextures.isEmpty()) { + ensureGPUDescriptorHeap(tframeData.activeTextures.count()); const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - if (pfd.cbvSrvUavNextFreeDescriptorIndex + tframeData.activeTextures.count() > pfd.gpuCbvSrvUavHeapSize) { - const int newSize = pfd.gpuCbvSrvUavHeapSize * 2; - if (Q_UNLIKELY(debug_render())) - qDebug("Out of space for SRVs, creating new CBV-SRV-UAV descriptor heap with descriptor count %d", newSize); - pfd.deferredDelete(pfd.gpuCbvSrvUavHeap); - createCbvSrvUavHeap(currentPFrameIndex, newSize); - setDescriptorHeaps(true); - pfd.cbvSrvUavNextFreeDescriptorIndex = 0; - } D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart(); dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; for (uint id : qAsConst(tframeData.activeTextures)) { @@ -1529,6 +1596,22 @@ void QSGD3D12EnginePrivate::queueDraw(QSGGeometry::DrawingMode mode, int count, } } +void QSGD3D12EnginePrivate::ensureGPUDescriptorHeap(int cbvSrvUavDescriptorCount) +{ + PersistentFrameData &pfd(pframeData[currentPFrameIndex]); + int newSize = pfd.gpuCbvSrvUavHeapSize; + while (pfd.cbvSrvUavNextFreeDescriptorIndex + cbvSrvUavDescriptorCount > newSize) + newSize *= 2; + if (newSize != pfd.gpuCbvSrvUavHeapSize) { + if (Q_UNLIKELY(debug_render())) + qDebug("Out of space for SRVs, creating new CBV-SRV-UAV descriptor heap with descriptor count %d", newSize); + pfd.deferredDelete(pfd.gpuCbvSrvUavHeap); + createCbvSrvUavHeap(currentPFrameIndex, newSize); + setDescriptorHeaps(true); + pfd.cbvSrvUavNextFreeDescriptorIndex = 0; + } +} + void QSGD3D12EnginePrivate::present() { if (!initialized) @@ -1560,41 +1643,62 @@ void QSGD3D12EnginePrivate::waitGPU() waitForGPU(presentFence); } -uint QSGD3D12EnginePrivate::createTexture(QImage::Format format, const QSize &size, 
QSGD3D12Engine::TextureCreateFlags flags) +uint QSGD3D12EnginePrivate::genTexture() { - int id = 0; + uint id = 0; for (int i = 0; i < textures.count(); ++i) { - if (!textures[i].texture) { + if (!textures[i].entryInUse) { id = i + 1; break; } } + if (!id) { textures.resize(textures.size() + 1); id = textures.count(); } + Texture &t(textures[id - 1]); + t.entryInUse = true; + t.fenceValue = 0; + t.mipmap = t.waitAdded = false; + + return id; +} + +void QSGD3D12EnginePrivate::createTextureAsync(uint id, const QImage &image, QSGD3D12Engine::TextureCreateFlags flags) +{ + Q_ASSERT(id); const int idx = id - 1; + Q_ASSERT(idx < textures.count() && textures[idx].entryInUse); Texture &t(textures[idx]); + const bool alpha = flags & QSGD3D12Engine::CreateWithAlpha; + t.mipmap = flags & QSGD3D12Engine::CreateWithMipMaps; + t.waitAdded = false; + + const QSize adjustedSize = !t.mipmap ? image.size() : QSGD3D12Engine::mipMapAdjustedSourceSize(image.size()); + D3D12_HEAP_PROPERTIES defaultHeapProp = {}; defaultHeapProp.Type = D3D12_HEAP_TYPE_DEFAULT; D3D12_RESOURCE_DESC textureDesc = {}; textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - textureDesc.Width = size.width(); - textureDesc.Height = size.height(); + textureDesc.Width = adjustedSize.width(); + textureDesc.Height = adjustedSize.height(); textureDesc.DepthOrArraySize = 1; - textureDesc.MipLevels = 1; // ### - textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // ### use format + textureDesc.MipLevels = !t.mipmap ? 
1 : QSGD3D12Engine::mipMapLevels(adjustedSize); + textureDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; textureDesc.SampleDesc.Count = 1; textureDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + if (t.mipmap) + textureDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; HRESULT hr = device->CreateCommittedResource(&defaultHeapProp, D3D12_HEAP_FLAG_NONE, &textureDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&t.texture)); if (FAILED(hr)) { qWarning("Failed to create texture resource: 0x%x", hr); - return 0; + return; } t.srv = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -1603,68 +1707,32 @@ uint QSGD3D12EnginePrivate::createTexture(QImage::Format format, const QSize &si srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srvDesc.Format = textureDesc.Format; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = 1; // ### + srvDesc.Texture2D.MipLevels = textureDesc.MipLevels; device->CreateShaderResourceView(t.texture.Get(), &srvDesc, t.srv); - if (Q_UNLIKELY(debug_render())) - qDebug("allocated texture %d", id); - - return id; -} - -void QSGD3D12EnginePrivate::releaseTexture(uint id) -{ - if (!id) - return; - - const int idx = id - 1; - Q_ASSERT(idx < textures.count()); + if (t.mipmap) { + // Mipmap generation will need an UAV for each level that needs to be generated. 
+ t.mipUAVs.clear(); + for (int level = 1; level < textureDesc.MipLevels; ++level) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = textureDesc.Format; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = level; + D3D12_CPU_DESCRIPTOR_HANDLE h = cpuDescHeapManager.allocate(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + device->CreateUnorderedAccessView(t.texture.Get(), nullptr, &uavDesc, h); + t.mipUAVs.append(h); + } + } if (Q_UNLIKELY(debug_render())) - qDebug("releasing texture %d", id); - - for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) - pframeData[i].pendingTextures.remove(id); - - Texture &t(textures[idx]); - t.texture = nullptr; - t.stagingBuffer = nullptr; - t.fenceValue = 0; - cpuDescHeapManager.release(t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); -} - -SIZE_T QSGD3D12EnginePrivate::textureSRV(uint id) const -{ - Q_ASSERT(id); - const int idx = id - 1; - Q_ASSERT(idx < textures.count()); - return textures[idx].srv.ptr; -} - -void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QImage &image, QSGD3D12Engine::TextureUploadFlags flags) -{ - Q_ASSERT(id); - const int idx = id - 1; - Q_ASSERT(idx < textures.count()); - - Texture &t(textures[idx]); - if (t.fenceValue) { - qWarning("queueTextureUpload: An upload is still active for texture %d", id); - return; - } - if (!t.texture) { - qWarning("queueTextureUpload: Attempted to upload for non-created texture %d", id); - return; - } + qDebug("created texture %d, size %dx%d, miplevels %d", id, adjustedSize.width(), adjustedSize.height(), textureDesc.MipLevels); t.fenceValue = nextTextureUploadFenceValue.fetchAndAddAcquire(1) + 1; - D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc(); UINT64 bufferSize; - const int TEXTURE_MIP_LEVELS = 1; // ### - D3D12_PLACED_SUBRESOURCE_FOOTPRINT textureLayout[TEXTURE_MIP_LEVELS]; - device->GetCopyableFootprints(&textureDesc, 0, TEXTURE_MIP_LEVELS, 0, textureLayout, nullptr, nullptr, &bufferSize); + 
D3D12_PLACED_SUBRESOURCE_FOOTPRINT textureLayout; + device->GetCopyableFootprints(&textureDesc, 0, 1, 0, &textureLayout, nullptr, nullptr, &bufferSize); D3D12_RESOURCE_DESC bufDesc = {}; bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; @@ -1685,7 +1753,11 @@ void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QImage &image, QSG return; } - QImage convImage = image.convertToFormat(QImage::Format_RGBA8888); // ### + // ### conversion and scaling are slow. the latter goes away once npot + // mipmap generation is supported. figure out something for the former. + QImage convImage = image.convertToFormat(alpha ? QImage::Format_RGBA8888_Premultiplied : QImage::Format_RGBX8888); + if (t.mipmap && adjustedSize != convImage.size()) + convImage = convImage.scaled(adjustedSize, Qt::IgnoreAspectRatio, Qt::SmoothTransformation); quint8 *p = nullptr; const D3D12_RANGE readRange = { 0, 0 }; @@ -1693,10 +1765,10 @@ void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QImage &image, QSG qWarning("Map failed (texture upload buffer)"); return; } - quint8 *lp = p + textureLayout[0].Offset; + quint8 *lp = p + textureLayout.Offset; for (uint y = 0; y < textureDesc.Height; ++y) { memcpy(lp, convImage.scanLine(y), convImage.width() * 4); - lp += textureLayout[0].Footprint.RowPitch; + lp += textureLayout.Footprint.RowPitch; } t.stagingBuffer->Unmap(0, nullptr); @@ -1709,7 +1781,7 @@ void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QImage &image, QSG D3D12_TEXTURE_COPY_LOCATION srcLoc; srcLoc.pResource = t.stagingBuffer.Get(); srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - srcLoc.PlacedFootprint = textureLayout[0]; + srcLoc.PlacedFootprint = textureLayout; copyCommandList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); copyCommandList->Close(); @@ -1718,6 +1790,44 @@ void QSGD3D12EnginePrivate::queueTextureUpload(uint id, const QImage &image, QSG copyCommandQueue->Signal(textureUploadFence.Get(), t.fenceValue); } +void 
QSGD3D12EnginePrivate::releaseTexture(uint id) +{ + // This function can safely be called outside begin-endFrame, even though + // it uses currentPFrameIndex. + + if (!id) + return; + + const int idx = id - 1; + Q_ASSERT(idx < textures.count()); + + if (Q_UNLIKELY(debug_render())) + qDebug("releasing texture %d", id); + + Texture &t(textures[idx]); + if (!t.entryInUse) + return; + + PersistentFrameData &pfd(pframeData[currentPFrameIndex]); + + if (t.texture) { + pfd.deferredDelete(t.texture); + pfd.deferredDelete(t.srv); + for (D3D12_CPU_DESCRIPTOR_HANDLE h : t.mipUAVs) + pfd.deferredDelete(h); + } + + pfd.pendingTextureReleases.insert(id); +} + +SIZE_T QSGD3D12EnginePrivate::textureSRV(uint id) const +{ + Q_ASSERT(id); + const int idx = id - 1; + Q_ASSERT(idx < textures.count() && textures[idx].entryInUse); + return textures[idx].srv.ptr; +} + void QSGD3D12EnginePrivate::activateTexture(uint id) { if (!inFrame) { @@ -1725,13 +1835,161 @@ void QSGD3D12EnginePrivate::activateTexture(uint id) return; } + Q_ASSERT(id); + const int idx = id - 1; + Q_ASSERT(idx < textures.count() && textures[idx].entryInUse); + + // activeTextures is a vector because the order matters tframeData.activeTextures.append(id); - PersistentFrameData &pfd(pframeData[currentPFrameIndex]); - const int idx = id - 1; - Q_ASSERT(idx < textures.count()); if (textures[idx].fenceValue) - pfd.pendingTextures.insert(id); + pframeData[currentPFrameIndex].pendingTextureUploads.insert(id); +} + +bool QSGD3D12EnginePrivate::MipMapGen::initialize(QSGD3D12EnginePrivate *enginePriv) +{ + engine = enginePriv; + + D3D12_STATIC_SAMPLER_DESC sampler = {}; + sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + sampler.MinLOD = 0.0f; + sampler.MaxLOD = D3D12_FLOAT32_MAX; + + D3D12_DESCRIPTOR_RANGE descRange[2]; + descRange[0].RangeType = 
D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + descRange[0].NumDescriptors = 1; + descRange[0].BaseShaderRegister = 0; // t0 + descRange[0].RegisterSpace = 0; + descRange[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descRange[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + descRange[1].NumDescriptors = 4; + descRange[1].BaseShaderRegister = 0; // u0..u3 + descRange[1].RegisterSpace = 0; + descRange[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Split into two to allow switching between the first and second set of UAVs later. + D3D12_ROOT_PARAMETER rootParameters[3]; + rootParameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters[0].DescriptorTable.NumDescriptorRanges = 1; + rootParameters[0].DescriptorTable.pDescriptorRanges = &descRange[0]; + + rootParameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters[1].DescriptorTable.NumDescriptorRanges = 1; + rootParameters[1].DescriptorTable.pDescriptorRanges = &descRange[1]; + + rootParameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters[2].Constants.Num32BitValues = 4; // uint2 mip1Size, uint sampleLevel, uint totalMips + rootParameters[2].Constants.ShaderRegister = 0; // b0 + rootParameters[2].Constants.RegisterSpace = 0; + + D3D12_ROOT_SIGNATURE_DESC desc = {}; + desc.NumParameters = 3; + desc.pParameters = rootParameters; + desc.NumStaticSamplers = 1; + desc.pStaticSamplers = &sampler; + + ComPtr<ID3DBlob> signature; + ComPtr<ID3DBlob> error; + if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &signature, &error))) { + QByteArray msg(static_cast<const char *>(error->GetBufferPointer()), error->GetBufferSize()); + qWarning("Failed to 
serialize compute root signature: %s", qPrintable(msg)); + return false; + } + if (FAILED(engine->device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), + IID_PPV_ARGS(&rootSig)))) { + qWarning("Failed to create compute root signature"); + return false; + } + + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSig.Get(); + psoDesc.CS.pShaderBytecode = g_CS_Generate4MipMaps; + psoDesc.CS.BytecodeLength = sizeof(g_CS_Generate4MipMaps); + + if (FAILED(engine->device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)))) { + qWarning("Failed to create compute pipeline state"); + return false; + } + + return true; +} + +void QSGD3D12EnginePrivate::MipMapGen::releaseResources() +{ + pipelineState = nullptr; + rootSig = nullptr; +} + +// The mipmap generator is used to insert commands on the main 3D queue. It is +// guaranteed that the queue has a wait for the base texture level upload +// before invoking queueGenerate(). There can be any number of invocations +// without waiting for earlier ones to finish. finished() is invoked when it is +// known for sure that frame containing the upload and mipmap generation has +// finished on the GPU. + +void QSGD3D12EnginePrivate::MipMapGen::queueGenerate(const Texture &t) +{ + D3D12_RESOURCE_DESC textureDesc = t.texture->GetDesc(); + + engine->commandList->SetPipelineState(pipelineState.Get()); + engine->commandList->SetComputeRootSignature(rootSig.Get()); + + // 1 SRV + (miplevels - 1) UAVs + const int descriptorCount = 1 + (textureDesc.MipLevels - 1); + + engine->ensureGPUDescriptorHeap(descriptorCount); + + // The descriptor heap is set on the command list either because the + // ensure() call above resized, or, typically, due to a texture-dependent + // draw call earlier. 
+ + engine->transitionResource(t.texture.Get(), engine->commandList.Get(), + D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + QSGD3D12EnginePrivate::PersistentFrameData &pfd(engine->pframeData[engine->currentPFrameIndex]); + + const uint stride = engine->cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE h = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart(); + h.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; + + engine->device->CopyDescriptorsSimple(1, h, t.srv, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + h.ptr += stride; + + for (int level = 1; level < textureDesc.MipLevels; ++level, h.ptr += stride) + engine->device->CopyDescriptorsSimple(1, h, t.mipUAVs[level - 1], D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + D3D12_GPU_DESCRIPTOR_HANDLE gpuAddr = pfd.gpuCbvSrvUavHeap->GetGPUDescriptorHandleForHeapStart(); + gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; + + engine->commandList->SetComputeRootDescriptorTable(0, gpuAddr); + gpuAddr.ptr += stride; // now points to the first UAV + + for (int level = 1; level < textureDesc.MipLevels; level += 4, gpuAddr.ptr += stride * 4) { + engine->commandList->SetComputeRootDescriptorTable(1, gpuAddr); + + QSize sz(textureDesc.Width, textureDesc.Height); + sz.setWidth(qMax(1, sz.width() >> level)); + sz.setHeight(qMax(1, sz.height() >> level)); + + const quint32 constants[4] = { quint32(sz.width()), quint32(sz.height()), + quint32(level - 1), + quint32(textureDesc.MipLevels - 1) }; + + engine->commandList->SetComputeRoot32BitConstants(2, 4, constants, 0); + engine->commandList->Dispatch(sz.width(), sz.height(), 1); + engine->uavBarrier(t.texture.Get(), engine->commandList.Get()); + } + + engine->transitionResource(t.texture.Get(), engine->commandList.Get(), + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + pfd.cbvSrvUavNextFreeDescriptorIndex += descriptorCount; } QT_END_NAMESPACE diff 
--git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p.h b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p.h index a474bcbea4..49df43f3c0 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p.h +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p.h @@ -52,6 +52,7 @@ // #include <QWindow> +#include <QImage> #include <qsggeometry.h> QT_BEGIN_NAMESPACE @@ -295,6 +296,8 @@ public: static quint32 alignedConstantBufferSize(quint32 size); static QSGD3D12Format toDXGIFormat(QSGGeometry::Type sgtype, int tupleSize = 1, int *size = nullptr); + static int mipMapLevels(const QSize &size); + static QSize mipMapAdjustedSourceSize(const QSize &size); enum TextureCreateFlag { CreateWithAlpha = 0x1, @@ -302,15 +305,10 @@ public: }; Q_DECLARE_FLAGS(TextureCreateFlags, TextureCreateFlag) - enum TextureUploadFlag { - UploadWithMipMaps = 0x1 - }; - Q_DECLARE_FLAGS(TextureUploadFlags, TextureUploadFlag) - - uint createTexture(QImage::Format format, const QSize &size, TextureCreateFlags flags); + uint genTexture(); + void createTextureAsync(uint id, const QImage &image, TextureCreateFlags flags); void releaseTexture(uint id); SIZE_T textureSRV(uint id) const; - void queueTextureUpload(uint id, const QImage &image, TextureUploadFlags flags); void activateTexture(uint id); private: @@ -320,7 +318,6 @@ private: Q_DECLARE_OPERATORS_FOR_FLAGS(QSGD3D12Engine::ClearFlags) Q_DECLARE_OPERATORS_FOR_FLAGS(QSGD3D12Engine::TextureCreateFlags) -Q_DECLARE_OPERATORS_FOR_FLAGS(QSGD3D12Engine::TextureUploadFlags) QT_END_NAMESPACE diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p_p.h b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p_p.h index 675144f3ca..db73e0329d 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p_p.h +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12engine_p_p.h @@ -162,10 +162,10 @@ public: void present(); void waitGPU(); - uint createTexture(QImage::Format format, const QSize &size, 
QSGD3D12Engine::TextureCreateFlags flags); + uint genTexture(); + void createTextureAsync(uint id, const QImage &image, QSGD3D12Engine::TextureCreateFlags flags); void releaseTexture(uint id); SIZE_T textureSRV(uint id) const; - void queueTextureUpload(uint id, const QImage &image, QSGD3D12Engine::TextureUploadFlags flags); void activateTexture(uint id); // the device is intentionally hidden here. all resources have to go @@ -177,6 +177,7 @@ private: bool createCbvSrvUavHeap(int pframeIndex, int descriptorCount); void setDescriptorHeaps(bool force = false); + void ensureGPUDescriptorHeap(int cbvSrvUavDescriptorCount); DXGI_SAMPLE_DESC makeSampleDesc(DXGI_FORMAT format, int samples); ID3D12Resource *createDepthStencil(D3D12_CPU_DESCRIPTOR_HANDLE viewHandle, const QSize &size, int samples); @@ -187,6 +188,8 @@ private: void transitionResource(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList, D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after) const; + void uavBarrier(ID3D12Resource *resource, ID3D12GraphicsCommandList *commandList) const; + ID3D12Resource *createBuffer(int size); ID3D12Resource *backBufferRT() const; @@ -212,14 +215,18 @@ private: ComPtr<ID3D12DescriptorHeap> gpuCbvSrvUavHeap; int gpuCbvSrvUavHeapSize; int cbvSrvUavNextFreeDescriptorIndex; - QSet<uint> pendingTextures; + QSet<uint> pendingTextureUploads; + QSet<uint> pendingTextureMipMap; + QSet<uint> pendingTextureReleases; struct DeleteQueueEntry { ComPtr<ID3D12Resource> res; - ComPtr<ID3D12DescriptorHeap> dh; + ComPtr<ID3D12DescriptorHeap> descHeap; + SIZE_T cpuDescriptorPtr = 0; }; QVector<DeleteQueueEntry> deleteQueue; void deferredDelete(ComPtr<ID3D12Resource> res) { DeleteQueueEntry e; e.res = res; deleteQueue << e; } - void deferredDelete(ComPtr<ID3D12DescriptorHeap> dh) { DeleteQueueEntry e; e.dh = dh; deleteQueue << e; } + void deferredDelete(ComPtr<ID3D12DescriptorHeap> dh) { DeleteQueueEntry e; e.descHeap = dh; deleteQueue << e; } + void 
deferredDelete(D3D12_CPU_DESCRIPTOR_HANDLE h) { DeleteQueueEntry e; e.cpuDescriptorPtr = h.ptr; deleteQueue << e; } }; void markCPUBufferDirty(CPUBufferRef *dst, PersistentFrameData::ChangeTrackedBuffer *buf, int offset, int size); @@ -270,10 +277,14 @@ private: QCache<QSGD3D12RootSignature, RootSigCacheEntry> rootSigCache; struct Texture { + bool entryInUse = false; ComPtr<ID3D12Resource> texture; D3D12_CPU_DESCRIPTOR_HANDLE srv; quint64 fenceValue = 0; + bool waitAdded = false; ComPtr<ID3D12Resource> stagingBuffer; + QVector<D3D12_CPU_DESCRIPTOR_HANDLE> mipUAVs; + bool mipmap = false; }; QVector<Texture> textures; @@ -295,6 +306,18 @@ private: QSGD3D12PipelineState pipelineState; }; TransientFrameData tframeData; + + struct MipMapGen { /* bundles the root signature and pipeline state used by queueGenerate() */ + bool initialize(QSGD3D12EnginePrivate *enginePriv); + void releaseResources(); + void queueGenerate(const Texture &t); + + QSGD3D12EnginePrivate *engine = nullptr; /* raw back pointer, null until assigned; presumably stored by initialize() - TODO confirm */ + ComPtr<ID3D12RootSignature> rootSig; + ComPtr<ID3D12PipelineState> pipelineState; + }; + + MipMapGen mipmapper; }; QT_END_NAMESPACE diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode.cpp b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode.cpp index 8fccced5f5..9bb360bc5e 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode.cpp +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode.cpp @@ -52,7 +52,7 @@ void QSGD3D12ImageNode::setFiltering(QSGTexture::Filtering filtering) return; m_material.setFiltering(filtering); - //m_smoothMaterial.setFiltering(filtering); + m_smoothMaterial.setFiltering(filtering); markDirty(DirtyMaterial); } @@ -62,7 +62,7 @@ void QSGD3D12ImageNode::setMipmapFiltering(QSGTexture::Filtering filtering) return; m_material.setMipmapFiltering(filtering); - //m_smoothMaterial.setMipmapFiltering(filtering); + m_smoothMaterial.setMipmapFiltering(filtering); markDirty(DirtyMaterial); } @@ -72,7 +72,7 @@ void QSGD3D12ImageNode::setVerticalWrapMode(QSGTexture::WrapMode wrapMode) return; 
m_material.setVerticalWrapMode(wrapMode); - //m_smoothMaterial.setVerticalWrapMode(wrapMode); + m_smoothMaterial.setVerticalWrapMode(wrapMode); markDirty(DirtyMaterial); } @@ -82,19 +82,22 @@ void QSGD3D12ImageNode::setHorizontalWrapMode(QSGTexture::WrapMode wrapMode) return; m_material.setHorizontalWrapMode(wrapMode); - //m_smoothMaterial.setHorizontalWrapMode(wrapMode); + m_smoothMaterial.setHorizontalWrapMode(wrapMode); markDirty(DirtyMaterial); } void QSGD3D12ImageNode::updateMaterialAntialiasing() { - //setMaterial(m_antialiasing ? &m_smoothMaterial : &m_material); + if (m_antialiasing) /* antialiased nodes draw with the smooth material, all others with the plain texture material */ + setMaterial(&m_smoothMaterial); + else + setMaterial(&m_material); } void QSGD3D12ImageNode::setMaterialTexture(QSGTexture *texture) { m_material.setTexture(texture); -// m_smoothMaterial.setTexture(texture); + m_smoothMaterial.setTexture(texture); /* both materials always reference the same texture */ } QSGTexture *QSGD3D12ImageNode::materialTexture() const diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode_p.h b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode_p.h index 101c19ee8b..7f47f7daa4 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode_p.h +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12imagenode_p.h @@ -74,7 +74,7 @@ public: private: QSGD3D12TextureMaterial m_material; -// QSGSmoothTextureMaterial m_smoothMaterial; + QSGD3D12SmoothTextureMaterial m_smoothMaterial; }; QT_END_NAMESPACE diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture.cpp b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture.cpp index 06278be45f..c76e730a6a 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture.cpp +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture.cpp @@ -45,25 +45,21 @@ QT_BEGIN_NAMESPACE void QSGD3D12Texture::setImage(const QImage &image, uint flags) { - // ### mipmap, atlas + // ### atlas const bool alphaRequest = flags & QSGRenderContext::CreateTexture_Alpha; m_alphaWanted = alphaRequest && image.hasAlphaChannel(); + m_image = image; /* the image is retained because bind() passes it to createTextureAsync() later */ m_size 
= image.size(); - QSGD3D12Engine::TextureCreateFlags createFlags = 0; - if (m_alphaWanted) - createFlags |= QSGD3D12Engine::CreateWithAlpha; - - m_id = m_engine->createTexture(image.format(), image.size(), createFlags); - if (!m_id) { - qWarning("Failed to allocate texture of size %dx%d", image.width(), image.height()); - return; - } + m_id = m_engine->genTexture(); + Q_ASSERT(m_id); - QSGD3D12Engine::TextureUploadFlags uploadFlags = 0; - m_engine->queueTextureUpload(m_id, image, uploadFlags); + // We could kick off the texture creation and the async upload right here. + // Unfortunately we cannot tell at this stage if mipmaps will be enabled + // via an Image element's mipmap property...so defer to bind(). + m_createPending = true; } QSGD3D12Texture::~QSGD3D12Texture() @@ -89,7 +85,7 @@ bool QSGD3D12Texture::hasAlphaChannel() const bool QSGD3D12Texture::hasMipmaps() const { - return false; // ### + return mipmapFiltering() != QSGTexture::None; /* true whenever a mipmap filtering mode is set on the texture */ } QRectF QSGD3D12Texture::normalizedTextureSubRect() const @@ -109,10 +105,32 @@ QSGTexture *QSGD3D12Texture::removedFromAtlas() const void QSGD3D12Texture::bind() { - // Called when the texture material updates the pipeline state. Here we - // know that the texture is going to be used in the current frame by the - // next draw call. Notify the engine so that it can wait for possible - // pending uploads in endFrame() and set up the pipeline accordingly. + // Called when the texture material updates the pipeline state. 
+ + if (!m_createPending && hasMipmaps() != m_createdWithMipMaps) { /* the mipmap setting differs from the one used at creation: release the texture and schedule a fresh creation under a new id */ + m_engine->releaseTexture(m_id); + m_id = m_engine->genTexture(); + Q_ASSERT(m_id); + m_createPending = true; + } + + if (m_createPending) { /* deferred creation: build the flags from the current alpha and mipmap state, then hand the stored image to the engine */ + m_createPending = false; + + QSGD3D12Engine::TextureCreateFlags createFlags = 0; + if (m_alphaWanted) + createFlags |= QSGD3D12Engine::CreateWithAlpha; + + m_createdWithMipMaps = hasMipmaps(); /* remembered so that a later change can be detected above */ + if (m_createdWithMipMaps) + createFlags |= QSGD3D12Engine::CreateWithMipMaps; + + m_engine->createTextureAsync(m_id, m_image, createFlags); + } + + // Here we know that the texture is going to be used in the current frame + // by the next draw call. Notify the engine so that it can wait for + // possible pending uploads and set up the pipeline accordingly. m_engine->activateTexture(m_id); } diff --git a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture_p.h b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture_p.h index 045d003f4a..35f65420fa 100644 --- a/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture_p.h +++ b/src/quick/scenegraph/adaptations/d3d12/qsgd3d12texture_p.h @@ -78,6 +78,9 @@ public: private: QSGD3D12Engine *m_engine; + QImage m_image; + bool m_createPending = false; + bool m_createdWithMipMaps = false; uint m_id = 0; bool m_alphaWanted = false; QSize m_size; diff --git a/src/quick/scenegraph/adaptations/d3d12/shaders/mipmapgen.hlsl b/src/quick/scenegraph/adaptations/d3d12/shaders/mipmapgen.hlsl new file mode 100644 index 0000000000..6793b534b0 --- /dev/null +++ b/src/quick/scenegraph/adaptations/d3d12/shaders/mipmapgen.hlsl @@ -0,0 +1,60 @@ +static const uint GROUP_DIM = 8; // 2 ^ (out_mip_count - 1) + +Texture2D tex : register(t0); +SamplerState samp : register(s0); + +cbuffer ConstantBuffer : register(b0) +{ + uint2 mip1Size; /* size in texels of the first level written by a dispatch */ + uint sampleLevel; /* level that is sampled as the source */ + uint totalMips; /* bound compared against sampleLevel to stop before writing further levels */ +} + +RWTexture2D<float4> mip1 : register(u0); +RWTexture2D<float4> mip2 : register(u1); +RWTexture2D<float4> mip3 : register(u2); +RWTexture2D<float4> mip4 : register(u3); + 
+groupshared float4 groupColor[GROUP_DIM][GROUP_DIM]; /* per-group scratch holding the partial averages of the current reduction step */ + +[numthreads(GROUP_DIM, GROUP_DIM, 1)] +void CS_Generate4MipMaps(uint3 localId: SV_GroupThreadId, uint3 globalId: SV_DispatchThreadID) /* writes up to four consecutive mip levels: every thread samples one mip1 texel, then the group averages 2x2 neighbourhoods up to three more times */ +{ + const float2 coord = float2(1.0f / float(mip1Size.x), 1.0f / float(mip1Size.y)) * (globalId.xy + 0.5); /* normalized coordinate of this texel's centre within mip1 */ + float4 c = tex.SampleLevel(samp, coord, sampleLevel); + + mip1[globalId.xy] = c; + groupColor[localId.y][localId.x] = c; + + if (sampleLevel + 1 >= totalMips) /* no further level requested; the condition only reads constants, so the whole group leaves together */ + return; + + GroupMemoryBarrierWithGroupSync(); + + if ((localId.x & 1) == 0 && (localId.y & 1) == 0) { /* threads at even coordinates average the 2x2 block of mip1 samples */ + c = (c + groupColor[localId.y][localId.x + 1] + groupColor[localId.y + 1][localId.x] + groupColor[localId.y + 1][localId.x + 1]) / 4.0; + mip2[globalId.xy / 2] = c; + groupColor[localId.y][localId.x] = c; + } + + if (sampleLevel + 2 >= totalMips) + return; + + GroupMemoryBarrierWithGroupSync(); + + if ((localId.x & 3) == 0 && (localId.y & 3) == 0) { /* the mip2 averages sit at even coordinates, so the neighbours are 2 apart */ + c = (c + groupColor[localId.y][localId.x + 2] + groupColor[localId.y + 2][localId.x] + groupColor[localId.y + 2][localId.x + 2]) / 4.0; + mip3[globalId.xy / 4] = c; + groupColor[localId.y][localId.x] = c; + } + + if (sampleLevel + 3 >= totalMips) + return; + + GroupMemoryBarrierWithGroupSync(); + + if ((localId.x & 7) == 0 && (localId.y & 7) == 0) { /* the mip3 averages sit at coordinates that are multiples of 4, so the neighbours are 4 apart; an offset of 3 would read raw mip1 samples instead */ + c = (c + groupColor[localId.y][localId.x + 4] + groupColor[localId.y + 4][localId.x] + groupColor[localId.y + 4][localId.x + 4]) / 4.0; + mip4[globalId.xy / 8] = c; + } +} diff --git a/src/quick/scenegraph/adaptations/d3d12/shaders/shaders.pri b/src/quick/scenegraph/adaptations/d3d12/shaders/shaders.pri index cc0a8cb15d..96b0c94565 100644 --- a/src/quick/scenegraph/adaptations/d3d12/shaders/shaders.pri +++ b/src/quick/scenegraph/adaptations/d3d12/shaders/shaders.pri @@ -38,10 +38,28 @@ texture_pshader.header = ps_texture.hlslh texture_pshader.entry = PS_Texture texture_pshader.type = ps_5_0 +smoothtexture_VSPS = $$PWD/smoothtexture.hlsl +smoothtexture_vshader.input = smoothtexture_VSPS +smoothtexture_vshader.header 
= vs_smoothtexture.hlslh +smoothtexture_vshader.entry = VS_SmoothTexture +smoothtexture_vshader.type = vs_5_0 +smoothtexture_pshader.input = smoothtexture_VSPS +smoothtexture_pshader.header = ps_smoothtexture.hlslh +smoothtexture_pshader.entry = PS_SmoothTexture +smoothtexture_pshader.type = ps_5_0 + +mipmapgen_CS = $$PWD/mipmapgen.hlsl +mipmapgen_cshader.input = mipmapgen_CS +mipmapgen_cshader.header = cs_mipmapgen.hlslh +mipmapgen_cshader.entry = CS_Generate4MipMaps +mipmapgen_cshader.type = cs_5_0 + HLSL_SHADERS = \ vertexcolor_vshader vertexcolor_pshader \ stencilclip_vshader stencilclip_pshader \ smoothcolor_vshader smoothcolor_pshader \ - texture_vshader texture_pshader + texture_vshader texture_pshader \ + smoothtexture_vshader smoothtexture_pshader \ + mipmapgen_cshader load(hlsl_bytecode_header) diff --git a/src/quick/scenegraph/adaptations/d3d12/shaders/smoothtexture.hlsl b/src/quick/scenegraph/adaptations/d3d12/shaders/smoothtexture.hlsl new file mode 100644 index 0000000000..05b1c6e9d4 --- /dev/null +++ b/src/quick/scenegraph/adaptations/d3d12/shaders/smoothtexture.hlsl @@ -0,0 +1,77 @@ +struct VSInput +{ + float4 position : POSITION; + float2 coord : TEXCOORD0; + float2 offset : TEXCOORD1; /* a non-zero component enables the position shift along that axis in the vertex shader */ + float2 coordOffset : TEXCOORD2; /* texture coordinate shift applied with the same scale as the position shift */ +}; + +cbuffer ConstantBuffer : register(b0) +{ + float4x4 mvp; + float opacity; + float2 pixelSize; +}; + +struct PSInput +{ + float4 position : SV_POSITION; + float2 coord : TEXCOORD0; + float vertexOpacity : TEXCOORD3; +}; + +Texture2D tex : register(t0); +SamplerState samp : register(s0); + +PSInput VS_SmoothTexture(VSInput input) /* transforms the vertex by mvp and, per axis with a non-zero offset, moves position and texture coordinate by a scaled delta */ +{ + PSInput result; + + float4 pos = mul(mvp, input.position); + float2 coord = input.coord; + + if (input.offset.x != 0.0) { + // In HLSL matrix packing is column-major by default (which is good) but the math is row-major (unlike GLSL). 
+ float4 delta = float4(mvp._11, mvp._21, mvp._31, mvp._41) * input.offset.x; /* first column of mvp scaled by the x offset */ + float2 dir = delta.xy * pos.w - pos.xy * delta.w; + float2 ndir = 0.5 * pixelSize * normalize(dir / pixelSize); + dir -= ndir * delta.w * pos.w; + float numerator = dot(dir, ndir * pos.w * pos.w); + float scale = 0.0; + if (numerator < 0.0) + scale = 1.0; + else + scale = min(1.0, numerator / dot(dir, dir)); /* the applied fraction of delta never exceeds 1 */ + pos += scale * delta; + coord.x += scale * input.coordOffset.x; + } + + if (input.offset.y != 0.0) { /* same computation as the x branch, using the y offset */ + float4 delta = float4(mvp._12, mvp._22, mvp._32, mvp._42) * input.offset.y; /* second column of mvp scaled by the y offset */ + float2 dir = delta.xy * pos.w - pos.xy * delta.w; + float2 ndir = 0.5 * pixelSize * normalize(dir / pixelSize); + dir -= ndir * delta.w * pos.w; + float numerator = dot(dir, ndir * pos.w * pos.w); + float scale = 0.0; + if (numerator < 0.0) + scale = 1.0; + else + scale = min(1.0, numerator / dot(dir, dir)); + pos += scale * delta; + coord.y += scale * input.coordOffset.y; + } + + if ((input.offset.x != 0.0 || input.offset.y != 0.0) && (input.coordOffset.x == 0.0 && input.coordOffset.y == 0.0)) /* vertices that carry a position offset but no texture coordinate offset become fully transparent */ + result.vertexOpacity = 0.0; + else + result.vertexOpacity = opacity; + + result.position = pos; + result.coord = coord; + return result; +} + +float4 PS_SmoothTexture(PSInput input) : SV_TARGET /* samples the texture and multiplies the colour by the opacity passed on from the vertex shader */ +{ + return tex.Sample(samp, input.coord) * input.vertexOpacity; +} |