diff options
author | Laszlo Agocs <laszlo.agocs@theqtcompany.com> | 2016-05-26 12:03:19 +0200 |
---|---|---|
committer | Laszlo Agocs <laszlo.agocs@theqtcompany.com> | 2016-05-31 12:42:10 +0000 |
commit | ea22206baf208d3a811279b7fe5c1fe3396d0be3 (patch) | |
tree | 20109cd20dde8c25d0edb3229f42f8a22d4b76eb | |
parent | c9cffe61291d265e05f05087275daa6625b1aea2 (diff) |
D3D12: Drop QVectors from QSGD3D12Engine
Results in a more than 3x drop (and now below GL, as expected) in CPU usage
in the 500 Image elements test.
There is zero value in using a dynamic data structure for these anyhow,
apart from getting == and qHash. However, writing our own hash provides
further opportunities for optimizing; for instance, there is not much point
in hashing the individual input elements since the same shader code pointers
imply that the input elements match too. Thus hashing becomes faster without
more collisions in practice.
Change-Id: Iae766bd44d30ec37080369c8b37677e633c37a88
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
6 files changed, 58 insertions, 32 deletions
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp index 382a08eea9..e17badf018 100644 --- a/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp +++ b/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp @@ -221,7 +221,7 @@ void QSGD3D12TextureMaterial::preparePipeline(QSGD3D12PipelineState *pipelineSta pipelineState->shaders.ps = g_PS_Texture; pipelineState->shaders.psSize = sizeof(g_PS_Texture); - pipelineState->shaders.rootSig.textureViews.resize(1); + pipelineState->shaders.rootSig.textureViewCount = 1; } QSGD3D12Material::UpdateResults QSGD3D12TextureMaterial::updatePipeline(const RenderState &state, @@ -304,7 +304,7 @@ void QSGD3D12SmoothTextureMaterial::preparePipeline(QSGD3D12PipelineState *pipel pipelineState->shaders.ps = g_PS_SmoothTexture; pipelineState->shaders.psSize = sizeof(g_PS_SmoothTexture); - pipelineState->shaders.rootSig.textureViews.resize(1); + pipelineState->shaders.rootSig.textureViewCount = 1; } QSGD3D12Material::UpdateResults QSGD3D12SmoothTextureMaterial::updatePipeline(const RenderState &state, @@ -479,7 +479,7 @@ void QSGD3D12TextMaterial::preparePipeline(QSGD3D12PipelineState *pipelineState) pipelineState->shaders.psSize = sizeof(g_PS_StyledText); } - pipelineState->shaders.rootSig.textureViews.resize(1); + pipelineState->shaders.rootSig.textureViewCount = 1; } QSGD3D12Material::UpdateResults QSGD3D12TextMaterial::updatePipeline(const RenderState &state, diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp index d123f31494..6cc38e573d 100644 --- a/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp +++ b/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp @@ -1414,6 +1414,7 @@ void QSGD3D12EnginePrivate::invalidateCachedFrameState() { tframeData.drawingMode = QSGGeometry::DrawingMode(-1); tframeData.currentIndexBuffer = 0; + tframeData.activeTextureCount = 0; tframeData.drawCount = 0; 
tframeData.lastPso = nullptr; tframeData.lastRootSig = nullptr; @@ -1601,13 +1602,13 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli rootParams[0].Descriptor.RegisterSpace = 0; ++rootParamCount; - if (!pipelineState.shaders.rootSig.textureViews.isEmpty()) { + if (pipelineState.shaders.rootSig.textureViewCount > 0) { rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1; D3D12_DESCRIPTOR_RANGE descRange; descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViews.count(); + descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount; descRange.BaseShaderRegister = 0; // t0, t1, ... descRange.RegisterSpace = 0; descRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; @@ -1623,11 +1624,12 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli // that the number of static samplers has to match the number of // textures. This is not really ideal in general but works for Quick's use cases. // The shaders can still choose to declare and use fewer samplers, if they want to. 
- desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViews.count(); + desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount; D3D12_STATIC_SAMPLER_DESC staticSamplers[8]; int sdIdx = 0; - Q_ASSERT(pipelineState.shaders.rootSig.textureViews.count() <= _countof(staticSamplers)); - for (const QSGD3D12TextureView &tv : qAsConst(pipelineState.shaders.rootSig.textureViews)) { + Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers)); + for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) { + const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]); D3D12_STATIC_SAMPLER_DESC sd = {}; sd.Filter = D3D12_FILTER(tv.filter); sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz); @@ -1666,10 +1668,10 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; - D3D12_INPUT_ELEMENT_DESC inputElements[8]; - Q_ASSERT(pipelineState.inputElements.count() <= _countof(inputElements)); + D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS]; int ieIdx = 0; - for (const QSGD3D12InputElement &ie : pipelineState.inputElements) { + for (int i = 0; i < pipelineState.inputElementCount; ++i) { + const QSGD3D12InputElement &ie(pipelineState.inputElements[i]); D3D12_INPUT_ELEMENT_DESC ieDesc = {}; ieDesc.SemanticName = ie.semanticName; ieDesc.SemanticIndex = ie.semanticIndex; @@ -1773,7 +1775,7 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli commandList->SetGraphicsRootSignature(tframeData.lastRootSig); } - if (!pipelineState.shaders.rootSig.textureViews.isEmpty()) + if (pipelineState.shaders.rootSig.textureViewCount > 0) setDescriptorHeaps(); } @@ -1980,14 +1982,15 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params) } // Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap. 
- Q_ASSERT(tframeData.activeTextures.count() == tframeData.pipelineState.shaders.rootSig.textureViews.count()); - if (!tframeData.activeTextures.isEmpty()) { + Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount); + if (tframeData.activeTextureCount > 0) { if (!skip) { - ensureGPUDescriptorHeap(tframeData.activeTextures.count()); + ensureGPUDescriptorHeap(tframeData.activeTextureCount); const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart(); dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; - for (const TransientFrameData::ActiveTexture &t : qAsConst(tframeData.activeTextures)) { + for (int i = 0; i < tframeData.activeTextureCount; ++i) { + const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]); Q_ASSERT(t.id); const int idx = t.id - 1; const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture; @@ -2000,9 +2003,9 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params) gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride; commandList->SetGraphicsRootDescriptorTable(1, gpuAddr); - pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextures.count(); + pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount; } - tframeData.activeTextures.clear(); + tframeData.activeTextureCount = 0; } // Add the draw call. @@ -2593,8 +2596,10 @@ void QSGD3D12EnginePrivate::useTexture(uint id) const int idx = id - 1; Q_ASSERT(idx < textures.count() && textures[idx].entryInUse()); - // activeTextures is a vector because the order matters - tframeData.activeTextures.append(TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id)); + // Within one frame the order of calling this function determines the + // texture register (0, 1, ...) so fill up activeTextures accordingly. 
+ tframeData.activeTextures[tframeData.activeTextureCount++] + = TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id); if (textures[idx].fenceValue) pframeData[currentPFrameIndex].pendingTextureUploads.insert(id); @@ -2890,7 +2895,8 @@ void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id) transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); } - tframeData.activeTextures.append(TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id)); + tframeData.activeTextures[tframeData.activeTextureCount++] = + TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id); } QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id) diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h index d4c0d0b9e3..094c152381 100644 --- a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h +++ b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h @@ -136,18 +136,26 @@ inline uint qHash(const QSGD3D12TextureView &key, uint seed = 0) return key.filter + key.addressModeHoriz + key.addressModeVert; } +const int QSGD3D12_MAX_TEXTURE_VIEWS = 8; + struct QSGD3D12RootSignature { - QVector<QSGD3D12TextureView> textureViews; + int textureViewCount = 0; + QSGD3D12TextureView textureViews[QSGD3D12_MAX_TEXTURE_VIEWS]; bool operator==(const QSGD3D12RootSignature &other) const { - return textureViews == other.textureViews; + if (textureViewCount != other.textureViewCount) + return false; + for (int i = 0; i < textureViewCount; ++i) + if (!(textureViews[i] == other.textureViews[i])) + return false; + return true; } }; inline uint qHash(const QSGD3D12RootSignature &key, uint seed = 0) { - return qHash(key.textureViews, seed); + return key.textureViewCount + (key.textureViewCount > 0 ? 
qHash(key.textureViews[0], seed) : 0); } // Shader bytecode blobs and root signature-related data. @@ -172,6 +180,8 @@ inline uint qHash(const QSGD3D12ShaderState &key, uint seed = 0) return qHash(key.vs, seed) + key.vsSize + qHash(key.ps, seed) + key.psSize + qHash(key.rootSig, seed); } +const int QSGD3D12_MAX_INPUT_ELEMENTS = 8; + struct QSGD3D12PipelineState { enum CullMode { @@ -216,7 +226,8 @@ struct QSGD3D12PipelineState QSGD3D12ShaderState shaders; - QVector<QSGD3D12InputElement> inputElements; + int inputElementCount = 0; + QSGD3D12InputElement inputElements[QSGD3D12_MAX_INPUT_ELEMENTS]; CullMode cullMode = CullNone; bool frontCCW = true; @@ -233,8 +244,8 @@ struct QSGD3D12PipelineState TopologyType topologyType = TopologyTypeTriangle; bool operator==(const QSGD3D12PipelineState &other) const { - return shaders == other.shaders - && inputElements == other.inputElements + bool eq = shaders == other.shaders + && inputElementCount == other.inputElementCount && cullMode == other.cullMode && frontCCW == other.frontCCW && colorWrite == other.colorWrite @@ -248,12 +259,21 @@ struct QSGD3D12PipelineState && (!stencilEnable || stencilDepthFailOp == other.stencilDepthFailOp) && (!stencilEnable || stencilPassOp == other.stencilPassOp) && topologyType == other.topologyType; + if (eq) { + for (int i = 0; i < inputElementCount; ++i) { + if (!(inputElements[i] == other.inputElements[i])) { + eq = false; + break; + } + } + } + return eq; } }; inline uint qHash(const QSGD3D12PipelineState &key, uint seed = 0) { - return qHash(key.shaders, seed) + qHash(key.inputElements, seed) + return qHash(key.shaders, seed) + key.inputElementCount + key.cullMode + key.frontCCW + key.colorWrite + key.blend + key.depthEnable + key.depthWrite diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h index d8a0166dc5..8a48bbb94e 100644 --- a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h +++ 
b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h @@ -350,7 +350,8 @@ private: ActiveTexture(Type type, uint id) : type(type), id(id) { } ActiveTexture() { } }; - QVector<ActiveTexture> activeTextures; + int activeTextureCount; + ActiveTexture activeTextures[QSGD3D12_MAX_TEXTURE_VIEWS]; int drawCount; ID3D12PipelineState *lastPso; ID3D12RootSignature *lastRootSig; diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp index f76927c0bd..e870d8a7f1 100644 --- a/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp +++ b/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp @@ -75,7 +75,6 @@ QSGD3D12Renderer::QSGD3D12Renderer(QSGRenderContext *context) m_cboData(4096) { setNodeUpdater(new DummyUpdater); - m_freshPipelineState.shaders.rootSig.textureViews.reserve(4); } QSGD3D12Renderer::~QSGD3D12Renderer() @@ -554,7 +553,7 @@ void QSGD3D12Renderer::renderElement(int elementIndex) void QSGD3D12Renderer::setInputLayout(const QSGGeometry *g, QSGD3D12PipelineState *pipelineState) { - pipelineState->inputElements.resize(g->attributeCount()); + pipelineState->inputElementCount = g->attributeCount(); const QSGGeometry::Attribute *attrs = g->attributes(); quint32 offset = 0; for (int i = 0; i < g->attributeCount(); ++i) { diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp index b4288c2ef5..e335fac0b0 100644 --- a/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp +++ b/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp @@ -318,7 +318,7 @@ void QSGD3D12ShaderEffectMaterial::preparePipeline(QSGD3D12PipelineState *pipeli pipelineState->shaders.ps = reinterpret_cast<const quint8 *>(linker.fs.constData()); pipelineState->shaders.psSize = linker.fs.size(); - pipelineState->shaders.rootSig.textureViews.resize(textureProviders.count()); + pipelineState->shaders.rootSig.textureViewCount = textureProviders.count(); } static inline QColor 
qsg_premultiply_color(const QColor &c) |