aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Laszlo Agocs <laszlo.agocs@theqtcompany.com> 2016-05-26 12:03:19 +0200
committer Laszlo Agocs <laszlo.agocs@theqtcompany.com> 2016-05-31 12:42:10 +0000
commit ea22206baf208d3a811279b7fe5c1fe3396d0be3 (patch)
tree 20109cd20dde8c25d0edb3229f42f8a22d4b76eb
parent c9cffe61291d265e05f05087275daa6625b1aea2 (diff)
D3D12: Drop QVectors from QSGD3D12Engine
Results in a more than 3x drop (and now below GL, as expected) in CPU usage in the 500 Image elements test. There is zero value in using a dynamic data structure for these anyhow, apart from getting == and qHash. However, writing our own hash provides further opportunities for optimizing; for instance, there is not much point in hashing the individual input elements since the same shader code pointers imply that the input elements match too. Thus hashing becomes faster without more collisions in practice.

Change-Id: Iae766bd44d30ec37080369c8b37677e633c37a88
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
-rw-r--r-- src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp | 6
-rw-r--r-- src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp | 42
-rw-r--r-- src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h | 34
-rw-r--r-- src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h | 3
-rw-r--r-- src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp | 3
-rw-r--r-- src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp | 2
6 files changed, 58 insertions, 32 deletions
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp
index 382a08eea9..e17badf018 100644
--- a/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp
+++ b/src/plugins/scenegraph/d3d12/qsgd3d12builtinmaterials.cpp
@@ -221,7 +221,7 @@ void QSGD3D12TextureMaterial::preparePipeline(QSGD3D12PipelineState *pipelineSta
pipelineState->shaders.ps = g_PS_Texture;
pipelineState->shaders.psSize = sizeof(g_PS_Texture);
- pipelineState->shaders.rootSig.textureViews.resize(1);
+ pipelineState->shaders.rootSig.textureViewCount = 1;
}
QSGD3D12Material::UpdateResults QSGD3D12TextureMaterial::updatePipeline(const RenderState &state,
@@ -304,7 +304,7 @@ void QSGD3D12SmoothTextureMaterial::preparePipeline(QSGD3D12PipelineState *pipel
pipelineState->shaders.ps = g_PS_SmoothTexture;
pipelineState->shaders.psSize = sizeof(g_PS_SmoothTexture);
- pipelineState->shaders.rootSig.textureViews.resize(1);
+ pipelineState->shaders.rootSig.textureViewCount = 1;
}
QSGD3D12Material::UpdateResults QSGD3D12SmoothTextureMaterial::updatePipeline(const RenderState &state,
@@ -479,7 +479,7 @@ void QSGD3D12TextMaterial::preparePipeline(QSGD3D12PipelineState *pipelineState)
pipelineState->shaders.psSize = sizeof(g_PS_StyledText);
}
- pipelineState->shaders.rootSig.textureViews.resize(1);
+ pipelineState->shaders.rootSig.textureViewCount = 1;
}
QSGD3D12Material::UpdateResults QSGD3D12TextMaterial::updatePipeline(const RenderState &state,
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp
index d123f31494..6cc38e573d 100644
--- a/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp
+++ b/src/plugins/scenegraph/d3d12/qsgd3d12engine.cpp
@@ -1414,6 +1414,7 @@ void QSGD3D12EnginePrivate::invalidateCachedFrameState()
{
tframeData.drawingMode = QSGGeometry::DrawingMode(-1);
tframeData.currentIndexBuffer = 0;
+ tframeData.activeTextureCount = 0;
tframeData.drawCount = 0;
tframeData.lastPso = nullptr;
tframeData.lastRootSig = nullptr;
@@ -1601,13 +1602,13 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
rootParams[0].Descriptor.RegisterSpace = 0;
++rootParamCount;
- if (!pipelineState.shaders.rootSig.textureViews.isEmpty()) {
+ if (pipelineState.shaders.rootSig.textureViewCount > 0) {
rootParams[rootParamCount].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
rootParams[rootParamCount].ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
rootParams[rootParamCount].DescriptorTable.NumDescriptorRanges = 1;
D3D12_DESCRIPTOR_RANGE descRange;
descRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
- descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViews.count();
+ descRange.NumDescriptors = pipelineState.shaders.rootSig.textureViewCount;
descRange.BaseShaderRegister = 0; // t0, t1, ...
descRange.RegisterSpace = 0;
descRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
@@ -1623,11 +1624,12 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
// that the number of static samplers has to match the number of
// textures. This is not really ideal in general but works for Quick's use cases.
// The shaders can still choose to declare and use fewer samplers, if they want to.
- desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViews.count();
+ desc.NumStaticSamplers = pipelineState.shaders.rootSig.textureViewCount;
D3D12_STATIC_SAMPLER_DESC staticSamplers[8];
int sdIdx = 0;
- Q_ASSERT(pipelineState.shaders.rootSig.textureViews.count() <= _countof(staticSamplers));
- for (const QSGD3D12TextureView &tv : qAsConst(pipelineState.shaders.rootSig.textureViews)) {
+ Q_ASSERT(pipelineState.shaders.rootSig.textureViewCount <= _countof(staticSamplers));
+ for (int i = 0; i < pipelineState.shaders.rootSig.textureViewCount; ++i) {
+ const QSGD3D12TextureView &tv(pipelineState.shaders.rootSig.textureViews[i]);
D3D12_STATIC_SAMPLER_DESC sd = {};
sd.Filter = D3D12_FILTER(tv.filter);
sd.AddressU = D3D12_TEXTURE_ADDRESS_MODE(tv.addressModeHoriz);
@@ -1666,10 +1668,10 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
- D3D12_INPUT_ELEMENT_DESC inputElements[8];
- Q_ASSERT(pipelineState.inputElements.count() <= _countof(inputElements));
+ D3D12_INPUT_ELEMENT_DESC inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
int ieIdx = 0;
- for (const QSGD3D12InputElement &ie : pipelineState.inputElements) {
+ for (int i = 0; i < pipelineState.inputElementCount; ++i) {
+ const QSGD3D12InputElement &ie(pipelineState.inputElements[i]);
D3D12_INPUT_ELEMENT_DESC ieDesc = {};
ieDesc.SemanticName = ie.semanticName;
ieDesc.SemanticIndex = ie.semanticIndex;
@@ -1773,7 +1775,7 @@ void QSGD3D12EnginePrivate::finalizePipeline(const QSGD3D12PipelineState &pipeli
commandList->SetGraphicsRootSignature(tframeData.lastRootSig);
}
- if (!pipelineState.shaders.rootSig.textureViews.isEmpty())
+ if (pipelineState.shaders.rootSig.textureViewCount > 0)
setDescriptorHeaps();
}
@@ -1980,14 +1982,15 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params)
}
// Copy the SRVs to a drawcall-dedicated area of the shader-visible descriptor heap.
- Q_ASSERT(tframeData.activeTextures.count() == tframeData.pipelineState.shaders.rootSig.textureViews.count());
- if (!tframeData.activeTextures.isEmpty()) {
+ Q_ASSERT(tframeData.activeTextureCount == tframeData.pipelineState.shaders.rootSig.textureViewCount);
+ if (tframeData.activeTextureCount > 0) {
if (!skip) {
- ensureGPUDescriptorHeap(tframeData.activeTextures.count());
+ ensureGPUDescriptorHeap(tframeData.activeTextureCount);
const uint stride = cpuDescHeapManager.handleSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
D3D12_CPU_DESCRIPTOR_HANDLE dst = pfd.gpuCbvSrvUavHeap->GetCPUDescriptorHandleForHeapStart();
dst.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
- for (const TransientFrameData::ActiveTexture &t : qAsConst(tframeData.activeTextures)) {
+ for (int i = 0; i < tframeData.activeTextureCount; ++i) {
+ const TransientFrameData::ActiveTexture &t(tframeData.activeTextures[i]);
Q_ASSERT(t.id);
const int idx = t.id - 1;
const bool isTex = t.type == TransientFrameData::ActiveTexture::TypeTexture;
@@ -2000,9 +2003,9 @@ void QSGD3D12EnginePrivate::queueDraw(const QSGD3D12Engine::DrawParams &params)
gpuAddr.ptr += pfd.cbvSrvUavNextFreeDescriptorIndex * stride;
commandList->SetGraphicsRootDescriptorTable(1, gpuAddr);
- pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextures.count();
+ pfd.cbvSrvUavNextFreeDescriptorIndex += tframeData.activeTextureCount;
}
- tframeData.activeTextures.clear();
+ tframeData.activeTextureCount = 0;
}
// Add the draw call.
@@ -2593,8 +2596,10 @@ void QSGD3D12EnginePrivate::useTexture(uint id)
const int idx = id - 1;
Q_ASSERT(idx < textures.count() && textures[idx].entryInUse());
- // activeTextures is a vector because the order matters
- tframeData.activeTextures.append(TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id));
+ // Within one frame the order of calling this function determines the
+ // texture register (0, 1, ...) so fill up activeTextures accordingly.
+ tframeData.activeTextures[tframeData.activeTextureCount++]
+ = TransientFrameData::ActiveTexture(TransientFrameData::ActiveTexture::TypeTexture, id);
if (textures[idx].fenceValue)
pframeData[currentPFrameIndex].pendingTextureUploads.insert(id);
@@ -2890,7 +2895,8 @@ void QSGD3D12EnginePrivate::useRenderTargetAsTexture(uint id)
transitionResource(rt.color.Get(), commandList, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
}
- tframeData.activeTextures.append(TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id));
+ tframeData.activeTextures[tframeData.activeTextureCount++] =
+ TransientFrameData::ActiveTexture::ActiveTexture(TransientFrameData::ActiveTexture::TypeRenderTarget, id);
}
QImage QSGD3D12EnginePrivate::executeAndWaitReadbackRenderTarget(uint id)
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h
index d4c0d0b9e3..094c152381 100644
--- a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h
+++ b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p.h
@@ -136,18 +136,26 @@ inline uint qHash(const QSGD3D12TextureView &key, uint seed = 0)
return key.filter + key.addressModeHoriz + key.addressModeVert;
}
+const int QSGD3D12_MAX_TEXTURE_VIEWS = 8;
+
struct QSGD3D12RootSignature
{
- QVector<QSGD3D12TextureView> textureViews;
+ int textureViewCount = 0;
+ QSGD3D12TextureView textureViews[QSGD3D12_MAX_TEXTURE_VIEWS];
bool operator==(const QSGD3D12RootSignature &other) const {
- return textureViews == other.textureViews;
+ if (textureViewCount != other.textureViewCount)
+ return false;
+ for (int i = 0; i < textureViewCount; ++i)
+ if (!(textureViews[i] == other.textureViews[i]))
+ return false;
+ return true;
}
};
inline uint qHash(const QSGD3D12RootSignature &key, uint seed = 0)
{
- return qHash(key.textureViews, seed);
+ return key.textureViewCount + (key.textureViewCount > 0 ? qHash(key.textureViews[0], seed) : 0);
}
// Shader bytecode blobs and root signature-related data.
@@ -172,6 +180,8 @@ inline uint qHash(const QSGD3D12ShaderState &key, uint seed = 0)
return qHash(key.vs, seed) + key.vsSize + qHash(key.ps, seed) + key.psSize + qHash(key.rootSig, seed);
}
+const int QSGD3D12_MAX_INPUT_ELEMENTS = 8;
+
struct QSGD3D12PipelineState
{
enum CullMode {
@@ -216,7 +226,8 @@ struct QSGD3D12PipelineState
QSGD3D12ShaderState shaders;
- QVector<QSGD3D12InputElement> inputElements;
+ int inputElementCount = 0;
+ QSGD3D12InputElement inputElements[QSGD3D12_MAX_INPUT_ELEMENTS];
CullMode cullMode = CullNone;
bool frontCCW = true;
@@ -233,8 +244,8 @@ struct QSGD3D12PipelineState
TopologyType topologyType = TopologyTypeTriangle;
bool operator==(const QSGD3D12PipelineState &other) const {
- return shaders == other.shaders
- && inputElements == other.inputElements
+ bool eq = shaders == other.shaders
+ && inputElementCount == other.inputElementCount
&& cullMode == other.cullMode
&& frontCCW == other.frontCCW
&& colorWrite == other.colorWrite
@@ -248,12 +259,21 @@ struct QSGD3D12PipelineState
&& (!stencilEnable || stencilDepthFailOp == other.stencilDepthFailOp)
&& (!stencilEnable || stencilPassOp == other.stencilPassOp)
&& topologyType == other.topologyType;
+ if (eq) {
+ for (int i = 0; i < inputElementCount; ++i) {
+ if (!(inputElements[i] == other.inputElements[i])) {
+ eq = false;
+ break;
+ }
+ }
+ }
+ return eq;
}
};
inline uint qHash(const QSGD3D12PipelineState &key, uint seed = 0)
{
- return qHash(key.shaders, seed) + qHash(key.inputElements, seed)
+ return qHash(key.shaders, seed) + key.inputElementCount
+ key.cullMode + key.frontCCW
+ key.colorWrite + key.blend
+ key.depthEnable + key.depthWrite
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h
index d8a0166dc5..8a48bbb94e 100644
--- a/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h
+++ b/src/plugins/scenegraph/d3d12/qsgd3d12engine_p_p.h
@@ -350,7 +350,8 @@ private:
ActiveTexture(Type type, uint id) : type(type), id(id) { }
ActiveTexture() { }
};
- QVector<ActiveTexture> activeTextures;
+ int activeTextureCount;
+ ActiveTexture activeTextures[QSGD3D12_MAX_TEXTURE_VIEWS];
int drawCount;
ID3D12PipelineState *lastPso;
ID3D12RootSignature *lastRootSig;
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp
index f76927c0bd..e870d8a7f1 100644
--- a/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp
+++ b/src/plugins/scenegraph/d3d12/qsgd3d12renderer.cpp
@@ -75,7 +75,6 @@ QSGD3D12Renderer::QSGD3D12Renderer(QSGRenderContext *context)
m_cboData(4096)
{
setNodeUpdater(new DummyUpdater);
- m_freshPipelineState.shaders.rootSig.textureViews.reserve(4);
}
QSGD3D12Renderer::~QSGD3D12Renderer()
@@ -554,7 +553,7 @@ void QSGD3D12Renderer::renderElement(int elementIndex)
void QSGD3D12Renderer::setInputLayout(const QSGGeometry *g, QSGD3D12PipelineState *pipelineState)
{
- pipelineState->inputElements.resize(g->attributeCount());
+ pipelineState->inputElementCount = g->attributeCount();
const QSGGeometry::Attribute *attrs = g->attributes();
quint32 offset = 0;
for (int i = 0; i < g->attributeCount(); ++i) {
diff --git a/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp b/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp
index b4288c2ef5..e335fac0b0 100644
--- a/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp
+++ b/src/plugins/scenegraph/d3d12/qsgd3d12shadereffectnode.cpp
@@ -318,7 +318,7 @@ void QSGD3D12ShaderEffectMaterial::preparePipeline(QSGD3D12PipelineState *pipeli
pipelineState->shaders.ps = reinterpret_cast<const quint8 *>(linker.fs.constData());
pipelineState->shaders.psSize = linker.fs.size();
- pipelineState->shaders.rootSig.textureViews.resize(textureProviders.count());
+ pipelineState->shaders.rootSig.textureViewCount = textureProviders.count();
}
static inline QColor qsg_premultiply_color(const QColor &c)