diff options
author | Laszlo Agocs <laszlo.agocs@qt.io> | 2020-09-30 21:15:14 +0200 |
---|---|---|
committer | Laszlo Agocs <laszlo.agocs@qt.io> | 2020-10-02 13:14:27 +0200 |
commit | b1101fce301e83fe5dcac3c735959378789a8d16 (patch) | |
tree | f0e039b43e85010ed17b9ec713d5ea31acae422f /src/gui/rhi | |
parent | 387a61adfb2c4e0ea2fd1c17f00c783e4bb6d3a0 (diff) |
rhi: gl: Best uniform is no uniform
Artificial tests with tens of thousands of meshes drawn with
the same program (graphics pipeline) and almost the same
uniform values (the difference being in the normal and model
and view matrices) do benefit - strongly depending on the GL
implementation probably - from doing fewer glUniformNf[v] calls.
Building on the fact that uniform location values will typically
be an int value starting at 0 (not guaranteed of course, we just
skip the smartness in that case), we can dedicate a small 20K
block to keep track of float/vec2/vec3/vec4 values in the graphics
and compute pipeline objects.
Change-Id: I217c31ccdeb511b3e8b8286078d7fbde399c8e3b
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
Diffstat (limited to 'src/gui/rhi')
-rw-r--r-- | src/gui/rhi/qrhigles2.cpp | 79 | ||||
-rw-r--r-- | src/gui/rhi/qrhigles2_p_p.h | 9 |
2 files changed, 84 insertions, 4 deletions
diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index 4c21804596..c9d92b4d71 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -2869,6 +2869,8 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, const char *bufView = bufD->ubuf + viewOffset; QGles2UniformDescriptionVector &uniforms(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniforms : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniforms); + QGles2UniformState *uniformState = maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniformState + : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniformState; for (const QGles2UniformDescription &uniform : qAsConst(uniforms)) { if (uniform.binding == b->binding) { // in a uniform buffer everything is at least 4 byte aligned @@ -2902,7 +2904,17 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform1f(uniform.glslLocation, *reinterpret_cast<const float *>(src)); + const float v = *reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 1 || thisUniformState.v[0] != v) { + thisUniformState.componentCount = 1; + thisUniformState.v[0] = v; + f->glUniform1f(uniform.glslLocation, v); + } + } else { + f->glUniform1f(uniform.glslLocation, v); + } } else { // input is 16 bytes per element as per std140, have to convert to packed packedFloatArray.resize(elemCount); @@ -2915,7 +2927,21 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform2fv(uniform.glslLocation, 1, reinterpret_cast<const float *>(src)); + const float *v = reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + 
QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 2 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1]) + { + thisUniformState.componentCount = 2; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + f->glUniform2fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform2fv(uniform.glslLocation, 1, v); + } } else { packedFloatArray.resize(elemCount * 2); qrhi_std140_to_packed(packedFloatArray.data(), 2, elemCount, src); @@ -2927,7 +2953,23 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform3fv(uniform.glslLocation, 1, reinterpret_cast<const float *>(src)); + const float *v = reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 3 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1] + || thisUniformState.v[2] != v[2]) + { + thisUniformState.componentCount = 3; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + thisUniformState.v[2] = v[2]; + f->glUniform3fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform3fv(uniform.glslLocation, 1, v); + } } else { packedFloatArray.resize(elemCount * 3); qrhi_std140_to_packed(packedFloatArray.data(), 3, elemCount, src); @@ -2936,7 +2978,32 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, } break; case QShaderDescription::Vec4: - f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast<const float *>(src)); + { + const int elemCount = uniform.arrayDim; + if (elemCount < 1) { + const float *v = reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if 
(thisUniformState.componentCount != 4 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1] + || thisUniformState.v[2] != v[2] + || thisUniformState.v[3] != v[3]) + { + thisUniformState.componentCount = 4; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + thisUniformState.v[2] = v[2]; + thisUniformState.v[3] = v[3]; + f->glUniform4fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform4fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast<const float *>(src)); + } + } break; case QShaderDescription::Mat2: f->glUniformMatrix2fv(uniform.glslLocation, 1, GL_FALSE, reinterpret_cast<const float *>(src)); @@ -4491,6 +4558,8 @@ bool QGles2GraphicsPipeline::create() for (const QShaderDescription::InOutVariable &v : fsDesc.combinedImageSamplers()) rhiD->gatherSamplers(program, v, &samplers); + memset(uniformState, 0, sizeof(uniformState)); + generation += 1; rhiD->registerResource(this); return true; @@ -4562,6 +4631,8 @@ bool QGles2ComputePipeline::create() // storage images and buffers need no special steps here + memset(uniformState, 0, sizeof(uniformState)); + generation += 1; rhiD->registerResource(this); return true; diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h index 95bf85f0b4..07e0466b30 100644 --- a/src/gui/rhi/qrhigles2_p_p.h +++ b/src/gui/rhi/qrhigles2_p_p.h @@ -269,6 +269,13 @@ Q_DECLARE_TYPEINFO(QGles2SamplerDescription, Q_MOVABLE_TYPE); using QGles2UniformDescriptionVector = QVarLengthArray<QGles2UniformDescription, 8>; using QGles2SamplerDescriptionVector = QVarLengthArray<QGles2SamplerDescription, 4>; +struct QGles2UniformState +{ + static constexpr int MAX_TRACKED_LOCATION = 1023; + int componentCount; + float v[4]; +}; + struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline { QGles2GraphicsPipeline(QRhiImplementation *rhi); @@ -280,6 +287,7 @@ struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline GLenum 
drawMode = GL_TRIANGLES; QGles2UniformDescriptionVector uniforms; QGles2SamplerDescriptionVector samplers; + QGles2UniformState uniformState[QGles2UniformState::MAX_TRACKED_LOCATION + 1]; uint generation = 0; friend class QRhiGles2; }; @@ -294,6 +302,7 @@ struct QGles2ComputePipeline : public QRhiComputePipeline GLuint program = 0; QGles2UniformDescriptionVector uniforms; QGles2SamplerDescriptionVector samplers; + QGles2UniformState uniformState[QGles2UniformState::MAX_TRACKED_LOCATION + 1]; uint generation = 0; friend class QRhiGles2; }; |