From b1101fce301e83fe5dcac3c735959378789a8d16 Mon Sep 17 00:00:00 2001 From: Laszlo Agocs Date: Wed, 30 Sep 2020 21:15:14 +0200 Subject: rhi: gl: Best uniform is no uniform Artificial tests with tens of thousands of meshes drawn with the same program (graphics pipeline) and almost the same uniform values (the difference being in the normal and model and view matrices) do benefit - strongly depending on the GL implementation probably - from doing fewer glUniformNf[v] calls. Building on the fact that uniform location values will typically be an int value starting at 0 (not guaranteed of course, we just skip the smartness in that case), we can dedicate a small 16K block to keep track of float/vec2/vec3/vec4 values in the graphics and compute pipeline objects. Change-Id: I217c31ccdeb511b3e8b8286078d7fbde399c8e3b Reviewed-by: Andy Nichols --- src/gui/rhi/qrhigles2.cpp | 79 ++++++++++++++++++++++++++++++++++++++++++--- src/gui/rhi/qrhigles2_p_p.h | 9 ++++++ 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index 4c21804596..c9d92b4d71 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -2869,6 +2869,8 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, const char *bufView = bufD->ubuf + viewOffset; QGles2UniformDescriptionVector &uniforms(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniforms : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniforms); + QGles2UniformState *uniformState = maybeGraphicsPs ? 
QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniformState + : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniformState; for (const QGles2UniformDescription &uniform : qAsConst(uniforms)) { if (uniform.binding == b->binding) { // in a uniform buffer everything is at least 4 byte aligned @@ -2902,7 +2904,17 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform1f(uniform.glslLocation, *reinterpret_cast(src)); + const float v = *reinterpret_cast(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 1 || thisUniformState.v[0] != v) { + thisUniformState.componentCount = 1; + thisUniformState.v[0] = v; + f->glUniform1f(uniform.glslLocation, v); + } + } else { + f->glUniform1f(uniform.glslLocation, v); + } } else { // input is 16 bytes per element as per std140, have to convert to packed packedFloatArray.resize(elemCount); @@ -2915,7 +2927,21 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform2fv(uniform.glslLocation, 1, reinterpret_cast(src)); + const float *v = reinterpret_cast(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 2 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1]) + { + thisUniformState.componentCount = 2; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + f->glUniform2fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform2fv(uniform.glslLocation, 1, v); + } } else { packedFloatArray.resize(elemCount * 2); qrhi_std140_to_packed(packedFloatArray.data(), 2, elemCount, src); @@ -2927,7 +2953,23 @@ void 
QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform3fv(uniform.glslLocation, 1, reinterpret_cast(src)); + const float *v = reinterpret_cast(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 3 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1] + || thisUniformState.v[2] != v[2]) + { + thisUniformState.componentCount = 3; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + thisUniformState.v[2] = v[2]; + f->glUniform3fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform3fv(uniform.glslLocation, 1, v); + } } else { packedFloatArray.resize(elemCount * 3); qrhi_std140_to_packed(packedFloatArray.data(), 3, elemCount, src); @@ -2936,7 +2978,32 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, } break; case QShaderDescription::Vec4: - f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast(src)); + { + const int elemCount = uniform.arrayDim; + if (elemCount < 1) { + const float *v = reinterpret_cast(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 4 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1] + || thisUniformState.v[2] != v[2] + || thisUniformState.v[3] != v[3]) + { + thisUniformState.componentCount = 4; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + thisUniformState.v[2] = v[2]; + thisUniformState.v[3] = v[3]; + f->glUniform4fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform4fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast(src)); + } + } break; case QShaderDescription::Mat2: 
f->glUniformMatrix2fv(uniform.glslLocation, 1, GL_FALSE, reinterpret_cast(src)); @@ -4491,6 +4558,8 @@ bool QGles2GraphicsPipeline::create() for (const QShaderDescription::InOutVariable &v : fsDesc.combinedImageSamplers()) rhiD->gatherSamplers(program, v, &samplers); + memset(uniformState, 0, sizeof(uniformState)); + generation += 1; rhiD->registerResource(this); return true; @@ -4562,6 +4631,8 @@ bool QGles2ComputePipeline::create() // storage images and buffers need no special steps here + memset(uniformState, 0, sizeof(uniformState)); + generation += 1; rhiD->registerResource(this); return true; diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h index 95bf85f0b4..07e0466b30 100644 --- a/src/gui/rhi/qrhigles2_p_p.h +++ b/src/gui/rhi/qrhigles2_p_p.h @@ -269,6 +269,13 @@ Q_DECLARE_TYPEINFO(QGles2SamplerDescription, Q_MOVABLE_TYPE); using QGles2UniformDescriptionVector = QVarLengthArray; using QGles2SamplerDescriptionVector = QVarLengthArray; +struct QGles2UniformState +{ + static constexpr int MAX_TRACKED_LOCATION = 1023; + int componentCount; + float v[4]; +}; + struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline { QGles2GraphicsPipeline(QRhiImplementation *rhi); @@ -280,6 +287,7 @@ struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline GLenum drawMode = GL_TRIANGLES; QGles2UniformDescriptionVector uniforms; QGles2SamplerDescriptionVector samplers; + QGles2UniformState uniformState[QGles2UniformState::MAX_TRACKED_LOCATION + 1]; uint generation = 0; friend class QRhiGles2; }; @@ -294,6 +302,7 @@ struct QGles2ComputePipeline : public QRhiComputePipeline GLuint program = 0; QGles2UniformDescriptionVector uniforms; QGles2SamplerDescriptionVector samplers; + QGles2UniformState uniformState[QGles2UniformState::MAX_TRACKED_LOCATION + 1]; uint generation = 0; friend class QRhiGles2; }; -- cgit v1.2.3