summaryrefslogtreecommitdiffstats
path: root/src/gui/rhi
diff options
context:
space:
mode:
authorLaszlo Agocs <laszlo.agocs@qt.io>2020-09-30 21:15:14 +0200
committerLaszlo Agocs <laszlo.agocs@qt.io>2020-10-02 13:14:27 +0200
commitb1101fce301e83fe5dcac3c735959378789a8d16 (patch)
treef0e039b43e85010ed17b9ec713d5ea31acae422f /src/gui/rhi
parent387a61adfb2c4e0ea2fd1c17f00c783e4bb6d3a0 (diff)
rhi: gl: Best uniform is no uniform
Artificial tests with tens of thousands of meshes drawn with the same program (graphics pipeline) and almost the same uniform values (the difference being in the normal and model and view matrices) do benefit - strongly depending on the GL implementation probably - from doing fewer glUniformNf[v] calls. Building on the fact that uniform location values will typically be an int value starting at 0 (not guaranteed of course, we just skip the smartness in that case), we can dedicate a small 16K block to keep track of float/vec2/vec3/vec4 values in the graphics and compute pipeline objects. Change-Id: I217c31ccdeb511b3e8b8286078d7fbde399c8e3b Reviewed-by: Andy Nichols <andy.nichols@qt.io>
Diffstat (limited to 'src/gui/rhi')
-rw-r--r--src/gui/rhi/qrhigles2.cpp79
-rw-r--r--src/gui/rhi/qrhigles2_p_p.h9
2 files changed, 84 insertions, 4 deletions
diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp
index 4c21804596..c9d92b4d71 100644
--- a/src/gui/rhi/qrhigles2.cpp
+++ b/src/gui/rhi/qrhigles2.cpp
@@ -2869,6 +2869,8 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD,
const char *bufView = bufD->ubuf + viewOffset;
QGles2UniformDescriptionVector &uniforms(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniforms
: QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniforms);
+ QGles2UniformState *uniformState = maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniformState
+ : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniformState;
for (const QGles2UniformDescription &uniform : qAsConst(uniforms)) {
if (uniform.binding == b->binding) {
// in a uniform buffer everything is at least 4 byte aligned
@@ -2902,7 +2904,17 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD,
{
const int elemCount = uniform.arrayDim;
if (elemCount < 1) {
- f->glUniform1f(uniform.glslLocation, *reinterpret_cast<const float *>(src));
+ const float v = *reinterpret_cast<const float *>(src);
+ if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) {
+ QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]);
+ if (thisUniformState.componentCount != 1 || thisUniformState.v[0] != v) {
+ thisUniformState.componentCount = 1;
+ thisUniformState.v[0] = v;
+ f->glUniform1f(uniform.glslLocation, v);
+ }
+ } else {
+ f->glUniform1f(uniform.glslLocation, v);
+ }
} else {
// input is 16 bytes per element as per std140, have to convert to packed
packedFloatArray.resize(elemCount);
@@ -2915,7 +2927,21 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD,
{
const int elemCount = uniform.arrayDim;
if (elemCount < 1) {
- f->glUniform2fv(uniform.glslLocation, 1, reinterpret_cast<const float *>(src));
+ const float *v = reinterpret_cast<const float *>(src);
+ if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) {
+ QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]);
+ if (thisUniformState.componentCount != 2
+ || thisUniformState.v[0] != v[0]
+ || thisUniformState.v[1] != v[1])
+ {
+ thisUniformState.componentCount = 2;
+ thisUniformState.v[0] = v[0];
+ thisUniformState.v[1] = v[1];
+ f->glUniform2fv(uniform.glslLocation, 1, v);
+ }
+ } else {
+ f->glUniform2fv(uniform.glslLocation, 1, v);
+ }
} else {
packedFloatArray.resize(elemCount * 2);
qrhi_std140_to_packed(packedFloatArray.data(), 2, elemCount, src);
@@ -2927,7 +2953,23 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD,
{
const int elemCount = uniform.arrayDim;
if (elemCount < 1) {
- f->glUniform3fv(uniform.glslLocation, 1, reinterpret_cast<const float *>(src));
+ const float *v = reinterpret_cast<const float *>(src);
+ if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) {
+ QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]);
+ if (thisUniformState.componentCount != 3
+ || thisUniformState.v[0] != v[0]
+ || thisUniformState.v[1] != v[1]
+ || thisUniformState.v[2] != v[2])
+ {
+ thisUniformState.componentCount = 3;
+ thisUniformState.v[0] = v[0];
+ thisUniformState.v[1] = v[1];
+ thisUniformState.v[2] = v[2];
+ f->glUniform3fv(uniform.glslLocation, 1, v);
+ }
+ } else {
+ f->glUniform3fv(uniform.glslLocation, 1, v);
+ }
} else {
packedFloatArray.resize(elemCount * 3);
qrhi_std140_to_packed(packedFloatArray.data(), 3, elemCount, src);
@@ -2936,7 +2978,32 @@ void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD,
}
break;
case QShaderDescription::Vec4:
- f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast<const float *>(src));
+ {
+ const int elemCount = uniform.arrayDim;
+ if (elemCount < 1) {
+ const float *v = reinterpret_cast<const float *>(src);
+ if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) {
+ QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]);
+ if (thisUniformState.componentCount != 4
+ || thisUniformState.v[0] != v[0]
+ || thisUniformState.v[1] != v[1]
+ || thisUniformState.v[2] != v[2]
+ || thisUniformState.v[3] != v[3])
+ {
+ thisUniformState.componentCount = 4;
+ thisUniformState.v[0] = v[0];
+ thisUniformState.v[1] = v[1];
+ thisUniformState.v[2] = v[2];
+ thisUniformState.v[3] = v[3];
+ f->glUniform4fv(uniform.glslLocation, 1, v);
+ }
+ } else {
+ f->glUniform4fv(uniform.glslLocation, 1, v);
+ }
+ } else {
+ f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast<const float *>(src));
+ }
+ }
break;
case QShaderDescription::Mat2:
f->glUniformMatrix2fv(uniform.glslLocation, 1, GL_FALSE, reinterpret_cast<const float *>(src));
@@ -4491,6 +4558,8 @@ bool QGles2GraphicsPipeline::create()
for (const QShaderDescription::InOutVariable &v : fsDesc.combinedImageSamplers())
rhiD->gatherSamplers(program, v, &samplers);
+ memset(uniformState, 0, sizeof(uniformState));
+
generation += 1;
rhiD->registerResource(this);
return true;
@@ -4562,6 +4631,8 @@ bool QGles2ComputePipeline::create()
// storage images and buffers need no special steps here
+ memset(uniformState, 0, sizeof(uniformState));
+
generation += 1;
rhiD->registerResource(this);
return true;
diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h
index 95bf85f0b4..07e0466b30 100644
--- a/src/gui/rhi/qrhigles2_p_p.h
+++ b/src/gui/rhi/qrhigles2_p_p.h
@@ -269,6 +269,13 @@ Q_DECLARE_TYPEINFO(QGles2SamplerDescription, Q_MOVABLE_TYPE);
using QGles2UniformDescriptionVector = QVarLengthArray<QGles2UniformDescription, 8>;
using QGles2SamplerDescriptionVector = QVarLengthArray<QGles2SamplerDescription, 4>;
+struct QGles2UniformState // Per-location cache of the last scalar/vector float value passed to glUniform1f/2fv/3fv/4fv, used to skip redundant calls.
+{
+ static constexpr int MAX_TRACKED_LOCATION = 1023; // Highest uniform location that is cached; larger locations always issue the GL call. NOTE(review): callers only check the upper bound, so a negative location would index out of range - confirm locations are never negative here.
+ int componentCount; // Number of floats currently cached in v (1 to 4); 0 after the memset in the pipelines' create(), which never matches, so the first set always goes through.
+ float v[4]; // Cached component values; only the first componentCount entries are written and compared.
+};
+
struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline
{
QGles2GraphicsPipeline(QRhiImplementation *rhi);
@@ -280,6 +287,7 @@ struct QGles2GraphicsPipeline : public QRhiGraphicsPipeline
GLenum drawMode = GL_TRIANGLES;
QGles2UniformDescriptionVector uniforms;
QGles2SamplerDescriptionVector samplers;
+ QGles2UniformState uniformState[QGles2UniformState::MAX_TRACKED_LOCATION + 1];
uint generation = 0;
friend class QRhiGles2;
};
@@ -294,6 +302,7 @@ struct QGles2ComputePipeline : public QRhiComputePipeline
GLuint program = 0;
QGles2UniformDescriptionVector uniforms;
QGles2SamplerDescriptionVector samplers;
+ QGles2UniformState uniformState[QGles2UniformState::MAX_TRACKED_LOCATION + 1];
uint generation = 0;
friend class QRhiGles2;
};