author     Laszlo Agocs <laszlo.agocs@qt.io>  2020-09-22 19:54:30 +0200
committer  Laszlo Agocs <laszlo.agocs@qt.io>  2020-09-23 16:59:06 +0200
commit     be2635b8dd37dfa7cb3f1c41544b2736d13a058d (patch)
tree       09cbe9d0ec2c117aa51948d28f1df5675b05415d /src/gui/rhi/qrhi_p_p.h
parent     6b52ba42865c6d298a8ddf1d735e4c3d3b3dab56 (diff)
rhi: Reuse the data in buffer ops in res. update batches
Because having profilers bombarded with mallocs (due to creating deep
copy QByteArrays) is not nice.

Change-Id: I848f41f3465d6dc2a58a193cc863495aacf13d79
Reviewed-by: Andy Nichols <andy.nichols@qt.io>
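The core trick is small enough to show in isolation. The following is a
minimal standalone sketch of the reuse pattern the diff applies to the
update helpers; Op and fillData are illustrative names, not Qt API:

    #include <QByteArray>
    #include <cstring>

    struct Op {
        QByteArray data;
        int dataSize = 0; // live byte count; data.size() may be larger
    };

    // Refill op->data with `size` bytes from `src`, reusing the existing
    // allocation whenever op->data is the sole owner of its storage.
    static void fillData(Op *op, const void *src, int size)
    {
        if (op->data.isDetached()) {
            // Not implicitly shared: grow only if needed and copy in
            // place, so a smaller follow-up update allocates nothing.
            if (op->data.size() < size)
                op->data.resize(size);
            memcpy(op->data.data(), src, size);
        } else {
            // Still shared (e.g. a backend holds a reference): detaching
            // would first copy the old, possibly larger allocation, so a
            // fresh deep copy of exactly `size` bytes is the cheaper path.
            op->data = QByteArray(static_cast<const char *>(src), size);
        }
        op->dataSize = size;
    }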
Diffstat (limited to 'src/gui/rhi/qrhi_p_p.h')
-rw-r--r--  src/gui/rhi/qrhi_p_p.h | 43 +++++++++++++++++++++++++++++++++++++++----
1 file changed, 39 insertions(+), 4 deletions(-)
diff --git a/src/gui/rhi/qrhi_p_p.h b/src/gui/rhi/qrhi_p_p.h
index a7d8a40bf9..13f1c88f2d 100644
--- a/src/gui/rhi/qrhi_p_p.h
+++ b/src/gui/rhi/qrhi_p_p.h
@@ -289,6 +289,7 @@ public:
QRhiBuffer *buf;
int offset;
QByteArray data;
+ int dataSize; // the real number of currently used bytes in data, not the same as data.size()
int readSize;
QRhiBufferReadbackResult *result;
@@ -298,7 +299,9 @@ public:
op.type = DynamicUpdate;
op.buf = buf;
op.offset = offset;
- op.data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
+ const int effectiveSize = size ? size : buf->size();
+ op.data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
+ op.dataSize = effectiveSize;
return op;
}
@@ -307,7 +310,29 @@ public:
op->type = DynamicUpdate;
op->buf = buf;
op->offset = offset;
- op->data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
+ const int effectiveSize = size ? size : buf->size();
+
+ // Why the isDetached check? Simply because the cost of detaching
+ // with a larger allocation may be a lot higher than creating a new
+ // deep copy bytearray with our (potentially much smaller) data.
+ // This reduces the benefits with certain backends (e.g. Vulkan)
+ // that hold on to the data (implicit sharing!) of host visible
+ // buffers for the current and next frame (assuming 2 frames in
+ // flight), but it is still an improvement (enabled by
+ // nextResourceUpdateBatch's shuffling when choosing a free batch
+ // from the pool). For other backends (e.g. D3D11) this can reduce
+ // mallocs (caused by creating new deep copy bytearrays) almost
+ // completely after a few frames (assuming of course that no
+ // dynamic elements with larger buffer data appear).
+
+ if (op->data.isDetached()) {
+ if (op->data.size() < effectiveSize)
+ op->data.resize(effectiveSize);
+ memcpy(op->data.data(), data, effectiveSize);
+ } else {
+ op->data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
+ }
+ op->dataSize = effectiveSize;
}
static BufferOp staticUpload(QRhiBuffer *buf, int offset, int size, const void *data)
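(Aside, not part of the diff: the isDetached() branch above keys off
QByteArray's implicit sharing. A small standalone demonstration of the
states involved:)

    #include <QByteArray>
    #include <QDebug>

    int main()
    {
        QByteArray a(64, 'x');
        qDebug() << a.isDetached(); // true: sole owner, in-place reuse is safe

        QByteArray b = a;           // e.g. a backend keeping the data alive
        qDebug() << a.isDetached(); // false: resize() would now detach, i.e.
                                    // allocate and copy despite our efforts

        b = QByteArray();           // the backend lets go of its reference
        qDebug() << a.isDetached(); // true again: the cheap reuse path is back
        return 0;
    }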
@@ -316,7 +341,9 @@ public:
op.type = StaticUpload;
op.buf = buf;
op.offset = offset;
- op.data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
+ const int effectiveSize = size ? size : buf->size();
+ op.data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
+ op.dataSize = effectiveSize;
return op;
}
@@ -325,7 +352,15 @@ public:
op->type = StaticUpload;
op->buf = buf;
op->offset = offset;
- op->data = QByteArray(reinterpret_cast<const char *>(data), size ? size : buf->size());
+ const int effectiveSize = size ? size : buf->size();
+ if (op->data.isDetached()) {
+ if (op->data.size() < effectiveSize)
+ op->data.resize(effectiveSize);
+ memcpy(op->data.data(), data, effectiveSize);
+ } else {
+ op->data = QByteArray(reinterpret_cast<const char *>(data), effectiveSize);
+ }
+ op->dataSize = effectiveSize;
}
static BufferOp read(QRhiBuffer *buf, int offset, int size, QRhiBufferReadbackResult *result)
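A consequence worth noting for code consuming these ops: with pooled
reuse, data.size() can sit at the high-water mark of earlier, larger
updates, which is exactly why the new dataSize member exists. A hedged
sketch of backend-side consumption, where stagingPtrFor() is a
hypothetical helper, not Qt API:

    #include <cstring>

    // Hypothetical backend-side use, assuming the BufferOp declarations
    // from qrhi_p_p.h are in scope; stagingPtrFor() is invented here.
    void executeDynamicUpdate(const QRhiResourceUpdateBatchPrivate::BufferOp &op)
    {
        // op.data may retain a larger allocation from an earlier, bigger
        // update into this pooled op, so data.size() can overstate the
        // payload; dataSize is the authoritative live byte count.
        void *dst = stagingPtrFor(op.buf, op.offset); // hypothetical
        memcpy(dst, op.data.constData(), size_t(op.dataSize));
    }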