summaryrefslogtreecommitdiffstats
path: root/src/gui/rhi/qrhimetal.mm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gui/rhi/qrhimetal.mm')
-rw-r--r--src/gui/rhi/qrhimetal.mm3083
1 files changed, 2582 insertions, 501 deletions
diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm
index dc21feb74c..1c7b397193 100644
--- a/src/gui/rhi/qrhimetal.mm
+++ b/src/gui/rhi/qrhimetal.mm
@@ -1,10 +1,18 @@
-// Copyright (C) 2019 The Qt Company Ltd.
+// Copyright (C) 2023 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
-#include "qrhimetal_p_p.h"
+#include "qrhimetal_p.h"
+#include "qshader_p.h"
#include <QGuiApplication>
#include <QWindow>
+#include <QUrl>
+#include <QFile>
+#include <QTemporaryFile>
+#include <QFileInfo>
#include <qmath.h>
+#include <QOperatingSystemVersion>
+
+#include <QtCore/private/qcore_mac_p.h>
#ifdef Q_OS_MACOS
#include <AppKit/AppKit.h>
@@ -32,19 +40,28 @@ QT_BEGIN_NAMESPACE
#error ARC not supported
#endif
-// Note: we expect everything here pass the Metal API validation when running
-// in Debug mode in XCode. Some of the issues that break validation are not
-// obvious and not visible when running outside XCode.
-//
-// An exception is the nextDrawable Called Early blah blah warning, which is
-// plain and simply false.
+// Even though the macOS 13 MTLBinaryArchive problem (QTBUG-106703) seems
+// to be solved in later 13.x releases, we have reports from old Intel hardware
+// and older macOS versions where this causes problems (QTBUG-114338).
+// Thus we no longer do OS version based differentiation, but rather have a
+// single toggle that is currently on, and so QRhi::(set)pipelineCache()
+// does nothing with Metal.
+#define QRHI_METAL_DISABLE_BINARY_ARCHIVE
+
+// We should be able to operate with command buffers that do not automatically
+// retain/release the resources used by them. (since we have logic that mirrors
+// other backends such as the Vulkan one anyway)
+#define QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
/*!
\class QRhiMetalInitParams
\inmodule QtRhi
- \internal
+ \since 6.6
\brief Metal specific initialization parameters.
+ \note This is a RHI API with limited compatibility guarantees, see \l QRhi
+ for details.
+
A Metal-based QRhi needs no special parameters for initialization.
\badcode
@@ -52,10 +69,13 @@ QT_BEGIN_NAMESPACE
rhi = QRhi::create(QRhi::Metal, &params);
\endcode
- \note Metal API validation cannot be enabled by the application. Instead,
- run the debug build of the application in XCode. Generating a
- \c{.xcodeproj} file via \c{qmake -spec macx-xcode} provides a convenient
- way to enable this.
+ \note Metal API validation cannot be enabled programmatically by the QRhi.
+ Instead, either run the debug build of the application in XCode, by
+ generating a \c{.xcodeproj} file via \c{cmake -G Xcode}, or set the
+ environment variable \c{METAL_DEVICE_WRAPPER_TYPE=1}. The variable needs to
+ be set early on in the environment, perferably before starting the process;
+ attempting to set it at QRhi creation time is not functional in practice.
+ (too late probably)
\note QRhiSwapChain can only target QWindow instances that have their
surface type set to QSurface::MetalSurface.
@@ -74,14 +94,30 @@ QT_BEGIN_NAMESPACE
/*!
\class QRhiMetalNativeHandles
\inmodule QtRhi
- \internal
+ \since 6.6
\brief Holds the Metal device used by the QRhi.
+
+ \note This is a RHI API with limited compatibility guarantees, see \l QRhi
+ for details.
*/
/*!
+ \variable QRhiMetalNativeHandles::dev
+
+ Set to a valid MTLDevice to import an existing device.
+*/
+
+/*!
+ \variable QRhiMetalNativeHandles::cmdQueue
+
+ Set to a valid MTLCommandQueue when importing an existing command queue.
+ When \nullptr, QRhi will create a new command queue.
+*/
+
+/*!
\class QRhiMetalCommandBufferNativeHandles
\inmodule QtRhi
- \internal
+ \since 6.6
\brief Holds the MTLCommandBuffer and MTLRenderCommandEncoder objects that are backing a QRhiCommandBuffer.
\note The command buffer object is only guaranteed to be valid while
@@ -93,14 +129,28 @@ QT_BEGIN_NAMESPACE
\note The command encoder is only valid while recording a pass, that is,
between \l{QRhiCommandBuffer::beginPass()} -
\l{QRhiCommandBuffer::endPass()}.
+
+ \note This is a RHI API with limited compatibility guarantees, see \l QRhi
+ for details.
*/
+/*!
+ \variable QRhiMetalCommandBufferNativeHandles::commandBuffer
+*/
+
+/*!
+ \variable QRhiMetalCommandBufferNativeHandles::encoder
+*/
+
struct QMetalShader
{
id<MTLLibrary> lib = nil;
id<MTLFunction> func = nil;
- std::array<uint, 3> localSize;
+ std::array<uint, 3> localSize = {};
+ uint outputVertexCount = 0;
+ QShaderDescription desc;
QShader::NativeResourceBindingMap nativeResourceBindingMap;
+ QShader::NativeShaderInfo nativeShaderInfo;
void destroy() {
nativeResourceBindingMap.clear();
@@ -113,11 +163,14 @@ struct QMetalShader
struct QRhiMetalData
{
- QRhiMetalData(QRhiImplementation *rhi) : ofr(rhi) { }
+ QRhiMetalData(QRhiMetal *rhi) : q(rhi), ofr(rhi) { }
+ QRhiMetal *q;
id<MTLDevice> dev = nil;
id<MTLCommandQueue> cmdQueue = nil;
+ API_AVAILABLE(macosx(11.0), ios(14.0)) id<MTLBinaryArchive> binArch = nil;
+ id<MTLCommandBuffer> newCommandBuffer();
MTLRenderPassDescriptor *createDefaultRenderPass(bool hasDepthStencil,
const QColor &colorClearValue,
const QRhiDepthStencilClearValue &depthStencilClearValue,
@@ -125,6 +178,11 @@ struct QRhiMetalData
id<MTLLibrary> createMetalLib(const QShader &shader, QShader::Variant shaderVariant,
QString *error, QByteArray *entryPoint, QShaderKey *activeKey);
id<MTLFunction> createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint);
+ bool setupBinaryArchive(NSURL *sourceFileUrl = nil);
+ void addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc);
+ void trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc);
+ void addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc);
+ void trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc);
struct DeferredReleaseEntry {
enum Type {
@@ -159,6 +217,8 @@ struct QRhiMetalData
struct {
id<MTLRenderPipelineState> pipelineState;
id<MTLDepthStencilState> depthStencilState;
+ std::array<id<MTLComputePipelineState>, 3> tessVertexComputeState;
+ id<MTLComputePipelineState> tessTessControlComputeState;
} graphicsPipeline;
struct {
id<MTLComputePipelineState> pipelineState;
@@ -170,6 +230,7 @@ struct QRhiMetalData
struct OffscreenFrame {
OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { }
bool active = false;
+ double lastGpuTime = 0;
QMetalCommandBuffer cbWrapper;
} ofr;
@@ -184,6 +245,17 @@ struct QRhiMetalData
};
QVarLengthArray<TextureReadback, 2> activeTextureReadbacks;
+ struct BufferReadback
+ {
+ int activeFrameSlot = -1;
+ QRhiReadbackResult *result;
+ quint32 offset;
+ quint32 readSize;
+ id<MTLBuffer> buf;
+ };
+
+ QVarLengthArray<BufferReadback, 2> activeBufferReadbacks;
+
MTLCaptureManager *captureMgr;
id<MTLCaptureScope> captureScope = nil;
@@ -255,14 +327,16 @@ struct QMetalShaderResourceBindingsData {
QRhiBatchedBindings<id<MTLTexture> > textureBatches;
QRhiBatchedBindings<id<MTLSamplerState> > samplerBatches;
} res[QRhiMetal::SUPPORTED_STAGES];
- enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2 };
+ enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2, TESSCTRL = 3, TESSEVAL = 4 };
};
struct QMetalCommandBufferData
{
id<MTLCommandBuffer> cb;
+ double lastGpuTime = 0;
id<MTLRenderCommandEncoder> currentRenderPassEncoder;
id<MTLComputeCommandEncoder> currentComputePassEncoder;
+ id<MTLComputeCommandEncoder> tessellationComputeEncoder;
MTLRenderPassDescriptor *currentPassRpDesc;
int currentFirstVertexBinding;
QRhiBatchedBindings<id<MTLBuffer> > currentVertexInputsBuffers;
@@ -294,8 +368,11 @@ struct QMetalRenderTargetData
struct {
ColorAtt colorAtt[QMetalRenderPassDescriptor::MAX_COLOR_ATTACHMENTS];
id<MTLTexture> dsTex = nil;
+ id<MTLTexture> dsResolveTex = nil;
bool hasStencil = false;
bool depthNeedsStore = false;
+ bool preserveColor = false;
+ bool preserveDs = false;
} fb;
QRhiRenderTargetAttachmentTracker::ResIdList currentResIdList;
@@ -303,6 +380,7 @@ struct QMetalRenderTargetData
struct QMetalGraphicsPipelineData
{
+ QMetalGraphicsPipeline *q = nullptr;
id<MTLRenderPipelineState> ps = nil;
id<MTLDepthStencilState> ds = nil;
MTLPrimitiveType primitiveType;
@@ -313,6 +391,54 @@ struct QMetalGraphicsPipelineData
float slopeScaledDepthBias;
QMetalShader vs;
QMetalShader fs;
+ struct ExtraBufferManager {
+ enum class WorkBufType {
+ DeviceLocal,
+ HostVisible
+ };
+ QMetalBuffer *acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type = WorkBufType::DeviceLocal);
+ QVector<QMetalBuffer *> deviceLocalWorkBuffers;
+ QVector<QMetalBuffer *> hostVisibleWorkBuffers;
+ } extraBufMgr;
+ struct Tessellation {
+ QMetalGraphicsPipelineData *q = nullptr;
+ bool enabled = false;
+ bool failed = false;
+ uint inControlPointCount;
+ uint outControlPointCount;
+ QMetalShader compVs[3];
+ std::array<id<MTLComputePipelineState>, 3> vertexComputeState = {};
+ id<MTLComputePipelineState> tessControlComputeState = nil;
+ QMetalShader compTesc;
+ QMetalShader vertTese;
+ quint32 vsCompOutputBufferSize(quint32 vertexOrIndexCount, quint32 instanceCount) const
+ {
+ // max vertex output components = resourceLimit(MaxVertexOutputs) * 4 = 60
+ return vertexOrIndexCount * instanceCount * sizeof(float) * 60;
+ }
+ quint32 tescCompOutputBufferSize(quint32 patchCount) const
+ {
+ return outControlPointCount * patchCount * sizeof(float) * 60;
+ }
+ quint32 tescCompPatchOutputBufferSize(quint32 patchCount) const
+ {
+ // assume maxTessellationControlPerPatchOutputComponents is 128
+ return patchCount * sizeof(float) * 128;
+ }
+ quint32 patchCountForDrawCall(quint32 vertexOrIndexCount, quint32 instanceCount) const
+ {
+ return ((vertexOrIndexCount + inControlPointCount - 1) / inControlPointCount) * instanceCount;
+ }
+ static int vsCompVariantToIndex(QShader::Variant vertexCompVariant);
+ id<MTLComputePipelineState> vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant);
+ id<MTLComputePipelineState> tescCompPipeline(QRhiMetal *rhiD);
+ id<MTLRenderPipelineState> teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline);
+ } tess;
+ void setupVertexInputDescriptor(MTLVertexDescriptor *desc);
+ void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc);
+
+ // SPIRV-Cross buffer size buffers
+ QMetalBuffer *bufferSizeBuffer = nullptr;
};
struct QMetalComputePipelineData
@@ -320,6 +446,9 @@ struct QMetalComputePipelineData
id<MTLComputePipelineState> ps = nil;
QMetalShader cs;
MTLSize localSize;
+
+ // SPIRV-Cross buffer size buffers
+ QMetalBuffer *bufferSizeBuffer = nullptr;
};
struct QMetalSwapChainData
@@ -327,10 +456,16 @@ struct QMetalSwapChainData
CAMetalLayer *layer = nullptr;
id<CAMetalDrawable> curDrawable = nil;
dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT];
+ double lastGpuTime[QMTL_FRAMES_IN_FLIGHT];
MTLRenderPassDescriptor *rp = nullptr;
id<MTLTexture> msaaTex[QMTL_FRAMES_IN_FLIGHT];
QRhiTexture::Format rhiColorFormat;
MTLPixelFormat colorFormat;
+#ifdef Q_OS_MACOS
+ bool liveResizeObserverSet = false;
+ QMacNotificationObserver liveResizeStartObserver;
+ QMacNotificationObserver liveResizeEndObserver;
+#endif
};
QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *importDevice)
@@ -341,7 +476,7 @@ QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *import
importedDevice = importDevice != nullptr;
if (importedDevice) {
- if (d->dev) {
+ if (importDevice->dev) {
d->dev = (id<MTLDevice>) importDevice->dev;
importedCmdQueue = importDevice->cmdQueue != nullptr;
if (importedCmdQueue)
@@ -375,9 +510,44 @@ bool QRhiMetal::probe(QRhiMetalInitParams *params)
return false;
}
+id<MTLCommandBuffer> QRhiMetalData::newCommandBuffer()
+{
+#ifdef QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
+ // Do not let the command buffer mess with the refcount of objects. We do
+ // have a proper render loop and will manage lifetimes similarly to other
+ // backends (Vulkan).
+ return [cmdQueue commandBufferWithUnretainedReferences];
+#else
+ return [cmdQueue commandBuffer];
+#endif
+}
+
+bool QRhiMetalData::setupBinaryArchive(NSURL *sourceFileUrl)
+{
+#ifdef QRHI_METAL_DISABLE_BINARY_ARCHIVE
+ return false;
+#endif
+
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ [binArch release];
+ MTLBinaryArchiveDescriptor *binArchDesc = [MTLBinaryArchiveDescriptor new];
+ binArchDesc.url = sourceFileUrl;
+ NSError *err = nil;
+ binArch = [dev newBinaryArchiveWithDescriptor: binArchDesc error: &err];
+ [binArchDesc release];
+ if (!binArch) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ qWarning("newBinaryArchiveWithDescriptor failed: %s", qPrintable(msg));
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
bool QRhiMetal::create(QRhi::Flags flags)
{
- Q_UNUSED(flags);
+ rhiFlags = flags;
if (importedDevice)
[d->dev retain];
@@ -392,10 +562,12 @@ bool QRhiMetal::create(QRhi::Flags flags)
const QString deviceName = QString::fromNSString([d->dev name]);
qCDebug(QRHI_LOG_INFO, "Metal device: %s", qPrintable(deviceName));
driverInfoStruct.deviceName = deviceName.toUtf8();
- driverInfoStruct.deviceId = [d->dev registryID];
-#ifdef Q_OS_IOS
- driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice;
-#else
+
+ // deviceId and vendorId stay unset for now. Note that registryID is not
+ // suitable as deviceId because it does not seem stable on macOS and can
+ // apparently change when the system is rebooted.
+
+#ifdef Q_OS_MACOS
if (@available(macOS 10.15, *)) {
const MTLDeviceLocation deviceLocation = [d->dev location];
switch (deviceLocation) {
@@ -412,8 +584,14 @@ bool QRhiMetal::create(QRhi::Flags flags)
break;
}
}
+#else
+ driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice;
#endif
+ const QOperatingSystemVersion ver = QOperatingSystemVersion::current();
+ osMajor = ver.majorVersion();
+ osMinor = ver.minorVersion();
+
if (importedCmdQueue)
[d->cmdQueue retain];
else
@@ -433,6 +611,7 @@ bool QRhiMetal::create(QRhi::Flags flags)
if (@available(macOS 10.15, *))
caps.isAppleGPU = [d->dev supportsFamily:MTLGPUFamilyApple7];
caps.maxThreadGroupSize = 1024;
+ caps.multiView = true;
#elif defined(Q_OS_TVOS)
if ([d->dev supportsFeatureSet: MTLFeatureSet(30003)]) // MTLFeatureSet_tvOS_GPUFamily2_v1
caps.maxTextureSize = 16384;
@@ -459,8 +638,10 @@ bool QRhiMetal::create(QRhi::Flags flags)
}
caps.isAppleGPU = true;
if (@available(iOS 13, *)) {
- if ([d->dev supportsFamily:MTLGPUFamilyApple4])
+ if ([d->dev supportsFamily: MTLGPUFamilyApple4])
caps.maxThreadGroupSize = 1024;
+ if ([d->dev supportsFamily: MTLGPUFamilyApple5])
+ caps.multiView = true;
}
#endif
@@ -470,6 +651,9 @@ bool QRhiMetal::create(QRhi::Flags flags)
caps.supportedSampleCounts.append(sampleCount);
}
+ if (rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ d->setupBinaryArchive();
+
nativeHandlesStruct.dev = (MTLDevice *) d->dev;
nativeHandlesStruct.cmdQueue = (MTLCommandQueue *) d->cmdQueue;
@@ -488,6 +672,11 @@ void QRhiMetal::destroy()
[d->captureScope release];
d->captureScope = nil;
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ [d->binArch release];
+ d->binArch = nil;
+ }
+
[d->cmdQueue release];
if (!importedCmdQueue)
d->cmdQueue = nil;
@@ -502,17 +691,6 @@ QVector<int> QRhiMetal::supportedSampleCounts() const
return caps.supportedSampleCounts;
}
-int QRhiMetal::effectiveSampleCount(int sampleCount) const
-{
- // Stay compatible with QSurfaceFormat and friends where samples == 0 means the same as 1.
- const int s = qBound(1, sampleCount, 64);
- if (!supportedSampleCounts().contains(s)) {
- qWarning("Attempted to set unsupported sample count %d", sampleCount);
- return 1;
- }
- return s;
-}
-
QRhiSwapChain *QRhiMetal::createSwapChain()
{
return new QMetalSwapChain(this);
@@ -600,7 +778,7 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
case QRhi::DebugMarkers:
return true;
case QRhi::Timestamps:
- return false;
+ return true;
case QRhi::Instancing:
return true;
case QRhi::CustomInstanceStepRate:
@@ -644,7 +822,12 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
case QRhi::ReadBackAnyTextureFormat:
return true;
case QRhi::PipelineCacheDataLoadSave:
- return false;
+ {
+ if (@available(macOS 11.0, iOS 14.0, *))
+ return true;
+ else
+ return false;
+ }
case QRhi::ImageDataStride:
return true;
case QRhi::RenderBufferImport:
@@ -656,13 +839,29 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
case QRhi::TextureArrays:
return true;
case QRhi::Tessellation:
- return false;
+ return true;
case QRhi::GeometryShader:
return false;
case QRhi::TextureArrayRange:
return false;
case QRhi::NonFillPolygonMode:
return true;
+ case QRhi::OneDimensionalTextures:
+ return true;
+ case QRhi::OneDimensionalTextureMipmaps:
+ return false;
+ case QRhi::HalfAttributes:
+ return true;
+ case QRhi::RenderToOneDimensionalTexture:
+ return false;
+ case QRhi::ThreeDimensionalTextureMipmaps:
+ return true;
+ case QRhi::MultiView:
+ return caps.multiView;
+ case QRhi::TextureViewFormat:
+ return false;
+ case QRhi::ResolveDepthStencil:
+ return true;
default:
Q_UNREACHABLE();
return false;
@@ -742,14 +941,133 @@ bool QRhiMetal::isDeviceLost() const
return false;
}
+struct QMetalPipelineCacheDataHeader
+{
+ quint32 rhiId;
+ quint32 arch;
+ quint32 dataSize;
+ quint32 osMajor;
+ quint32 osMinor;
+ char driver[236];
+};
+
QByteArray QRhiMetal::pipelineCacheData()
{
- return QByteArray();
+ Q_STATIC_ASSERT(sizeof(QMetalPipelineCacheDataHeader) == 256);
+ QByteArray data;
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ if (!d->binArch || !rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ return data;
+
+ QTemporaryFile tmp;
+ if (!tmp.open()) {
+ qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal");
+ return data;
+ }
+ tmp.close(); // the file exists until the tmp dtor runs
+
+ const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath();
+ NSURL *url = QUrl::fromLocalFile(fn).toNSURL();
+ NSError *err = nil;
+ if (![d->binArch serializeToURL: url error: &err]) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ // Some of these "errors" are not actual errors. (think of "Nothing to serialize")
+ qCDebug(QRHI_LOG_INFO, "Failed to serialize MTLBinaryArchive: %s", qPrintable(msg));
+ return data;
+ }
+
+ QFile f(fn);
+ if (!f.open(QIODevice::ReadOnly)) {
+ qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to reopen temporary file");
+ return data;
+ }
+ const QByteArray blob = f.readAll();
+ f.close();
+
+ const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader);
+ const quint32 dataSize = quint32(blob.size());
+
+ data.resize(headerSize + dataSize);
+
+ QMetalPipelineCacheDataHeader header = {};
+ header.rhiId = pipelineCacheRhiId();
+ header.arch = quint32(sizeof(void*));
+ header.dataSize = quint32(dataSize);
+ header.osMajor = osMajor;
+ header.osMinor = osMinor;
+ const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length()));
+ if (driverStrLen)
+ memcpy(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen);
+ header.driver[driverStrLen] = '\0';
+
+ memcpy(data.data(), &header, headerSize);
+ memcpy(data.data() + headerSize, blob.constData(), dataSize);
+ }
+ return data;
}
void QRhiMetal::setPipelineCacheData(const QByteArray &data)
{
- Q_UNUSED(data);
+ if (data.isEmpty())
+ return;
+
+ const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader);
+ if (data.size() < qsizetype(headerSize)) {
+ qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (header incomplete)");
+ return;
+ }
+
+ const size_t dataOffset = headerSize;
+ QMetalPipelineCacheDataHeader header;
+ memcpy(&header, data.constData(), headerSize);
+
+ const quint32 rhiId = pipelineCacheRhiId();
+ if (header.rhiId != rhiId) {
+ qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: The data is for a different QRhi version or backend (%u, %u)",
+ rhiId, header.rhiId);
+ return;
+ }
+
+ const quint32 arch = quint32(sizeof(void*));
+ if (header.arch != arch) {
+ qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Architecture does not match (%u, %u)",
+ arch, header.arch);
+ return;
+ }
+
+ if (header.osMajor != osMajor || header.osMinor != osMinor) {
+ qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: OS version does not match (%u.%u, %u.%u)",
+ osMajor, osMinor, header.osMajor, header.osMinor);
+ return;
+ }
+
+ const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length()));
+ if (strncmp(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen)) {
+ qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Metal device name does not match");
+ return;
+ }
+
+ if (data.size() < qsizetype(dataOffset + header.dataSize)) {
+ qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (data incomplete)");
+ return;
+ }
+
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ const char *p = data.constData() + dataOffset;
+
+ QTemporaryFile tmp;
+ if (!tmp.open()) {
+ qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal");
+ return;
+ }
+ tmp.write(p, header.dataSize);
+ tmp.close(); // the file exists until the tmp dtor runs
+
+ const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath();
+ NSURL *url = QUrl::fromLocalFile(fn).toNSURL();
+ if (d->setupBinaryArchive(url))
+ qCDebug(QRHI_LOG_INFO, "Created MTLBinaryArchive with initial data of %u bytes", header.dataSize);
+ }
}
QRhiRenderBuffer *QRhiMetal::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize,
@@ -819,6 +1137,136 @@ static inline int mapBinding(int binding,
return -1;
}
+static inline void bindStageBuffers(QMetalCommandBuffer *cbD,
+ int stage,
+ const QRhiBatchedBindings<id<MTLBuffer>>::Batch &bufferBatch,
+ const QRhiBatchedBindings<NSUInteger>::Batch &offsetBatch)
+{
+ switch (stage) {
+ case QMetalShaderResourceBindingsData::VERTEX:
+ [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData()
+ offsets: offsetBatch.resources.constData()
+ withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::FRAGMENT:
+ [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData()
+ offsets: offsetBatch.resources.constData()
+ withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::COMPUTE:
+ [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData()
+ offsets: offsetBatch.resources.constData()
+ withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::TESSCTRL:
+ case QMetalShaderResourceBindingsData::TESSEVAL:
+ // do nothing. These are used later for tessellation
+ break;
+ default:
+ Q_UNREACHABLE();
+ break;
+ }
+}
+
+static inline void bindStageTextures(QMetalCommandBuffer *cbD,
+ int stage,
+ const QRhiBatchedBindings<id<MTLTexture>>::Batch &textureBatch)
+{
+ switch (stage) {
+ case QMetalShaderResourceBindingsData::VERTEX:
+ [cbD->d->currentRenderPassEncoder setVertexTextures: textureBatch.resources.constData()
+ withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::FRAGMENT:
+ [cbD->d->currentRenderPassEncoder setFragmentTextures: textureBatch.resources.constData()
+ withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::COMPUTE:
+ [cbD->d->currentComputePassEncoder setTextures: textureBatch.resources.constData()
+ withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::TESSCTRL:
+ case QMetalShaderResourceBindingsData::TESSEVAL:
+ // do nothing. These are used later for tessellation
+ break;
+ default:
+ Q_UNREACHABLE();
+ break;
+ }
+}
+
+static inline void bindStageSamplers(QMetalCommandBuffer *cbD,
+ int encoderStage,
+ const QRhiBatchedBindings<id<MTLSamplerState>>::Batch &samplerBatch)
+{
+ switch (encoderStage) {
+ case QMetalShaderResourceBindingsData::VERTEX:
+ [cbD->d->currentRenderPassEncoder setVertexSamplerStates: samplerBatch.resources.constData()
+ withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::FRAGMENT:
+ [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: samplerBatch.resources.constData()
+ withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::COMPUTE:
+ [cbD->d->currentComputePassEncoder setSamplerStates: samplerBatch.resources.constData()
+ withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))];
+ break;
+ case QMetalShaderResourceBindingsData::TESSCTRL:
+ case QMetalShaderResourceBindingsData::TESSEVAL:
+ // do nothing. These are used later for tessellation
+ break;
+ default:
+ Q_UNREACHABLE();
+ break;
+ }
+}
+
+// Helper that is not used during the common vertex+fragment and compute
+// pipelines, but is necessary when tessellation is involved and so the
+// graphics pipeline is under the hood a combination of multiple compute and
+// render pipelines. We need to be able to set the buffers, textures, samplers
+// when a switching between render and compute encoders.
+static inline void rebindShaderResources(QMetalCommandBuffer *cbD, int resourceStage, int encoderStage,
+ const QMetalShaderResourceBindingsData *customBindingState = nullptr)
+{
+ const QMetalShaderResourceBindingsData *bindingData = customBindingState ? customBindingState : &cbD->d->currentShaderResourceBindingState;
+
+ for (int i = 0, ie = bindingData->res[resourceStage].bufferBatches.batches.count(); i != ie; ++i) {
+ const auto &bufferBatch(bindingData->res[resourceStage].bufferBatches.batches[i]);
+ const auto &offsetBatch(bindingData->res[resourceStage].bufferOffsetBatches.batches[i]);
+ bindStageBuffers(cbD, encoderStage, bufferBatch, offsetBatch);
+ }
+
+ for (int i = 0, ie = bindingData->res[resourceStage].textureBatches.batches.count(); i != ie; ++i) {
+ const auto &batch(bindingData->res[resourceStage].textureBatches.batches[i]);
+ bindStageTextures(cbD, encoderStage, batch);
+ }
+
+ for (int i = 0, ie = bindingData->res[resourceStage].samplerBatches.batches.count(); i != ie; ++i) {
+ const auto &batch(bindingData->res[resourceStage].samplerBatches.batches[i]);
+ bindStageSamplers(cbD, encoderStage, batch);
+ }
+}
+
+static inline QRhiShaderResourceBinding::StageFlag toRhiSrbStage(int stage)
+{
+ switch (stage) {
+ case QMetalShaderResourceBindingsData::VERTEX:
+ return QRhiShaderResourceBinding::StageFlag::VertexStage;
+ case QMetalShaderResourceBindingsData::TESSCTRL:
+ return QRhiShaderResourceBinding::StageFlag::TessellationControlStage;
+ case QMetalShaderResourceBindingsData::TESSEVAL:
+ return QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage;
+ case QMetalShaderResourceBindingsData::FRAGMENT:
+ return QRhiShaderResourceBinding::StageFlag::FragmentStage;
+ case QMetalShaderResourceBindingsData::COMPUTE:
+ return QRhiShaderResourceBinding::StageFlag::ComputeStage;
+ }
+
+ Q_UNREACHABLE_RETURN(QRhiShaderResourceBinding::StageFlag::VertexStage);
+}
+
void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD,
QMetalCommandBuffer *cbD,
int dynamicOffsetCount,
@@ -828,8 +1276,8 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
{
QMetalShaderResourceBindingsData bindingData;
- for (const QRhiShaderResourceBinding &binding : qAsConst(srbD->sortedBindings)) {
- const QRhiShaderResourceBinding::Data *b = binding.data();
+ for (const QRhiShaderResourceBinding &binding : std::as_const(srbD->sortedBindings)) {
+ const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(binding);
switch (b->type) {
case QRhiShaderResourceBinding::UniformBuffer:
{
@@ -843,20 +1291,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
break;
}
}
- if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Buffer);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::VERTEX].buffers.append({ nativeBinding, mtlbuf, offset });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Buffer);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset });
+
+ for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+ if (b->stage.testFlag(toRhiSrbStage(stage))) {
+ const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer);
+ if (nativeBinding >= 0)
+ bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset });
+ }
}
}
break;
@@ -868,36 +1309,21 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
for (int elem = 0; elem < data->count; ++elem) {
QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.stex.texSamplers[elem].tex);
QMetalSampler *samplerD = QRHI_RES(QMetalSampler, b->u.stex.texSamplers[elem].sampler);
- if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
- // Must handle all three cases (combined, separate, separate):
- // first = texture binding, second = sampler binding
- // first = texture binding
- // first = sampler binding (i.e. BindingType::Texture...)
- const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture);
- const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Sampler)
- : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture) : -1);
- if (textureBinding >= 0 && texD)
- bindingData.res[QMetalShaderResourceBindingsData::VERTEX].textures.append({ textureBinding + elem, texD->d->tex });
- if (samplerBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::VERTEX].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
- const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture);
- const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Sampler)
- : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture) : -1);
- if (textureBinding >= 0 && texD)
- bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].textures.append({ textureBinding + elem, texD->d->tex });
- if (samplerBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
- const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture);
- const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Sampler)
- : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture) : -1);
- if (textureBinding >= 0 && texD)
- bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].textures.append({ textureBinding + elem, texD->d->tex });
- if (samplerBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
+
+ for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+ if (b->stage.testFlag(toRhiSrbStage(stage))) {
+ // Must handle all three cases (combined, separate, separate):
+ // first = texture binding, second = sampler binding
+ // first = texture binding
+ // first = sampler binding (i.e. BindingType::Texture...)
+ const int textureBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture);
+ const int samplerBinding = texD && samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Sampler)
+ : (samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture) : -1);
+ if (textureBinding >= 0 && texD)
+ bindingData.res[stage].textures.append({ textureBinding + elem, texD->d->tex });
+ if (samplerBinding >= 0)
+ bindingData.res[stage].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
+ }
}
}
}
@@ -908,20 +1334,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
{
QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex);
id<MTLTexture> t = texD->d->viewForLevel(b->u.simage.level);
- if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::VERTEX].textures.append({ nativeBinding, t });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].textures.append({ nativeBinding, t });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].textures.append({ nativeBinding, t });
+
+ for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+ if (b->stage.testFlag(toRhiSrbStage(stage))) {
+ const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture);
+ if (nativeBinding >= 0)
+ bindingData.res[stage].textures.append({ nativeBinding, t });
+ }
}
}
break;
@@ -932,20 +1351,12 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
id<MTLBuffer> mtlbuf = bufD->d->buf[0];
quint32 offset = b->u.sbuf.offset;
- if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Buffer);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::VERTEX].buffers.append({ nativeBinding, mtlbuf, offset });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset });
- }
- if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
- const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Buffer);
- if (nativeBinding >= 0)
- bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset });
+ for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+ if (b->stage.testFlag(toRhiSrbStage(stage))) {
+ const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer);
+ if (nativeBinding >= 0)
+ bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset });
+ }
}
}
break;
@@ -956,9 +1367,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
}
for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
- if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT))
+ if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT
+ || stage == QMetalShaderResourceBindingsData::TESSCTRL || stage == QMetalShaderResourceBindingsData::TESSEVAL))
continue;
- if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && stage == QMetalShaderResourceBindingsData::COMPUTE)
+ if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && (stage == QMetalShaderResourceBindingsData::COMPUTE))
continue;
// QRhiBatchedBindings works with the native bindings and expects
@@ -970,7 +1382,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
return a.nativeBinding < b.nativeBinding;
});
- for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : qAsConst(bindingData.res[stage].buffers)) {
+ for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : std::as_const(bindingData.res[stage].buffers)) {
bindingData.res[stage].bufferBatches.feed(buf.nativeBinding, buf.mtlbuf);
bindingData.res[stage].bufferOffsetBatches.feed(buf.nativeBinding, buf.offset);
}
@@ -989,26 +1401,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
{
continue;
}
- switch (stage) {
- case QMetalShaderResourceBindingsData::VERTEX:
- [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData()
- offsets: offsetBatch.resources.constData()
- withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
- break;
- case QMetalShaderResourceBindingsData::FRAGMENT:
- [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData()
- offsets: offsetBatch.resources.constData()
- withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
- break;
- case QMetalShaderResourceBindingsData::COMPUTE:
- [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData()
- offsets: offsetBatch.resources.constData()
- withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
- break;
- default:
- Q_UNREACHABLE();
- break;
- }
+ bindStageBuffers(cbD, stage, bufferBatch, offsetBatch);
}
if (offsetOnlyChange)
@@ -1022,10 +1415,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
return a.nativeBinding < b.nativeBinding;
});
- for (const QMetalShaderResourceBindingsData::Stage::Texture &t : qAsConst(bindingData.res[stage].textures))
+ for (const QMetalShaderResourceBindingsData::Stage::Texture &t : std::as_const(bindingData.res[stage].textures))
bindingData.res[stage].textureBatches.feed(t.nativeBinding, t.mtltex);
- for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : qAsConst(bindingData.res[stage].samplers))
+ for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : std::as_const(bindingData.res[stage].samplers))
bindingData.res[stage].samplerBatches.feed(s.nativeBinding, s.mtlsampler);
bindingData.res[stage].textureBatches.finish();
@@ -1039,23 +1432,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
{
continue;
}
- switch (stage) {
- case QMetalShaderResourceBindingsData::VERTEX:
- [cbD->d->currentRenderPassEncoder setVertexTextures: batch.resources.constData()
- withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
- break;
- case QMetalShaderResourceBindingsData::FRAGMENT:
- [cbD->d->currentRenderPassEncoder setFragmentTextures: batch.resources.constData()
- withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
- break;
- case QMetalShaderResourceBindingsData::COMPUTE:
- [cbD->d->currentComputePassEncoder setTextures: batch.resources.constData()
- withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
- break;
- default:
- Q_UNREACHABLE();
- break;
- }
+ bindStageTextures(cbD, stage, batch);
}
for (int i = 0, ie = bindingData.res[stage].samplerBatches.batches.count(); i != ie; ++i) {
@@ -1066,65 +1443,68 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
{
continue;
}
- switch (stage) {
- case QMetalShaderResourceBindingsData::VERTEX:
- [cbD->d->currentRenderPassEncoder setVertexSamplerStates: batch.resources.constData()
- withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
- break;
- case QMetalShaderResourceBindingsData::FRAGMENT:
- [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: batch.resources.constData()
- withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
- break;
- case QMetalShaderResourceBindingsData::COMPUTE:
- [cbD->d->currentComputePassEncoder setSamplerStates: batch.resources.constData()
- withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
- break;
- default:
- Q_UNREACHABLE();
- break;
- }
+ bindStageSamplers(cbD, stage, batch);
}
}
+
cbD->d->currentShaderResourceBindingState = bindingData;
}
+void QMetalGraphicsPipeline::makeActiveForCurrentRenderPassEncoder(QMetalCommandBuffer *cbD)
+{
+ [cbD->d->currentRenderPassEncoder setRenderPipelineState: d->ps];
+
+ if (cbD->d->currentDepthStencilState != d->ds) {
+ [cbD->d->currentRenderPassEncoder setDepthStencilState: d->ds];
+ cbD->d->currentDepthStencilState = d->ds;
+ }
+
+ if (cbD->currentCullMode == -1 || d->cullMode != uint(cbD->currentCullMode)) {
+ [cbD->d->currentRenderPassEncoder setCullMode: d->cullMode];
+ cbD->currentCullMode = int(d->cullMode);
+ }
+ if (cbD->currentTriangleFillMode == -1 || d->triangleFillMode != uint(cbD->currentTriangleFillMode)) {
+ [cbD->d->currentRenderPassEncoder setTriangleFillMode: d->triangleFillMode];
+ cbD->currentTriangleFillMode = int(d->triangleFillMode);
+ }
+ if (cbD->currentFrontFaceWinding == -1 || d->winding != uint(cbD->currentFrontFaceWinding)) {
+ [cbD->d->currentRenderPassEncoder setFrontFacingWinding: d->winding];
+ cbD->currentFrontFaceWinding = int(d->winding);
+ }
+ if (!qFuzzyCompare(d->depthBias, cbD->currentDepthBiasValues.first)
+ || !qFuzzyCompare(d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second))
+ {
+ [cbD->d->currentRenderPassEncoder setDepthBias: d->depthBias
+ slopeScale: d->slopeScaledDepthBias
+ clamp: 0.0f];
+ cbD->currentDepthBiasValues = { d->depthBias, d->slopeScaledDepthBias };
+ }
+}
+
void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps)
{
QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
QMetalGraphicsPipeline *psD = QRHI_RES(QMetalGraphicsPipeline, ps);
- if (cbD->currentGraphicsPipeline != ps || cbD->currentPipelineGeneration != psD->generation) {
- cbD->currentGraphicsPipeline = ps;
- cbD->currentComputePipeline = nullptr;
- cbD->currentPipelineGeneration = psD->generation;
-
- [cbD->d->currentRenderPassEncoder setRenderPipelineState: psD->d->ps];
+ if (cbD->currentGraphicsPipeline == psD && cbD->currentPipelineGeneration == psD->generation)
+ return;
- if (cbD->d->currentDepthStencilState != psD->d->ds) {
- [cbD->d->currentRenderPassEncoder setDepthStencilState: psD->d->ds];
- cbD->d->currentDepthStencilState = psD->d->ds;
- }
+ cbD->currentGraphicsPipeline = psD;
+ cbD->currentComputePipeline = nullptr;
+ cbD->currentPipelineGeneration = psD->generation;
- if (cbD->currentCullMode == -1 || psD->d->cullMode != uint(cbD->currentCullMode)) {
- [cbD->d->currentRenderPassEncoder setCullMode: psD->d->cullMode];
- cbD->currentCullMode = int(psD->d->cullMode);
- }
- if (cbD->currentTriangleFillMode == -1 || psD->d->triangleFillMode != uint(cbD->currentTriangleFillMode)) {
- [cbD->d->currentRenderPassEncoder setTriangleFillMode: psD->d->triangleFillMode];
- cbD->currentTriangleFillMode = int(psD->d->triangleFillMode);
- }
- if (cbD->currentFrontFaceWinding == -1 || psD->d->winding != uint(cbD->currentFrontFaceWinding)) {
- [cbD->d->currentRenderPassEncoder setFrontFacingWinding: psD->d->winding];
- cbD->currentFrontFaceWinding = int(psD->d->winding);
+ if (!psD->d->tess.enabled && !psD->d->tess.failed) {
+ psD->makeActiveForCurrentRenderPassEncoder(cbD);
+ } else {
+ // mark work buffers that can now be safely reused as reusable
+ for (QMetalBuffer *workBuf : psD->d->extraBufMgr.deviceLocalWorkBuffers) {
+ if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot)
+ workBuf->lastActiveFrameSlot = -1;
}
- if (!qFuzzyCompare(psD->d->depthBias, cbD->currentDepthBiasValues.first)
- || !qFuzzyCompare(psD->d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second))
- {
- [cbD->d->currentRenderPassEncoder setDepthBias: psD->d->depthBias
- slopeScale: psD->d->slopeScaledDepthBias
- clamp: 0.0f];
- cbD->currentDepthBiasValues = { psD->d->depthBias, psD->d->slopeScaledDepthBias };
+ for (QMetalBuffer *workBuf : psD->d->extraBufMgr.hostVisibleWorkBuffers) {
+ if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot)
+ workBuf->lastActiveFrameSlot = -1;
}
}
@@ -1137,8 +1517,8 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
{
QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
Q_ASSERT(cbD->recordingPass != QMetalCommandBuffer::NoPass);
- QMetalGraphicsPipeline *gfxPsD = QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline);
- QMetalComputePipeline *compPsD = QRHI_RES(QMetalComputePipeline, cbD->currentComputePipeline);
+ QMetalGraphicsPipeline *gfxPsD = cbD->currentGraphicsPipeline;
+ QMetalComputePipeline *compPsD = cbD->currentComputePipeline;
if (!srb) {
if (gfxPsD)
@@ -1152,9 +1532,15 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
bool hasDynamicOffsetInSrb = false;
bool resNeedsRebind = false;
+ // SPIRV-Cross buffer size buffers
+ // Need to determine storage buffer sizes here as this is the last opportunity for storage
+ // buffer bindings (offset, size) to be specified before draw / dispatch call
+ const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer);
+ QMap<QRhiShaderResourceBinding::StageFlag, QMap<int, quint32>> storageBufferSizes;
+
// do buffer writes, figure out if we need to rebind, and mark as in-use
for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) {
- const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data();
+ const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->sortedBindings.at(i));
QMetalShaderResourceBindings::BoundResourceData &bd(srbD->boundResourceData[i]);
switch (b->type) {
case QRhiShaderResourceBinding::UniformBuffer:
@@ -1228,6 +1614,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
{
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer));
+
+ if (needsBufferSizeBuffer) {
+ for (int i = 0; i < 6; ++i) {
+ const QRhiShaderResourceBinding::StageFlag stage =
+ QRhiShaderResourceBinding::StageFlag(1 << i);
+ if (b->stage.testFlag(stage)) {
+ storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size();
+ }
+ }
+ }
+
executeBufferHostWritesForCurrentFrame(bufD);
if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) {
resNeedsRebind = true;
@@ -1243,26 +1640,141 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
}
}
+ if (needsBufferSizeBuffer) {
+ QMetalBuffer *bufD = nullptr;
+ QVarLengthArray<QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag>, 4> shaders;
+
+ if (compPsD) {
+ bufD = compPsD->d->bufferSizeBuffer;
+ Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
+ shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage));
+ } else {
+ bufD = gfxPsD->d->bufferSizeBuffer;
+ if (gfxPsD->d->tess.enabled) {
+
+ // Assumptions
+ // * We only use one of the compute vertex shader variants in a pipeline at any one time
+ // * The vertex shader variants all have the same storage block bindings
+ // * The vertex shader variants all have the same native resource binding map
+ // * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement
+ // * The vertex shader variants all have the same MslBufferSizeBufferBinding binding
+ // => We only need to use one vertex shader variant to generate the identical shader
+ // resource bindings
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks());
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks());
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
+ == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
+ == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
+ == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
+ == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
+
+ if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+ shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage));
+
+ if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+ shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage));
+
+ if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+ shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage));
+
+ } else {
+ if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+ shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage));
+ }
+ if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+ shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage));
+ }
+
+ quint32 offset = 0;
+ for (const QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag> &shader : shaders) {
+
+ const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
+
+ // if we don't have a srb entry for the buffer size buffer
+ if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) {
+
+ int maxNativeBinding = 0;
+ for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks())
+ maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first);
+
+ const int size = (maxNativeBinding + 1) * sizeof(int);
+
+ Q_ASSERT(offset + size <= bufD->size());
+ srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size));
+
+ QMetalShaderResourceBindings::BoundResourceData bd;
+ bd.sbuf.id = bufD->m_id;
+ bd.sbuf.generation = bufD->generation;
+ srbD->boundResourceData.append(bd);
+ }
+
+ // create the buffer size buffer data
+ QVarLengthArray<int, 8> bufferSizeBufferData;
+ Q_ASSERT(storageBufferSizes.contains(shader.second));
+ const QMap<int, quint32> &sizes(storageBufferSizes[shader.second]);
+ for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) {
+ const int index = shader.first->nativeResourceBindingMap[block.binding].first;
+
+ // if the native binding is -1, the buffer is present but not accessed in the shader
+ if (index < 0)
+ continue;
+
+ if (bufferSizeBufferData.size() <= index)
+ bufferSizeBufferData.resize(index + 1);
+
+ Q_ASSERT(sizes.contains(block.binding));
+ bufferSizeBufferData[index] = sizes[block.binding];
+ }
+
+ QRhiBufferData data;
+ const quint32 size = bufferSizeBufferData.size() * sizeof(int);
+ data.assign(reinterpret_cast<const char *>(bufferSizeBufferData.constData()), size);
+ Q_ASSERT(offset + size <= bufD->size());
+ bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data });
+
+ // buffer offsets must be 32byte aligned
+ offset += ((size + 31) / 32) * 32;
+ }
+
+ executeBufferHostWritesForCurrentFrame(bufD);
+ bufD->lastActiveFrameSlot = currentFrameSlot;
+ }
+
// make sure the resources for the correct slot get bound
const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0;
if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot)
resNeedsRebind = true;
- const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb);
+ const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srbD) : (cbD->currentComputeSrb != srbD);
const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation;
// dynamic uniform buffer offsets always trigger a rebind
if (hasDynamicOffsetInSrb || resNeedsRebind || srbChanged || srbRebuilt) {
- const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr };
+ const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr, nullptr, nullptr };
if (gfxPsD) {
- cbD->currentGraphicsSrb = srb;
+ cbD->currentGraphicsSrb = srbD;
cbD->currentComputeSrb = nullptr;
- resBindMaps[0] = &gfxPsD->d->vs.nativeResourceBindingMap;
- resBindMaps[1] = &gfxPsD->d->fs.nativeResourceBindingMap;
+ if (gfxPsD->d->tess.enabled) {
+ // If tessellating, we don't know which compVs shader to use until the draw call is
+ // made. They should all have the same native resource binding map, so pick one.
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
+ Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
+ resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->tess.compVs[0].nativeResourceBindingMap;
+ resBindMaps[QMetalShaderResourceBindingsData::TESSCTRL] = &gfxPsD->d->tess.compTesc.nativeResourceBindingMap;
+ resBindMaps[QMetalShaderResourceBindingsData::TESSEVAL] = &gfxPsD->d->tess.vertTese.nativeResourceBindingMap;
+ } else {
+ resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->vs.nativeResourceBindingMap;
+ }
+ resBindMaps[QMetalShaderResourceBindingsData::FRAGMENT] = &gfxPsD->d->fs.nativeResourceBindingMap;
} else {
cbD->currentGraphicsSrb = nullptr;
- cbD->currentComputeSrb = srb;
- resBindMaps[2] = &compPsD->d->cs.nativeResourceBindingMap;
+ cbD->currentComputeSrb = srbD;
+ resBindMaps[QMetalShaderResourceBindingsData::COMPUTE] = &compPsD->d->cs.nativeResourceBindingMap;
}
cbD->currentSrbGeneration = srbD->generation;
cbD->currentResSlot = resSlot;
@@ -1293,13 +1805,13 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb,
offsets.finish();
// same binding space for vertex and constant buffers - work it around
- QRhiShaderResourceBindings *srb = cbD->currentGraphicsSrb;
+ QMetalShaderResourceBindings *srbD = cbD->currentGraphicsSrb;
// There's nothing guaranteeing setShaderResources() was called before
// setVertexInput()... but whatever srb will get bound will have to be
// layout-compatible anyways so maxBinding is the same.
- if (!srb)
- srb = cbD->currentGraphicsPipeline->shaderResourceBindings();
- const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, srb)->maxBinding + 1;
+ if (!srbD)
+ srbD = QRHI_RES(QMetalShaderResourceBindings, cbD->currentGraphicsPipeline->shaderResourceBindings());
+ const int firstVertexBinding = srbD->maxBinding + 1;
if (firstVertexBinding != cbD->d->currentFirstVertexBinding
|| buffers != cbD->d->currentVertexInputsBuffers
@@ -1323,7 +1835,7 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb,
QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, indexBuf);
executeBufferHostWritesForCurrentFrame(ibufD);
ibufD->lastActiveFrameSlot = currentFrameSlot;
- cbD->currentIndexBuffer = indexBuf;
+ cbD->currentIndexBuffer = ibufD;
cbD->currentIndexOffset = indexOffset;
cbD->currentIndexFormat = indexFormat;
} else {
@@ -1339,7 +1851,7 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
// x,y is top-left in MTLViewportRect but bottom-left in QRhiViewport
float x, y, w, h;
- if (!qrhi_toTopLeftRenderTargetRect(outputSize, viewport.viewport(), &x, &y, &w, &h))
+ if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h))
return;
MTLViewport vp;
@@ -1352,8 +1864,10 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
[cbD->d->currentRenderPassEncoder setViewport: vp];
- if (!QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) {
+ if (cbD->currentGraphicsPipeline
+ && !cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) {
MTLScissorRect s;
+ qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, viewport.viewport(), &x, &y, &w, &h);
s.x = NSUInteger(x);
s.y = NSUInteger(y);
s.width = NSUInteger(w);
@@ -1366,12 +1880,12 @@ void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor)
{
QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
- Q_ASSERT(QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor));
+ Q_ASSERT(cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor));
const QSize outputSize = cbD->currentTarget->pixelSize();
// x,y is top-left in MTLScissorRect but bottom-left in QRhiScissor
int x, y, w, h;
- if (!qrhi_toTopLeftRenderTargetRect(outputSize, scissor.scissor(), &x, &y, &w, &h))
+ if (!qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, scissor.scissor(), &x, &y, &w, &h))
return;
MTLScissorRect s;
@@ -1400,20 +1914,320 @@ void QRhiMetal::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue)
[cbD->d->currentRenderPassEncoder setStencilReferenceValue: refValue];
}
+static id<MTLComputeCommandEncoder> tessellationComputeEncoder(QMetalCommandBuffer *cbD)
+{
+ if (cbD->d->currentRenderPassEncoder) {
+ [cbD->d->currentRenderPassEncoder endEncoding];
+ cbD->d->currentRenderPassEncoder = nil;
+ }
+
+ if (!cbD->d->tessellationComputeEncoder)
+ cbD->d->tessellationComputeEncoder = [cbD->d->cb computeCommandEncoder];
+
+ return cbD->d->tessellationComputeEncoder;
+}
+
+static void endTessellationComputeEncoding(QMetalCommandBuffer *cbD)
+{
+ if (cbD->d->tessellationComputeEncoder) {
+ [cbD->d->tessellationComputeEncoder endEncoding];
+ cbD->d->tessellationComputeEncoder = nil;
+ }
+
+ QMetalRenderTargetData * rtD = nullptr;
+
+ switch (cbD->currentTarget->resourceType()) {
+ case QRhiResource::SwapChainRenderTarget:
+ rtD = QRHI_RES(QMetalSwapChainRenderTarget, cbD->currentTarget)->d;
+ break;
+ case QRhiResource::TextureRenderTarget:
+ rtD = QRHI_RES(QMetalTextureRenderTarget, cbD->currentTarget)->d;
+ break;
+ default:
+ break;
+ }
+
+ Q_ASSERT(rtD);
+
+ QVarLengthArray<MTLLoadAction, 4> oldColorLoad;
+ for (uint i = 0; i < uint(rtD->colorAttCount); ++i) {
+ oldColorLoad.append(cbD->d->currentPassRpDesc.colorAttachments[i].loadAction);
+ if (cbD->d->currentPassRpDesc.colorAttachments[i].storeAction != MTLStoreActionDontCare)
+ cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad;
+ }
+
+ MTLLoadAction oldDepthLoad;
+ MTLLoadAction oldStencilLoad;
+ if (rtD->dsAttCount) {
+ oldDepthLoad = cbD->d->currentPassRpDesc.depthAttachment.loadAction;
+ if (cbD->d->currentPassRpDesc.depthAttachment.storeAction != MTLStoreActionDontCare)
+ cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad;
+
+ oldStencilLoad = cbD->d->currentPassRpDesc.stencilAttachment.loadAction;
+ if (cbD->d->currentPassRpDesc.stencilAttachment.storeAction != MTLStoreActionDontCare)
+ cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad;
+ }
+
+ cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc];
+ cbD->resetPerPassCachedState();
+
+ for (uint i = 0; i < uint(rtD->colorAttCount); ++i) {
+ cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = oldColorLoad[i];
+ }
+
+ if (rtD->dsAttCount) {
+ cbD->d->currentPassRpDesc.depthAttachment.loadAction = oldDepthLoad;
+ cbD->d->currentPassRpDesc.stencilAttachment.loadAction = oldStencilLoad;
+ }
+
+}
+
+void QRhiMetal::tessellatedDraw(const TessDrawArgs &args)
+{
+ QMetalCommandBuffer *cbD = args.cbD;
+ QMetalGraphicsPipeline *graphicsPipeline = cbD->currentGraphicsPipeline;
+ if (graphicsPipeline->d->tess.failed)
+ return;
+
+ const bool indexed = args.type != TessDrawArgs::NonIndexed;
+ const quint32 instanceCount = indexed ? args.drawIndexed.instanceCount : args.draw.instanceCount;
+ const quint32 vertexOrIndexCount = indexed ? args.drawIndexed.indexCount : args.draw.vertexCount;
+
+ QMetalGraphicsPipelineData::Tessellation &tess(graphicsPipeline->d->tess);
+ QMetalGraphicsPipelineData::ExtraBufferManager &extraBufMgr(graphicsPipeline->d->extraBufMgr);
+ const quint32 patchCount = tess.patchCountForDrawCall(vertexOrIndexCount, instanceCount);
+ QMetalBuffer *vertOutBuf = nullptr;
+ QMetalBuffer *tescOutBuf = nullptr;
+ QMetalBuffer *tescPatchOutBuf = nullptr;
+ QMetalBuffer *tescFactorBuf = nullptr;
+ QMetalBuffer *tescParamsBuf = nullptr;
+ id<MTLComputeCommandEncoder> vertTescComputeEncoder = tessellationComputeEncoder(cbD);
+
+ // Step 1: vertex shader (as compute)
+ {
+ id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder;
+ QShader::Variant shaderVariant = QShader::NonIndexedVertexAsComputeShader;
+ if (args.type == TessDrawArgs::U16Indexed)
+ shaderVariant = QShader::UInt16IndexedVertexAsComputeShader;
+ else if (args.type == TessDrawArgs::U32Indexed)
+ shaderVariant = QShader::UInt32IndexedVertexAsComputeShader;
+ const int varIndex = QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(shaderVariant);
+ id<MTLComputePipelineState> computePipelineState = tess.vsCompPipeline(this, shaderVariant);
+ [computeEncoder setComputePipelineState: computePipelineState];
+
+ // Make uniform buffers, textures, and samplers (meant for the
+ // vertex stage from the client's point of view) visible in the
+ // "vertex as compute" shader
+ cbD->d->currentComputePassEncoder = computeEncoder;
+ rebindShaderResources(cbD, QMetalShaderResourceBindingsData::VERTEX, QMetalShaderResourceBindingsData::COMPUTE);
+ cbD->d->currentComputePassEncoder = nil;
+
+ const QMap<int, int> &ebb(tess.compVs[varIndex].nativeShaderInfo.extraBufferBindings);
+ const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+ const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1);
+
+ if (outputBufferBinding >= 0) {
+ const quint32 workBufSize = tess.vsCompOutputBufferSize(vertexOrIndexCount, instanceCount);
+ vertOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
+ if (!vertOutBuf)
+ return;
+ [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
+ }
+
+ if (indexBufferBinding >= 0)
+ [computeEncoder setBuffer: (id<MTLBuffer>) args.drawIndexed.indexBuffer offset: 0 atIndex: indexBufferBinding];
+
+ for (int i = 0, ie = cbD->d->currentVertexInputsBuffers.batches.count(); i != ie; ++i) {
+ const auto &bufferBatch(cbD->d->currentVertexInputsBuffers.batches[i]);
+ const auto &offsetBatch(cbD->d->currentVertexInputOffsets.batches[i]);
+ [computeEncoder setBuffers: bufferBatch.resources.constData()
+ offsets: offsetBatch.resources.constData()
+ withRange: NSMakeRange(uint(cbD->d->currentFirstVertexBinding) + bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+ }
+
+ if (indexed) {
+ [computeEncoder setStageInRegion: MTLRegionMake2D(args.drawIndexed.vertexOffset, args.drawIndexed.firstInstance,
+ args.drawIndexed.indexCount, args.drawIndexed.instanceCount)];
+ } else {
+ [computeEncoder setStageInRegion: MTLRegionMake2D(args.draw.firstVertex, args.draw.firstInstance,
+ args.draw.vertexCount, args.draw.instanceCount)];
+ }
+
+ [computeEncoder dispatchThreads: MTLSizeMake(vertexOrIndexCount, instanceCount, 1)
+ threadsPerThreadgroup: MTLSizeMake(computePipelineState.threadExecutionWidth, 1, 1)];
+ }
+
+ // Step 2: tessellation control shader (as compute)
+ {
+ id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder;
+ id<MTLComputePipelineState> computePipelineState = tess.tescCompPipeline(this);
+ [computeEncoder setComputePipelineState: computePipelineState];
+
+ cbD->d->currentComputePassEncoder = computeEncoder;
+ rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSCTRL, QMetalShaderResourceBindingsData::COMPUTE);
+ cbD->d->currentComputePassEncoder = nil;
+
+ const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings);
+ const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+ const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
+ const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
+ const int paramsBufferBinding = ebb.value(QShaderPrivate::MslTessTescParamsBufferBinding, -1);
+ const int inputBufferBinding = ebb.value(QShaderPrivate::MslTessTescInputBufferBinding, -1);
+
+ if (outputBufferBinding >= 0) {
+ const quint32 workBufSize = tess.tescCompOutputBufferSize(patchCount);
+ tescOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
+ if (!tescOutBuf)
+ return;
+ [computeEncoder setBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
+ }
+
+ if (patchOutputBufferBinding >= 0) {
+ const quint32 workBufSize = tess.tescCompPatchOutputBufferSize(patchCount);
+ tescPatchOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
+ if (!tescPatchOutBuf)
+ return;
+ [computeEncoder setBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding];
+ }
+
+ if (tessFactorBufferBinding >= 0) {
+ tescFactorBuf = extraBufMgr.acquireWorkBuffer(this, patchCount * sizeof(MTLQuadTessellationFactorsHalf));
+ [computeEncoder setBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding];
+ }
+
+ if (paramsBufferBinding >= 0) {
+ struct {
+ quint32 inControlPointCount;
+ quint32 patchCount;
+ } params;
+ tescParamsBuf = extraBufMgr.acquireWorkBuffer(this, sizeof(params), QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible);
+ if (!tescParamsBuf)
+ return;
+ params.inControlPointCount = tess.inControlPointCount;
+ params.patchCount = patchCount;
+ id<MTLBuffer> paramsBuf = tescParamsBuf->d->buf[0];
+ char *p = reinterpret_cast<char *>([paramsBuf contents]);
+ memcpy(p, &params, sizeof(params));
+ [computeEncoder setBuffer: paramsBuf offset: 0 atIndex: paramsBufferBinding];
+ }
+
+ if (vertOutBuf && inputBufferBinding >= 0)
+ [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: inputBufferBinding];
+
+ int sgSize = int(computePipelineState.threadExecutionWidth);
+ int wgSize = std::lcm(tess.outControlPointCount, sgSize);
+ while (wgSize > caps.maxThreadGroupSize) {
+ sgSize /= 2;
+ wgSize = std::lcm(tess.outControlPointCount, sgSize);
+ }
+ [computeEncoder dispatchThreads: MTLSizeMake(patchCount * tess.outControlPointCount, 1, 1)
+ threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
+ }
+
+ // Much of the state in the QMetalCommandBuffer is going to be reset
+ // when we get a new render encoder. Save what we need. (cheaper than
+ // starting to walk over the srb again)
+ const QMetalShaderResourceBindingsData resourceBindings = cbD->d->currentShaderResourceBindingState;
+
+ endTessellationComputeEncoding(cbD);
+
+ // Step 3: tessellation evaluation (as vertex) + fragment shader
+ {
+ // No need to call tess.teseFragRenderPipeline because it was done
+ // once and we know the result is stored in the standard place
+ // (graphicsPipeline->d->ps).
+
+ graphicsPipeline->makeActiveForCurrentRenderPassEncoder(cbD);
+ id<MTLRenderCommandEncoder> renderEncoder = cbD->d->currentRenderPassEncoder;
+
+ rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSEVAL, QMetalShaderResourceBindingsData::VERTEX, &resourceBindings);
+ rebindShaderResources(cbD, QMetalShaderResourceBindingsData::FRAGMENT, QMetalShaderResourceBindingsData::FRAGMENT, &resourceBindings);
+
+ const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings);
+ const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+ const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
+ const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
+
+ if (outputBufferBinding >= 0 && tescOutBuf)
+ [renderEncoder setVertexBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
+
+ if (patchOutputBufferBinding >= 0 && tescPatchOutBuf)
+ [renderEncoder setVertexBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding];
+
+ if (tessFactorBufferBinding >= 0 && tescFactorBuf) {
+ [renderEncoder setTessellationFactorBuffer: tescFactorBuf->d->buf[0] offset: 0 instanceStride: 0];
+ [renderEncoder setVertexBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding];
+ }
+
+ [cbD->d->currentRenderPassEncoder drawPatches: tess.outControlPointCount
+ patchStart: 0
+ patchCount: patchCount
+ patchIndexBuffer: nil
+ patchIndexBufferOffset: 0
+ instanceCount: 1
+ baseInstance: 0];
+ }
+}
+
+void QRhiMetal::adjustForMultiViewDraw(quint32 *instanceCount, QRhiCommandBuffer *cb)
+{
+ QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
+ const int multiViewCount = cbD->currentGraphicsPipeline->m_multiViewCount;
+ if (multiViewCount <= 1)
+ return;
+
+ const QMap<int, int> &ebb(cbD->currentGraphicsPipeline->d->vs.nativeShaderInfo.extraBufferBindings);
+ const int viewMaskBufBinding = ebb.value(QShaderPrivate::MslMultiViewMaskBufferBinding, -1);
+ if (viewMaskBufBinding == -1) {
+ qWarning("No extra buffer for multiview in the vertex shader; was it built with --view-count specified?");
+ return;
+ }
+ struct {
+ quint32 viewOffset;
+ quint32 viewCount;
+ } multiViewInfo;
+ multiViewInfo.viewOffset = 0;
+ multiViewInfo.viewCount = quint32(multiViewCount);
+ QMetalBuffer *buf = cbD->currentGraphicsPipeline->d->extraBufMgr.acquireWorkBuffer(this, sizeof(multiViewInfo),
+ QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible);
+ if (buf) {
+ id<MTLBuffer> mtlbuf = buf->d->buf[0];
+ char *p = reinterpret_cast<char *>([mtlbuf contents]);
+ memcpy(p, &multiViewInfo, sizeof(multiViewInfo));
+ [cbD->d->currentRenderPassEncoder setVertexBuffer: mtlbuf offset: 0 atIndex: viewMaskBufBinding];
+ // The instance count is adjusted for layered rendering. The vertex shader is expected to contain something like:
+ // uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1];
+ // where spvViewMask is the buffer with multiViewInfo passed in above.
+ *instanceCount *= multiViewCount;
+ }
+}
+
void QRhiMetal::draw(QRhiCommandBuffer *cb, quint32 vertexCount,
quint32 instanceCount, quint32 firstVertex, quint32 firstInstance)
{
QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
+ if (cbD->currentGraphicsPipeline->d->tess.enabled) {
+ TessDrawArgs a;
+ a.cbD = cbD;
+ a.type = TessDrawArgs::NonIndexed;
+ a.draw.vertexCount = vertexCount;
+ a.draw.instanceCount = instanceCount;
+ a.draw.firstVertex = firstVertex;
+ a.draw.firstInstance = firstInstance;
+ tessellatedDraw(a);
+ return;
+ }
+
+ adjustForMultiViewDraw(&instanceCount, cb);
+
if (caps.baseVertexAndInstance) {
- [cbD->d->currentRenderPassEncoder drawPrimitives:
- QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
- vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance];
+ [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
+ vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance];
} else {
- [cbD->d->currentRenderPassEncoder drawPrimitives:
- QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
- vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount];
+ [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
+ vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount];
}
}
@@ -1429,23 +2243,39 @@ void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount,
const quint32 indexOffset = cbD->currentIndexOffset + firstIndex * (cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? 2 : 4);
Q_ASSERT(indexOffset == aligned(indexOffset, 4u));
- QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, cbD->currentIndexBuffer);
- id<MTLBuffer> mtlbuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0];
+ QMetalBuffer *ibufD = cbD->currentIndexBuffer;
+ id<MTLBuffer> mtlibuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0];
+
+ if (cbD->currentGraphicsPipeline->d->tess.enabled) {
+ TessDrawArgs a;
+ a.cbD = cbD;
+ a.type = cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? TessDrawArgs::U16Indexed : TessDrawArgs::U32Indexed;
+ a.drawIndexed.indexCount = indexCount;
+ a.drawIndexed.instanceCount = instanceCount;
+ a.drawIndexed.firstIndex = firstIndex;
+ a.drawIndexed.vertexOffset = vertexOffset;
+ a.drawIndexed.firstInstance = firstInstance;
+ a.drawIndexed.indexBuffer = mtlibuf;
+ tessellatedDraw(a);
+ return;
+ }
+
+ adjustForMultiViewDraw(&instanceCount, cb);
if (caps.baseVertexAndInstance) {
- [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
+ [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
indexCount: indexCount
indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
- indexBuffer: mtlbuf
+ indexBuffer: mtlibuf
indexBufferOffset: indexOffset
instanceCount: instanceCount
baseVertex: vertexOffset
baseInstance: firstInstance];
} else {
- [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
+ [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
indexCount: indexCount
indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
- indexBuffer: mtlbuf
+ indexBuffer: mtlibuf
indexBufferOffset: indexOffset
instanceCount: instanceCount];
}
@@ -1502,34 +2332,39 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb)
cbD->resetPerPassCachedState();
}
+double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb)
+{
+ QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
+ return cbD->d->lastGpuTime;
+}
+
QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
{
Q_UNUSED(flags);
QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain);
+ currentSwapChain = swapChainD;
+ currentFrameSlot = swapChainD->currentFrameSlot;
- // This is a bit messed up since for this swapchain we want to wait for the
- // commands+present to complete, while for others just for the commands
- // (for this same frame slot) but not sure how to do that in a sane way so
- // wait for full cb completion for now.
- for (QMetalSwapChain *sc : qAsConst(swapchains)) {
- dispatch_semaphore_t sem = sc->d->sem[swapChainD->currentFrameSlot];
- dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
+ // If we are too far ahead, block. This is also what ensures that any
+ // resource used in the previous frame for this slot is now not in use
+ // anymore by the GPU.
+ dispatch_semaphore_wait(swapChainD->d->sem[currentFrameSlot], DISPATCH_TIME_FOREVER);
+
+ // Do this also for any other swapchain's commands with the same frame slot
+ // While this reduces concurrency, it keeps resource usage safe: swapchain
+ // A starting its frame 0, followed by swapchain B starting its own frame 0
+ // will make B wait for A's frame 0 commands, so if a resource is written
+ // in B's frame or when B checks for pending resource releases, that won't
+ // mess up A's in-flight commands (as they are not in flight anymore).
+ for (QMetalSwapChain *sc : std::as_const(swapchains)) {
if (sc != swapChainD)
- dispatch_semaphore_signal(sem);
+ sc->waitUntilCompleted(currentFrameSlot); // wait+signal
}
- currentSwapChain = swapChainD;
- currentFrameSlot = swapChainD->currentFrameSlot;
- if (swapChainD->ds)
- swapChainD->ds->lastActiveFrameSlot = currentFrameSlot;
-
[d->captureScope beginScope];
- // Do not let the command buffer mess with the refcount of objects. We do
- // have a proper render loop and will manage lifetimes similarly to other
- // backends (Vulkan).
- swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
+ swapChainD->cbWrapper.d->cb = d->newCommandBuffer();
QMetalRenderTargetData::ColorAtt colorAtt;
if (swapChainD->samples > 1) {
@@ -1541,11 +2376,16 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF
swapChainD->rtWrapper.d->fb.colorAtt[0] = colorAtt;
swapChainD->rtWrapper.d->fb.dsTex = swapChainD->ds ? swapChainD->ds->d->tex : nil;
+ swapChainD->rtWrapper.d->fb.dsResolveTex = nil;
swapChainD->rtWrapper.d->fb.hasStencil = swapChainD->ds ? true : false;
swapChainD->rtWrapper.d->fb.depthNeedsStore = false;
+ if (swapChainD->ds)
+ swapChainD->ds->lastActiveFrameSlot = currentFrameSlot;
+
executeDeferredReleases();
- swapChainD->cbWrapper.resetState();
+ swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]);
+ swapChainD->d->lastGpuTime[currentFrameSlot] = 0;
finishActiveReadbacks();
return QRhi::FrameOpSuccess;
@@ -1556,26 +2396,30 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain);
Q_ASSERT(currentSwapChain == swapChainD);
+ __block int thisFrameSlot = currentFrameSlot;
+ [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer> cb) {
+ swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
+ dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]);
+ }];
+
const bool needsPresent = !flags.testFlag(QRhi::SkipPresent);
- if (needsPresent) {
- // beginFrame-endFrame without a render pass inbetween means there is no
- // drawable, handle this gracefully because presentDrawable does not like
- // null arguments.
- if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) {
- // QTBUG-103415: while the docs suggest the following two approaches are
- // equivalent, there is a difference in case a frame is recorded earlier than
- // (i.e. not in response to) the next CVDisplayLink callback. Therefore, stick
- // with presentDrawable, which gives results identical to OpenGL, and all other
- // platforms, i.e. throttles to vsync as expected, meaning constant 15-17 ms with
- // a 60 Hz screen, no jumps with smaller intervals, regardless of when the frame
- // is submitted by the app)
-#if 1
+ const bool presentsWithTransaction = swapChainD->d->layer.presentsWithTransaction;
+ if (!presentsWithTransaction && needsPresent) {
+ // beginFrame-endFrame without a render pass inbetween means there is no drawable.
+ if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable)
[swapChainD->cbWrapper.d->cb presentDrawable: drawable];
-#else
- [swapChainD->cbWrapper.d->cb addScheduledHandler:^(id<MTLCommandBuffer>) {
- [drawable present];
- }];
-#endif
+ }
+
+ [swapChainD->cbWrapper.d->cb commit];
+
+ if (presentsWithTransaction && needsPresent) {
+ // beginFrame-endFrame without a render pass inbetween means there is no drawable.
+ if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) {
+ // The layer has presentsWithTransaction set to true to avoid flicker on resizing,
+ // so here it is important to follow what the Metal docs say when it comes to the
+ // issuing the present.
+ [swapChainD->cbWrapper.d->cb waitUntilScheduled];
+ [drawable present];
}
}
@@ -1583,13 +2427,6 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
[swapChainD->d->curDrawable release];
swapChainD->d->curDrawable = nil;
- __block int thisFrameSlot = currentFrameSlot;
- [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer>) {
- dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]);
- }];
-
- [swapChainD->cbWrapper.d->cb commit];
-
[d->captureScope endScope];
if (needsPresent)
@@ -1605,23 +2442,17 @@ QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi:
Q_UNUSED(flags);
currentFrameSlot = (currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT;
- if (swapchains.count() > 1) {
- for (QMetalSwapChain *sc : qAsConst(swapchains)) {
- // wait+signal is the general pattern to ensure the commands for a
- // given frame slot have completed (if sem is 1, we go 0 then 1; if
- // sem is 0 we go -1, block, completion increments to 0, then us to 1)
- dispatch_semaphore_t sem = sc->d->sem[currentFrameSlot];
- dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
- dispatch_semaphore_signal(sem);
- }
- }
+
+ for (QMetalSwapChain *sc : std::as_const(swapchains))
+ sc->waitUntilCompleted(currentFrameSlot);
d->ofr.active = true;
*cb = &d->ofr.cbWrapper;
- d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
+ d->ofr.cbWrapper.d->cb = d->newCommandBuffer();
executeDeferredReleases();
- d->ofr.cbWrapper.resetState();
+ d->ofr.cbWrapper.resetState(d->ofr.lastGpuTime);
+ d->ofr.lastGpuTime = 0;
finishActiveReadbacks();
return QRhi::FrameOpSuccess;
@@ -1633,10 +2464,13 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags)
Q_ASSERT(d->ofr.active);
d->ofr.active = false;
- [d->ofr.cbWrapper.d->cb commit];
+ id<MTLCommandBuffer> cb = d->ofr.cbWrapper.d->cb;
+ [cb commit];
// offscreen frames wait for completion, unlike swapchain ones
- [d->ofr.cbWrapper.d->cb waitUntilCompleted];
+ [cb waitUntilCompleted];
+
+ d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime;
finishActiveReadbacks(true);
@@ -1660,16 +2494,14 @@ QRhi::FrameOpResult QRhiMetal::finish()
}
}
- for (QMetalSwapChain *sc : qAsConst(swapchains)) {
+ for (QMetalSwapChain *sc : std::as_const(swapchains)) {
for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
if (currentSwapChain && sc == currentSwapChain && i == currentFrameSlot) {
// no wait as this is the thing we're going to be commit below and
// beginFrame decremented sem already and going to be signaled by endFrame
continue;
}
- dispatch_semaphore_t sem = sc->d->sem[i];
- dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
- dispatch_semaphore_signal(sem);
+ sc->waitUntilCompleted(i);
}
}
@@ -1679,10 +2511,13 @@ QRhi::FrameOpResult QRhiMetal::finish()
}
if (inFrame) {
- if (d->ofr.active)
- d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
- else
- swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
+ if (d->ofr.active) {
+ d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime;
+ d->ofr.cbWrapper.d->cb = d->newCommandBuffer();
+ } else {
+ swapChainD->d->lastGpuTime[currentFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
+ swapChainD->cbWrapper.d->cb = d->newCommandBuffer();
+ }
}
executeDeferredReleases(true);
@@ -1860,6 +2695,15 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates);
+ id<MTLBlitCommandEncoder> blitEnc = nil;
+ auto ensureBlit = [&blitEnc, cbD, this]() {
+ if (!blitEnc) {
+ blitEnc = [cbD->d->cb blitCommandEncoder];
+ if (debugMarkers)
+ [blitEnc pushDebugGroup: @"Texture upload/copy"];
+ }
+ };
+
for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) {
const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]);
if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) {
@@ -1882,25 +2726,33 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
executeBufferHostWritesForCurrentFrame(bufD);
const int idx = bufD->d->slotted ? currentFrameSlot : 0;
- char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
- if (p) {
- u.result->data.resize(u.readSize);
- memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
+ if (bufD->m_type == QRhiBuffer::Dynamic) {
+ char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
+ if (p) {
+ u.result->data.resize(u.readSize);
+ memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
+ }
+ if (u.result->completed)
+ u.result->completed();
+ } else {
+ QRhiMetalData::BufferReadback readback;
+ readback.activeFrameSlot = idx;
+ readback.buf = bufD->d->buf[idx];
+ readback.offset = u.offset;
+ readback.readSize = u.readSize;
+ readback.result = u.result;
+ d->activeBufferReadbacks.append(readback);
+#ifdef Q_OS_MACOS
+ if (bufD->d->managed) {
+ // On non-Apple Silicon, manually synchronize memory from GPU to CPU
+ ensureBlit();
+ [blitEnc synchronizeResource:readback.buf];
+ }
+#endif
}
- if (u.result->completed)
- u.result->completed();
}
}
- id<MTLBlitCommandEncoder> blitEnc = nil;
- auto ensureBlit = [&blitEnc, cbD, this] {
- if (!blitEnc) {
- blitEnc = [cbD->d->cb blitCommandEncoder];
- if (debugMarkers)
- [blitEnc pushDebugGroup: @"Texture upload/copy"];
- }
- };
-
for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]);
if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) {
@@ -1908,7 +2760,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
qsizetype stagingSize = 0;
for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) {
for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
- for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level]))
+ for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level]))
stagingSize += subresUploadByteSize(subresDesc);
}
}
@@ -1922,7 +2774,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
qsizetype curOfs = 0;
for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) {
for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
- for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level]))
+ for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level]))
enqueueSubresUpload(utexD, mp, blitEnc, layer, level, subresDesc, &curOfs);
}
}
@@ -2035,7 +2887,7 @@ void QRhiMetal::executeBufferHostWritesForSlot(QMetalBuffer *bufD, int slot)
void *p = [bufD->d->buf[slot] contents];
quint32 changeBegin = UINT32_MAX;
quint32 changeEnd = 0;
- for (const QMetalBufferData::BufferUpdate &u : qAsConst(bufD->d->pendingUpdates[slot])) {
+ for (const QMetalBufferData::BufferUpdate &u : std::as_const(bufD->d->pendingUpdates[slot])) {
memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.data.size()));
if (u.offset < changeBegin)
changeBegin = u.offset;
@@ -2111,21 +2963,24 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(rtTex->description(), rtD->currentResIdList))
rtTex->create();
cbD->d->currentPassRpDesc = d->createDefaultRenderPass(rtD->dsAttCount, colorClearValue, depthStencilClearValue, rtD->colorAttCount);
- if (rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents)) {
+ if (rtD->fb.preserveColor) {
for (uint i = 0; i < uint(rtD->colorAttCount); ++i)
cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad;
}
- if (rtD->dsAttCount && rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents)) {
+ if (rtD->dsAttCount && rtD->fb.preserveDs) {
cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad;
cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad;
}
for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments();
it != itEnd; ++it)
{
- if (it->texture())
+ if (it->texture()) {
QRHI_RES(QMetalTexture, it->texture())->lastActiveFrameSlot = currentFrameSlot;
- else if (it->renderBuffer())
+ if (it->multiViewCount() >= 2)
+ cbD->d->currentPassRpDesc.renderTargetArrayLength = NSUInteger(it->multiViewCount());
+ } else if (it->renderBuffer()) {
QRHI_RES(QMetalRenderBuffer, it->renderBuffer())->lastActiveFrameSlot = currentFrameSlot;
+ }
if (it->resolveTexture())
QRHI_RES(QMetalTexture, it->resolveTexture())->lastActiveFrameSlot = currentFrameSlot;
}
@@ -2133,6 +2988,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
QRHI_RES(QMetalRenderBuffer, rtTex->m_desc.depthStencilBuffer())->lastActiveFrameSlot = currentFrameSlot;
if (rtTex->m_desc.depthTexture())
QRHI_RES(QMetalTexture, rtTex->m_desc.depthTexture())->lastActiveFrameSlot = currentFrameSlot;
+ if (rtTex->m_desc.depthResolveTexture())
+ QRHI_RES(QMetalTexture, rtTex->m_desc.depthResolveTexture())->lastActiveFrameSlot = currentFrameSlot;
}
break;
default:
@@ -2146,7 +3003,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
cbD->d->currentPassRpDesc.colorAttachments[i].depthPlane = NSUInteger(rtD->fb.colorAtt[i].slice);
cbD->d->currentPassRpDesc.colorAttachments[i].level = NSUInteger(rtD->fb.colorAtt[i].level);
if (rtD->fb.colorAtt[i].resolveTex) {
- cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = MTLStoreActionMultisampleResolve;
+ cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = rtD->fb.preserveColor ? MTLStoreActionStoreAndMultisampleResolve
+ : MTLStoreActionMultisampleResolve;
cbD->d->currentPassRpDesc.colorAttachments[i].resolveTexture = rtD->fb.colorAtt[i].resolveTex;
cbD->d->currentPassRpDesc.colorAttachments[i].resolveSlice = NSUInteger(rtD->fb.colorAtt[i].resolveLayer);
cbD->d->currentPassRpDesc.colorAttachments[i].resolveLevel = NSUInteger(rtD->fb.colorAtt[i].resolveLevel);
@@ -2159,6 +3017,15 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
cbD->d->currentPassRpDesc.stencilAttachment.texture = rtD->fb.hasStencil ? rtD->fb.dsTex : nil;
if (rtD->fb.depthNeedsStore) // Depth/Stencil is set to DontCare by default, override if needed
cbD->d->currentPassRpDesc.depthAttachment.storeAction = MTLStoreActionStore;
+ if (rtD->fb.dsResolveTex) {
+ cbD->d->currentPassRpDesc.depthAttachment.storeAction = rtD->fb.depthNeedsStore ? MTLStoreActionStoreAndMultisampleResolve
+ : MTLStoreActionMultisampleResolve;
+ cbD->d->currentPassRpDesc.depthAttachment.resolveTexture = rtD->fb.dsResolveTex;
+ if (rtD->fb.hasStencil) {
+ cbD->d->currentPassRpDesc.stencilAttachment.resolveTexture = rtD->fb.dsResolveTex;
+ cbD->d->currentPassRpDesc.stencilAttachment.storeAction = cbD->d->currentPassRpDesc.depthAttachment.storeAction;
+ }
+ }
}
cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc];
@@ -2216,9 +3083,9 @@ void QRhiMetal::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *p
Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass);
QMetalComputePipeline *psD = QRHI_RES(QMetalComputePipeline, ps);
- if (cbD->currentComputePipeline != ps || cbD->currentPipelineGeneration != psD->generation) {
+ if (cbD->currentComputePipeline != psD || cbD->currentPipelineGeneration != psD->generation) {
cbD->currentGraphicsPipeline = nullptr;
- cbD->currentComputePipeline = ps;
+ cbD->currentComputePipeline = psD;
cbD->currentPipelineGeneration = psD->generation;
[cbD->d->currentComputePassEncoder setComputePipelineState: psD->d->ps];
@@ -2284,8 +3151,12 @@ void QRhiMetal::executeDeferredReleases(bool forced)
[e.stagingBuffer.buffer release];
break;
case QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline:
- [e.graphicsPipeline.depthStencilState release];
[e.graphicsPipeline.pipelineState release];
+ [e.graphicsPipeline.depthStencilState release];
+ [e.graphicsPipeline.tessVertexComputeState[0] release];
+ [e.graphicsPipeline.tessVertexComputeState[1] release];
+ [e.graphicsPipeline.tessVertexComputeState[2] release];
+ [e.graphicsPipeline.tessTessControlComputeState release];
break;
case QRhiMetalData::DeferredReleaseEntry::ComputePipeline:
[e.computePipeline.pipelineState release];
@@ -2315,7 +3186,23 @@ void QRhiMetal::finishActiveReadbacks(bool forced)
if (readback.result->completed)
completedCallbacks.append(readback.result->completed);
- d->activeTextureReadbacks.removeLast();
+ d->activeTextureReadbacks.remove(i);
+ }
+ }
+
+ for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) {
+ const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]);
+ if (forced || currentFrameSlot == readback.activeFrameSlot
+ || readback.activeFrameSlot < 0) {
+ readback.result->data.resize(readback.readSize);
+ char *p = reinterpret_cast<char *>([readback.buf contents]);
+ Q_ASSERT(p);
+ memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize));
+
+ if (readback.result->completed)
+ completedCallbacks.append(readback.result->completed);
+
+ d->activeBufferReadbacks.remove(i);
}
}
@@ -2388,6 +3275,9 @@ bool QMetalBuffer::create()
// Static maps to on macOS) is not safe when another frame reading from the
// same buffer is still in flight.
d->slotted = !m_usage.testFlag(QRhiBuffer::StorageBuffer); // except for SSBOs written in the shader
+ // and a special case for internal work buffers
+ if (int(m_usage) == WorkBufPoolUsage)
+ d->slotted = false;
for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
if (i == 0 || d->slotted) {
@@ -2863,11 +3753,14 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize)
if (d->tex)
destroy();
- const QSize size = m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize;
const bool isCube = m_flags.testFlag(CubeMap);
const bool is3D = m_flags.testFlag(ThreeDimensional);
const bool isArray = m_flags.testFlag(TextureArray);
const bool hasMipMaps = m_flags.testFlag(MipMapped);
+ const bool is1D = m_flags.testFlag(OneDimensional);
+
+ const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1)
+ : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize);
QRHI_RES_RHI(QRhiMetal);
d->format = toMetalTextureFormat(m_format, m_flags, rhiD);
@@ -2895,12 +3788,18 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize)
qWarning("Texture cannot be both array and 3D");
return false;
}
- m_depth = qMax(1, m_depth);
+ if (is1D && is3D) {
+ qWarning("Texture cannot be both 1D and 3D");
+ return false;
+ }
+ if (is1D && isCube) {
+ qWarning("Texture cannot be both 1D and cube");
+ return false;
+ }
if (m_depth > 1 && !is3D) {
qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth);
return false;
}
- m_arraySize = qMax(0, m_arraySize);
if (m_arraySize > 0 && !isArray) {
qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize);
return false;
@@ -2927,17 +3826,20 @@ bool QMetalTexture::create()
const bool isCube = m_flags.testFlag(CubeMap);
const bool is3D = m_flags.testFlag(ThreeDimensional);
const bool isArray = m_flags.testFlag(TextureArray);
+ const bool is1D = m_flags.testFlag(OneDimensional);
if (isCube) {
desc.textureType = MTLTextureTypeCube;
} else if (is3D) {
desc.textureType = MTLTextureType3D;
+ } else if (is1D) {
+ desc.textureType = isArray ? MTLTextureType1DArray : MTLTextureType1D;
} else if (isArray) {
#ifdef Q_OS_IOS
- if (samples > 1) {
- // would be available on iOS 14.0+ but cannot test for that with a 13 SDK
- qWarning("Multisample 2D texture array is not supported on iOS");
+ if (@available(iOS 14, *)) {
+ desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray;
+ } else {
+ desc.textureType = MTLTextureType2DArray;
}
- desc.textureType = MTLTextureType2DArray;
#else
desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray;
#endif
@@ -2947,12 +3849,12 @@ bool QMetalTexture::create()
desc.pixelFormat = d->format;
desc.width = NSUInteger(size.width());
desc.height = NSUInteger(size.height());
- desc.depth = is3D ? m_depth : 1;
+ desc.depth = is3D ? qMax(1, m_depth) : 1;
desc.mipmapLevelCount = NSUInteger(mipLevelCount);
if (samples > 1)
desc.sampleCount = NSUInteger(samples);
if (isArray)
- desc.arrayLength = NSUInteger(m_arraySize);
+ desc.arrayLength = NSUInteger(qMax(0, m_arraySize));
desc.resourceOptions = MTLResourceStorageModePrivate;
desc.storageMode = MTLStorageModePrivate;
desc.usage = MTLTextureUsageShaderRead;
@@ -3011,7 +3913,8 @@ id<MTLTexture> QMetalTextureData::viewForLevel(int level)
const bool isCube = q->m_flags.testFlag(QRhiTexture::CubeMap);
const bool isArray = q->m_flags.testFlag(QRhiTexture::TextureArray);
id<MTLTexture> view = [tex newTextureViewWithPixelFormat: format textureType: type
- levels: NSMakeRange(NSUInteger(level), 1) slices: NSMakeRange(0, isCube ? 6 : (isArray ? q->m_arraySize : 1))];
+ levels: NSMakeRange(NSUInteger(level), 1)
+ slices: NSMakeRange(0, isCube ? 6 : (isArray ? qMax(0, q->m_arraySize) : 1))];
perLevelViews[level] = view;
return view;
@@ -3156,7 +4059,9 @@ QMetalRenderPassDescriptor::~QMetalRenderPassDescriptor()
void QMetalRenderPassDescriptor::destroy()
{
- // nothing to do here
+ QRHI_RES_RHI(QRhiMetal);
+ if (rhiD)
+ rhiD->unregisterResource(this);
}
bool QMetalRenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const
@@ -3199,13 +4104,17 @@ void QMetalRenderPassDescriptor::updateSerializedFormat()
QRhiRenderPassDescriptor *QMetalRenderPassDescriptor::newCompatibleRenderPassDescriptor() const
{
- QMetalRenderPassDescriptor *rp = new QMetalRenderPassDescriptor(m_rhi);
- rp->colorAttachmentCount = colorAttachmentCount;
- rp->hasDepthStencil = hasDepthStencil;
- memcpy(rp->colorFormat, colorFormat, sizeof(colorFormat));
- rp->dsFormat = dsFormat;
- rp->updateSerializedFormat();
- return rp;
+ QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
+ rpD->colorAttachmentCount = colorAttachmentCount;
+ rpD->hasDepthStencil = hasDepthStencil;
+ memcpy(rpD->colorFormat, colorFormat, sizeof(colorFormat));
+ rpD->dsFormat = dsFormat;
+
+ rpD->updateSerializedFormat();
+
+ QRHI_RES_RHI(QRhiMetal);
+ rhiD->registerResource(rpD, false);
+ return rpD;
}
QVector<quint32> QMetalRenderPassDescriptor::serializedFormat() const
@@ -3261,12 +4170,14 @@ QMetalTextureRenderTarget::~QMetalTextureRenderTarget()
void QMetalTextureRenderTarget::destroy()
{
- // nothing to do here
+ QRHI_RES_RHI(QRhiMetal);
+ if (rhiD)
+ rhiD->unregisterResource(this);
}
QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDescriptor()
{
- const int colorAttachmentCount = m_desc.cendColorAttachments() - m_desc.cbeginColorAttachments();
+ const int colorAttachmentCount = int(m_desc.colorAttachmentCount());
QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
rpD->colorAttachmentCount = colorAttachmentCount;
rpD->hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
@@ -3284,14 +4195,16 @@ QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDesc
rpD->dsFormat = int(QRHI_RES(QMetalRenderBuffer, m_desc.depthStencilBuffer())->d->format);
rpD->updateSerializedFormat();
+
+ QRHI_RES_RHI(QRhiMetal);
+ rhiD->registerResource(rpD, false);
return rpD;
}
bool QMetalTextureRenderTarget::create()
{
QRHI_RES_RHI(QRhiMetal);
- const bool hasColorAttachments = m_desc.cbeginColorAttachments() != m_desc.cendColorAttachments();
- Q_ASSERT(hasColorAttachments || m_desc.depthTexture());
+ Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture());
Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture());
const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
@@ -3335,8 +4248,9 @@ bool QMetalTextureRenderTarget::create()
if (m_desc.depthTexture()) {
QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, m_desc.depthTexture());
d->fb.dsTex = depthTexD->d->tex;
- d->fb.hasStencil = false;
- d->fb.depthNeedsStore = true;
+ d->fb.hasStencil = rhiD->isStencilSupportingFormat(depthTexD->format());
+ d->fb.depthNeedsStore = !m_flags.testFlag(DoNotStoreDepthStencilContents) && !m_desc.depthResolveTexture();
+ d->fb.preserveDs = m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents);
if (d->colorAttCount == 0) {
d->pixelSize = depthTexD->pixelSize();
d->sampleCount = depthTexD->samples;
@@ -3346,18 +4260,27 @@ bool QMetalTextureRenderTarget::create()
d->fb.dsTex = depthRbD->d->tex;
d->fb.hasStencil = true;
d->fb.depthNeedsStore = false;
+ d->fb.preserveDs = false;
if (d->colorAttCount == 0) {
d->pixelSize = depthRbD->pixelSize();
d->sampleCount = depthRbD->samples;
}
}
+ if (m_desc.depthResolveTexture()) {
+ QMetalTexture *depthResolveTexD = QRHI_RES(QMetalTexture, m_desc.depthResolveTexture());
+ d->fb.dsResolveTex = depthResolveTexD->d->tex;
+ }
d->dsAttCount = 1;
} else {
d->dsAttCount = 0;
}
+ if (d->colorAttCount > 0)
+ d->fb.preserveColor = m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents);
+
QRhiRenderTargetAttachmentTracker::updateResIdList<QMetalTexture, QMetalRenderBuffer>(m_desc, &d->currentResIdList);
+ rhiD->registerResource(this, false);
return true;
}
@@ -3393,6 +4316,10 @@ void QMetalShaderResourceBindings::destroy()
{
sortedBindings.clear();
maxBinding = -1;
+
+ QRHI_RES_RHI(QRhiMetal);
+ if (rhiD)
+ rhiD->unregisterResource(this);
}
bool QMetalShaderResourceBindings::create()
@@ -3407,13 +4334,9 @@ bool QMetalShaderResourceBindings::create()
rhiD->updateLayoutDesc(this);
std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings));
- std::sort(sortedBindings.begin(), sortedBindings.end(),
- [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b)
- {
- return a.data()->binding < b.data()->binding;
- });
+ std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan);
if (!sortedBindings.isEmpty())
- maxBinding = sortedBindings.last().data()->binding;
+ maxBinding = QRhiImplementation::shaderResourceBindingData(sortedBindings.last())->binding;
else
maxBinding = -1;
@@ -3423,6 +4346,7 @@ bool QMetalShaderResourceBindings::create()
memset(&bd, 0, sizeof(BoundResourceData));
generation += 1;
+ rhiD->registerResource(this, false);
return true;
}
@@ -3430,13 +4354,8 @@ void QMetalShaderResourceBindings::updateResources(UpdateFlags flags)
{
sortedBindings.clear();
std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings));
- if (!flags.testFlag(BindingsAreSorted)) {
- std::sort(sortedBindings.begin(), sortedBindings.end(),
- [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b)
- {
- return a.data()->binding < b.data()->binding;
- });
- }
+ if (!flags.testFlag(BindingsAreSorted))
+ std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan);
for (BoundResourceData &bd : boundResourceData)
memset(&bd, 0, sizeof(BoundResourceData));
@@ -3448,6 +4367,8 @@ QMetalGraphicsPipeline::QMetalGraphicsPipeline(QRhiImplementation *rhi)
: QRhiGraphicsPipeline(rhi),
d(new QMetalGraphicsPipelineData)
{
+ d->q = this;
+ d->tess.q = d;
}
QMetalGraphicsPipeline::~QMetalGraphicsPipeline()
@@ -3461,16 +4382,39 @@ void QMetalGraphicsPipeline::destroy()
d->vs.destroy();
d->fs.destroy();
- if (!d->ps)
+ d->tess.compVs[0].destroy();
+ d->tess.compVs[1].destroy();
+ d->tess.compVs[2].destroy();
+
+ d->tess.compTesc.destroy();
+ d->tess.vertTese.destroy();
+
+ qDeleteAll(d->extraBufMgr.deviceLocalWorkBuffers);
+ d->extraBufMgr.deviceLocalWorkBuffers.clear();
+ qDeleteAll(d->extraBufMgr.hostVisibleWorkBuffers);
+ d->extraBufMgr.hostVisibleWorkBuffers.clear();
+
+ delete d->bufferSizeBuffer;
+ d->bufferSizeBuffer = nullptr;
+
+ if (!d->ps && !d->ds
+ && !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2]
+ && !d->tess.tessControlComputeState)
+ {
return;
+ }
QRhiMetalData::DeferredReleaseEntry e;
e.type = QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline;
e.lastActiveFrameSlot = lastActiveFrameSlot;
- e.graphicsPipeline.depthStencilState = d->ds;
e.graphicsPipeline.pipelineState = d->ps;
- d->ds = nil;
+ e.graphicsPipeline.depthStencilState = d->ds;
+ e.graphicsPipeline.tessVertexComputeState = d->tess.vertexComputeState;
+ e.graphicsPipeline.tessTessControlComputeState = d->tess.tessControlComputeState;
d->ps = nil;
+ d->ds = nil;
+ d->tess.vertexComputeState = {};
+ d->tess.tessControlComputeState = nil;
QRHI_RES_RHI(QRhiMetal);
if (rhiD) {
@@ -3512,6 +4456,30 @@ static inline MTLVertexFormat toMetalAttributeFormat(QRhiVertexInputAttribute::F
return MTLVertexFormatInt2;
case QRhiVertexInputAttribute::SInt:
return MTLVertexFormatInt;
+ case QRhiVertexInputAttribute::Half4:
+ return MTLVertexFormatHalf4;
+ case QRhiVertexInputAttribute::Half3:
+ return MTLVertexFormatHalf3;
+ case QRhiVertexInputAttribute::Half2:
+ return MTLVertexFormatHalf2;
+ case QRhiVertexInputAttribute::Half:
+ return MTLVertexFormatHalf;
+ case QRhiVertexInputAttribute::UShort4:
+ return MTLVertexFormatUShort4;
+ case QRhiVertexInputAttribute::UShort3:
+ return MTLVertexFormatUShort3;
+ case QRhiVertexInputAttribute::UShort2:
+ return MTLVertexFormatUShort2;
+ case QRhiVertexInputAttribute::UShort:
+ return MTLVertexFormatUShort;
+ case QRhiVertexInputAttribute::SShort4:
+ return MTLVertexFormatShort4;
+ case QRhiVertexInputAttribute::SShort3:
+ return MTLVertexFormatShort3;
+ case QRhiVertexInputAttribute::SShort2:
+ return MTLVertexFormatShort2;
+ case QRhiVertexInputAttribute::SShort:
+ return MTLVertexFormatShort;
default:
Q_UNREACHABLE();
return MTLVertexFormatFloat4;
@@ -3667,6 +4635,24 @@ static inline MTLPrimitiveType toMetalPrimitiveType(QRhiGraphicsPipeline::Topolo
}
}
+static inline MTLPrimitiveTopologyClass toMetalPrimitiveTopologyClass(QRhiGraphicsPipeline::Topology t)
+{
+ switch (t) {
+ case QRhiGraphicsPipeline::Triangles:
+ case QRhiGraphicsPipeline::TriangleStrip:
+ case QRhiGraphicsPipeline::TriangleFan:
+ return MTLPrimitiveTopologyClassTriangle;
+ case QRhiGraphicsPipeline::Lines:
+ case QRhiGraphicsPipeline::LineStrip:
+ return MTLPrimitiveTopologyClassLine;
+ case QRhiGraphicsPipeline::Points:
+ return MTLPrimitiveTopologyClassPoint;
+ default:
+ Q_UNREACHABLE();
+ return MTLPrimitiveTopologyClassTriangle;
+ }
+}
+
static inline MTLCullMode toMetalCullMode(QRhiGraphicsPipeline::CullMode c)
{
switch (c) {
@@ -3695,15 +4681,67 @@ static inline MTLTriangleFillMode toMetalTriangleFillMode(QRhiGraphicsPipeline::
}
}
+static inline MTLWinding toMetalTessellationWindingOrder(QShaderDescription::TessellationWindingOrder w)
+{
+ switch (w) {
+ case QShaderDescription::CwTessellationWindingOrder:
+ return MTLWindingClockwise;
+ case QShaderDescription::CcwTessellationWindingOrder:
+ return MTLWindingCounterClockwise;
+ default:
+ // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown
+ return MTLWindingCounterClockwise;
+ }
+}
+
+static inline MTLTessellationPartitionMode toMetalTessellationPartitionMode(QShaderDescription::TessellationPartitioning p)
+{
+ switch (p) {
+ case QShaderDescription::EqualTessellationPartitioning:
+ return MTLTessellationPartitionModePow2;
+ case QShaderDescription::FractionalEvenTessellationPartitioning:
+ return MTLTessellationPartitionModeFractionalEven;
+ case QShaderDescription::FractionalOddTessellationPartitioning:
+ return MTLTessellationPartitionModeFractionalOdd;
+ default:
+ // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown
+ return MTLTessellationPartitionModePow2;
+ }
+}
+
+static inline MTLLanguageVersion toMetalLanguageVersion(const QShaderVersion &version)
+{
+ int v = version.version();
+ return MTLLanguageVersion(((v / 10) << 16) + (v % 10));
+}
+
id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Variant shaderVariant,
QString *error, QByteArray *entryPoint, QShaderKey *activeKey)
{
- QShaderKey key = { QShader::MetalLibShader, 20, shaderVariant };
- QShaderCode mtllib = shader.shader(key);
- if (mtllib.shader().isEmpty()) {
- key.setSourceVersion(12);
- mtllib = shader.shader(key);
+ QVarLengthArray<int, 8> versions;
+ if (@available(macOS 13, iOS 16, *))
+ versions << 30;
+ if (@available(macOS 12, iOS 15, *))
+ versions << 24;
+ if (@available(macOS 11, iOS 14, *))
+ versions << 23;
+ if (@available(macOS 10.15, iOS 13, *))
+ versions << 22;
+ if (@available(macOS 10.14, iOS 12, *))
+ versions << 21;
+ versions << 20 << 12;
+
+ const QList<QShaderKey> shaders = shader.availableShaders();
+
+ QShaderKey key;
+
+ for (const int &version : versions) {
+ key = { QShader::Source::MetalLibShader, version, shaderVariant };
+ if (shaders.contains(key))
+ break;
}
+
+ QShaderCode mtllib = shader.shader(key);
if (!mtllib.shader().isEmpty()) {
dispatch_data_t data = dispatch_data_create(mtllib.shader().constData(),
size_t(mtllib.shader().size()),
@@ -3722,12 +4760,13 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var
}
}
- key = { QShader::MslShader, 20, shaderVariant };
- QShaderCode mslSource = shader.shader(key);
- if (mslSource.shader().isEmpty()) {
- key.setSourceVersion(12);
- mslSource = shader.shader(key);
+ for (const int &version : versions) {
+ key = { QShader::Source::MslShader, version, shaderVariant };
+ if (shaders.contains(key))
+ break;
}
+
+ QShaderCode mslSource = shader.shader(key);
if (mslSource.shader().isEmpty()) {
qWarning() << "No MSL 2.0 or 1.2 code found in baked shader" << shader;
return nil;
@@ -3735,7 +4774,7 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var
NSString *src = [NSString stringWithUTF8String: mslSource.shader().constData()];
MTLCompileOptions *opts = [[MTLCompileOptions alloc] init];
- opts.languageVersion = key.sourceVersion() == 20 ? MTLLanguageVersion2_0 : MTLLanguageVersion1_2;
+ opts.languageVersion = toMetalLanguageVersion(key.sourceVersion());
NSError *err = nil;
id<MTLLibrary> lib = [dev newLibraryWithSource: src options: opts error: &err];
[opts release];
@@ -3757,56 +4796,192 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var
id<MTLFunction> QRhiMetalData::createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint)
{
- NSString *name = [NSString stringWithUTF8String: entryPoint.constData()];
- id<MTLFunction> f = [lib newFunctionWithName: name];
- [name release];
- return f;
+ return [lib newFunctionWithName:[NSString stringWithUTF8String:entryPoint.constData()]];
}
-bool QMetalGraphicsPipeline::create()
+void QMetalGraphicsPipeline::setupAttachmentsInMetalRenderPassDescriptor(void *metalRpDesc, QMetalRenderPassDescriptor *rpD)
{
- if (d->ps)
- destroy();
+ MTLRenderPipelineDescriptor *rpDesc = reinterpret_cast<MTLRenderPipelineDescriptor *>(metalRpDesc);
+
+ if (rpD->colorAttachmentCount) {
+ // defaults when no targetBlends are provided
+ rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]);
+ rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll;
+ rpDesc.colorAttachments[0].blendingEnabled = false;
+
+ Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount
+ || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1));
+
+ for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) {
+ const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]);
+ rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]);
+ rpDesc.colorAttachments[i].blendingEnabled = b.enable;
+ rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor);
+ rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor);
+ rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor);
+ rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha);
+ rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha);
+ rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha);
+ rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite);
+ }
+ }
+
+ if (rpD->hasDepthStencil) {
+ // Must only be set when a depth-stencil buffer will actually be bound,
+ // validation blows up otherwise.
+ MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat);
+ rpDesc.depthAttachmentPixelFormat = fmt;
+#if defined(Q_OS_MACOS)
+ if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float)
+#else
+ if (fmt != MTLPixelFormatDepth32Float)
+#endif
+ rpDesc.stencilAttachmentPixelFormat = fmt;
+ }
QRHI_RES_RHI(QRhiMetal);
- rhiD->pipelineCreationStart();
- if (!rhiD->sanityCheckGraphicsPipeline(this))
- return false;
+ rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount));
+}
+void QMetalGraphicsPipeline::setupMetalDepthStencilDescriptor(void *metalDsDesc)
+{
+ MTLDepthStencilDescriptor *dsDesc = reinterpret_cast<MTLDepthStencilDescriptor *>(metalDsDesc);
+
+ dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways;
+ dsDesc.depthWriteEnabled = m_depthWrite;
+ if (m_stencilTest) {
+ dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init];
+ dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp);
+ dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp);
+ dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp);
+ dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp);
+ dsDesc.frontFaceStencil.readMask = m_stencilReadMask;
+ dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask;
+
+ dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init];
+ dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp);
+ dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp);
+ dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp);
+ dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp);
+ dsDesc.backFaceStencil.readMask = m_stencilReadMask;
+ dsDesc.backFaceStencil.writeMask = m_stencilWriteMask;
+ }
+}
+
+void QMetalGraphicsPipeline::mapStates()
+{
+ d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise;
+ d->cullMode = toMetalCullMode(m_cullMode);
+ d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode);
+ d->depthBias = float(m_depthBias);
+ d->slopeScaledDepthBias = m_slopeScaledDepthBias;
+}
+
+void QMetalGraphicsPipelineData::setupVertexInputDescriptor(MTLVertexDescriptor *desc)
+{
// same binding space for vertex and constant buffers - work it around
- const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, m_shaderResourceBindings)->maxBinding + 1;
+ // should be in native resource binding not SPIR-V, but this will work anyway
+ const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1;
- MTLVertexDescriptor *inputLayout = [MTLVertexDescriptor vertexDescriptor];
- for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes();
+ QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout();
+ for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes();
it != itEnd; ++it)
{
const uint loc = uint(it->location());
- inputLayout.attributes[loc].format = toMetalAttributeFormat(it->format());
- inputLayout.attributes[loc].offset = NSUInteger(it->offset());
- inputLayout.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding());
+ desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format()));
+ desc.attributes[loc].offset = NSUInteger(it->offset());
+ desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding());
}
int bindingIndex = 0;
- for (auto it = m_vertexInputLayout.cbeginBindings(), itEnd = m_vertexInputLayout.cendBindings();
+ const NSUInteger viewCount = qMax<NSUInteger>(1, q->multiViewCount());
+ for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings();
it != itEnd; ++it, ++bindingIndex)
{
const uint layoutIdx = uint(firstVertexBinding + bindingIndex);
- inputLayout.layouts[layoutIdx].stepFunction =
- it->classification() == QRhiVertexInputBinding::PerInstance
- ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
- inputLayout.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate());
- inputLayout.layouts[layoutIdx].stride = it->stride();
+ desc.layouts[layoutIdx].stepFunction =
+ it->classification() == QRhiVertexInputBinding::PerInstance
+ ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
+ desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate());
+ if (desc.layouts[layoutIdx].stepFunction == MTLVertexStepFunctionPerInstance)
+ desc.layouts[layoutIdx].stepRate *= viewCount;
+ desc.layouts[layoutIdx].stride = it->stride();
}
+}
- MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
+void QMetalGraphicsPipelineData::setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc)
+{
+ // same binding space for vertex and constant buffers - work it around
+ // should be in native resource binding not SPIR-V, but this will work anyway
+ const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1;
+
+ QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout();
+ for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes();
+ it != itEnd; ++it)
+ {
+ const uint loc = uint(it->location());
+ desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format()));
+ desc.attributes[loc].offset = NSUInteger(it->offset());
+ desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding());
+ }
+ int bindingIndex = 0;
+ for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings();
+ it != itEnd; ++it, ++bindingIndex)
+ {
+ const uint layoutIdx = uint(firstVertexBinding + bindingIndex);
+ if (desc.indexBufferIndex) {
+ desc.layouts[layoutIdx].stepFunction =
+ it->classification() == QRhiVertexInputBinding::PerInstance
+ ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridXIndexed;
+ } else {
+ desc.layouts[layoutIdx].stepFunction =
+ it->classification() == QRhiVertexInputBinding::PerInstance
+ ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridX;
+ }
+ desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate());
+ desc.layouts[layoutIdx].stride = it->stride();
+ }
+}
+
+void QRhiMetalData::trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
+{
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ if (binArch) {
+ NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil];
+ rpDesc.binaryArchives = binArchArray;
+ }
+ }
+}
+
+void QRhiMetalData::addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
+{
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ if (binArch) {
+ NSError *err = nil;
+ if (![binArch addRenderPipelineFunctionsWithDescriptor: rpDesc error: &err]) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ qWarning("Failed to collect render pipeline functions to binary archive: %s", qPrintable(msg));
+ }
+ }
+ }
+}
- rpDesc.vertexDescriptor = inputLayout;
+bool QMetalGraphicsPipeline::createVertexFragmentPipeline()
+{
+ QRHI_RES_RHI(QRhiMetal);
+
+ MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor];
+ d->setupVertexInputDescriptor(vertexDesc);
+
+ MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
+ rpDesc.vertexDescriptor = vertexDesc;
- // mutability cannot be determined (slotted buffers could be set as
+ // Mutability cannot be determined (slotted buffers could be set as
// MTLMutabilityImmutable, but then we potentially need a different
// descriptor for each buffer combination as this depends on the actual
- // buffers not just the resource binding layout) so leave it at the default
+ // buffers not just the resource binding layout), so leave
+ // rpDesc.vertex/fragmentBuffers at the defaults.
- for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) {
+ for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
auto cacheIt = rhiD->d->shaderCache.constFind(shaderStage);
if (cacheIt != rhiD->d->shaderCache.constEnd()) {
switch (shaderStage.type()) {
@@ -3853,6 +5028,8 @@ bool QMetalGraphicsPipeline::create()
d->vs.lib = lib;
d->vs.func = func;
d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
+ d->vs.desc = shader.description();
+ d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
rhiD->d->shaderCache.insert(shaderStage, d->vs);
[d->vs.lib retain];
[d->vs.func retain];
@@ -3862,6 +5039,8 @@ bool QMetalGraphicsPipeline::create()
d->fs.lib = lib;
d->fs.func = func;
d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
+ d->fs.desc = shader.description();
+ d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
rhiD->d->shaderCache.insert(shaderStage, d->fs);
[d->fs.lib retain];
[d->fs.func retain];
@@ -3876,85 +5055,836 @@ bool QMetalGraphicsPipeline::create()
}
QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, m_renderPassDesc);
+ setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD);
- if (rpD->colorAttachmentCount) {
- // defaults when no targetBlends are provided
- rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]);
- rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll;
- rpDesc.colorAttachments[0].blendingEnabled = false;
+ if (m_multiViewCount >= 2)
+ rpDesc.inputPrimitiveTopology = toMetalPrimitiveTopologyClass(m_topology);
- Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount
- || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1));
+ rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc);
- for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) {
- const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]);
- rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]);
- rpDesc.colorAttachments[i].blendingEnabled = b.enable;
- rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor);
- rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor);
- rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor);
- rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha);
- rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha);
- rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha);
- rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite);
+ if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ rhiD->d->addRenderPipelineToBinaryArchive(rpDesc);
+
+ NSError *err = nil;
+ d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
+ [rpDesc release];
+ if (!d->ps) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
+ return false;
+ }
+
+ MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
+ setupMetalDepthStencilDescriptor(dsDesc);
+ d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc];
+ [dsDesc release];
+
+ d->primitiveType = toMetalPrimitiveType(m_topology);
+ mapStates();
+
+ return true;
+}
+
+int QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(QShader::Variant vertexCompVariant)
+{
+ switch (vertexCompVariant) {
+ case QShader::NonIndexedVertexAsComputeShader:
+ return 0;
+ case QShader::UInt32IndexedVertexAsComputeShader:
+ return 1;
+ case QShader::UInt16IndexedVertexAsComputeShader:
+ return 2;
+ default:
+ break;
+ }
+ return -1;
+}
+
+id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant)
+{
+ const int varIndex = vsCompVariantToIndex(vertexCompVariant);
+ if (varIndex >= 0 && vertexComputeState[varIndex])
+ return vertexComputeState[varIndex];
+
+ id<MTLFunction> func = nil;
+ if (varIndex >= 0)
+ func = compVs[varIndex].func;
+
+ if (!func) {
+ qWarning("No compute function found for vertex shader translated for tessellation, this should not happen");
+ return nil;
+ }
+
+ const QMap<int, int> &ebb(compVs[varIndex].nativeShaderInfo.extraBufferBindings);
+ const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1);
+
+ MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
+ cpDesc.computeFunction = func;
+ cpDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES;
+ cpDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor];
+ if (indexBufferBinding >= 0) {
+ if (vertexCompVariant == QShader::UInt32IndexedVertexAsComputeShader) {
+ cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32;
+ cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding;
+ } else if (vertexCompVariant == QShader::UInt16IndexedVertexAsComputeShader) {
+ cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16;
+ cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding;
}
}
+ q->setupStageInputDescriptor(cpDesc.stageInputDescriptor);
- if (rpD->hasDepthStencil) {
- // Must only be set when a depth-stencil buffer will actually be bound,
- // validation blows up otherwise.
- MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat);
- rpDesc.depthAttachmentPixelFormat = fmt;
-#if defined(Q_OS_MACOS)
- if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float)
-#else
- if (fmt != MTLPixelFormatDepth32Float)
-#endif
- rpDesc.stencilAttachmentPixelFormat = fmt;
+ rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
+
+ if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
+
+ NSError *err = nil;
+ id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
+ options: MTLPipelineOptionNone
+ reflection: nil
+ error: &err];
+ [cpDesc release];
+ if (!ps) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
+ } else {
+ vertexComputeState[varIndex] = ps;
}
+ // not retained, the only owner is vertexComputeState and so the QRhiGraphicsPipeline
+ return ps;
+}
- rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount));
+id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::tescCompPipeline(QRhiMetal *rhiD)
+{
+ if (tessControlComputeState)
+ return tessControlComputeState;
+
+ MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
+ cpDesc.computeFunction = compTesc.func;
+
+ rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
+
+ if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
NSError *err = nil;
- d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
- if (!d->ps) {
+ id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
+ options: MTLPipelineOptionNone
+ reflection: nil
+ error: &err];
+ [cpDesc release];
+ if (!ps) {
const QString msg = QString::fromNSString(err.localizedDescription);
- qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
- [rpDesc release];
- return false;
+ qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
+ } else {
+ tessControlComputeState = ps;
+ }
+ // not retained, the only owner is tessControlComputeState and so the QRhiGraphicsPipeline
+ return ps;
+}
+
+static inline bool indexTaken(quint32 index, quint64 indices)
+{
+ return (indices >> index) & 0x1;
+}
+
+static inline void takeIndex(quint32 index, quint64 &indices)
+{
+ indices |= 1 << index;
+}
+
+static inline int nextAttributeIndex(quint64 indices)
+{
+ // Maximum number of vertex attributes per vertex descriptor. There does
+ // not appear to be a way to query this from the implementation.
+ // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf indicates
+ // that all GPU families have a value of 31.
+ static const int maxVertexAttributes = 31;
+
+ for (int index = 0; index < maxVertexAttributes; ++index) {
+ if (!indexTaken(index, indices))
+ return index;
+ }
+
+ Q_UNREACHABLE_RETURN(-1);
+}
+
+static inline int aligned(quint32 offset, quint32 alignment)
+{
+ return ((offset + alignment - 1) / alignment) * alignment;
+}
+
+template<typename T>
+static void addUnusedVertexAttribute(const T &variable, QRhiMetal *rhiD, quint32 &offset, quint32 &vertexAlignment)
+{
+
+ int elements = 1;
+ for (const int dim : variable.arrayDims)
+ elements *= dim;
+
+ if (variable.type == QShaderDescription::VariableType::Struct) {
+ for (int element = 0; element < elements; ++element) {
+ for (const auto &member : variable.structMembers) {
+ addUnusedVertexAttribute(member, rhiD, offset, vertexAlignment);
+ }
+ }
+ } else {
+ const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type);
+ const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format);
+
+ // MSL specification 3.0 says alignment = size for non packed scalars and vectors
+ const quint32 alignment = size;
+ vertexAlignment = std::max(vertexAlignment, alignment);
+
+ for (int element = 0; element < elements; ++element) {
+ // adjust alignment
+ offset = aligned(offset, alignment);
+ offset += size;
+ }
}
+}
+
+template<typename T>
+static void addVertexAttribute(const T &variable, int binding, QRhiMetal *rhiD, int &index, quint32 &offset, MTLVertexAttributeDescriptorArray *attributes, quint64 &indices, quint32 &vertexAlignment)
+{
+
+ int elements = 1;
+ for (const int dim : variable.arrayDims)
+ elements *= dim;
+
+ if (variable.type == QShaderDescription::VariableType::Struct) {
+ for (int element = 0; element < elements; ++element) {
+ for (const auto &member : variable.structMembers) {
+ addVertexAttribute(member, binding, rhiD, index, offset, attributes, indices, vertexAlignment);
+ }
+ }
+ } else {
+ const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type);
+ const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format);
+
+ // MSL specification 3.0 says alignment = size for non packed scalars and vectors
+ const quint32 alignment = size;
+ vertexAlignment = std::max(vertexAlignment, alignment);
+
+ for (int element = 0; element < elements; ++element) {
+ Q_ASSERT(!indexTaken(index, indices));
+
+ // adjust alignment
+ offset = aligned(offset, alignment);
+
+ attributes[index].bufferIndex = binding;
+ attributes[index].format = toMetalAttributeFormat(format);
+ attributes[index].offset = offset;
+
+ takeIndex(index, indices);
+ index++;
+ if (indexTaken(index, indices))
+ index = nextAttributeIndex(indices);
+
+ offset += size;
+ }
+ }
+}
+
+static inline bool matches(const QList<QShaderDescription::BlockVariable> &a, const QList<QShaderDescription::BlockVariable> &b)
+{
+ if (a.size() == b.size()) {
+ bool match = true;
+ for (int i = 0; i < a.size() && match; ++i) {
+ match &= a[i].type == b[i].type
+ && a[i].arrayDims == b[i].arrayDims
+ && matches(a[i].structMembers, b[i].structMembers);
+ }
+ return match;
+ }
+
+ return false;
+}
+
+static inline bool matches(const QShaderDescription::InOutVariable &a, const QShaderDescription::InOutVariable &b)
+{
+ return a.location == b.location
+ && a.type == b.type
+ && a.perPatch == b.perPatch
+ && matches(a.structMembers, b.structMembers);
+}
+
+//
+// Create the tessellation evaluation render pipeline state
+//
+// The tesc runs as a compute shader in a compute pipeline and writes per patch and per patch
+// control point data into separate storage buffers. The tese runs as a vertex shader in a render
+// pipeline. Our task is to generate a render pipeline descriptor for the tese that pulls vertices
+// from these buffers.
+//
+// As the buffers we are pulling vertices from are written by a compute pipeline, they follow the
+// MSL alignment conventions which we must take into account when generating our
+// MTLVertexDescriptor. We must include the user defined tese input attributes, and any builtins
+// that were used.
+//
+// SPIRV-Cross generates the MSL tese shader code with input attribute indices that reflect the
+// specified GLSL locations. Interface blocks are flattened with each member having an incremented
+// attribute index. SPIRV-Cross reports an error on compilation if there are clashes in the index
+// address space.
+//
+// After the user specified attributes are processed, SPIRV-Cross places the in-use builtins at the
+// next available (lowest value) attribute index. Tese builtins are processed in the following
+// order:
+//
+// in gl_PerVertex
+// {
+// vec4 gl_Position;
+// float gl_PointSize;
+// float gl_ClipDistance[];
+// };
+//
+// patch in float gl_TessLevelOuter[4];
+// patch in float gl_TessLevelInner[2];
+//
+// Enumerations in QShaderDescription::BuiltinType are defined in this order.
+//
+// For quads, SPIRV-Cross places MTLQuadTessellationFactorsHalf per patch in the tessellation
+// factor buffer. For triangles it uses MTLTriangleTessellationFactorsHalf.
+//
+// It should be noted that SPIRV-Cross handles the following builtin inputs internally, with no
+// host side support required.
+//
+// in vec3 gl_TessCoord;
+// in int gl_PatchVerticesIn;
+// in int gl_PrimitiveID;
+//
+id<MTLRenderPipelineState> QMetalGraphicsPipelineData::Tessellation::teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline)
+{
+ if (pipeline->d->ps)
+ return pipeline->d->ps;
+
+ MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
+ MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor];
+
+ // tesc output buffers
+ const QMap<int, int> &ebb(compTesc.nativeShaderInfo.extraBufferBindings);
+ const int tescOutputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+ const int tescPatchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
+ const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
+ quint32 offsetInTescOutput = 0;
+ quint32 offsetInTescPatchOutput = 0;
+ quint32 offsetInTessFactorBuffer = 0;
+ quint32 tescOutputAlignment = 0;
+ quint32 tescPatchOutputAlignment = 0;
+ quint32 tessFactorAlignment = 0;
+ QSet<int> usedBuffers;
+
+ // tesc output variables in ascending location order
+ QMap<int, QShaderDescription::InOutVariable> tescOutVars;
+ for (const auto &tescOutVar : compTesc.desc.outputVariables())
+ tescOutVars[tescOutVar.location] = tescOutVar;
+
+ // tese input variables in ascending location order
+ QMap<int, QShaderDescription::InOutVariable> teseInVars;
+ for (const auto &teseInVar : vertTese.desc.inputVariables())
+ teseInVars[teseInVar.location] = teseInVar;
+
+ // bit mask tracking usage of vertex attribute indices
+ quint64 indices = 0;
+
+ for (QShaderDescription::InOutVariable &tescOutVar : tescOutVars) {
+
+ int index = tescOutVar.location;
+ int binding = -1;
+ quint32 *offset = nullptr;
+ quint32 *alignment = nullptr;
+
+ if (tescOutVar.perPatch) {
+ binding = tescPatchOutputBufferBinding;
+ offset = &offsetInTescPatchOutput;
+ alignment = &tescPatchOutputAlignment;
+ } else {
+ tescOutVar.arrayDims.removeLast();
+ binding = tescOutputBufferBinding;
+ offset = &offsetInTescOutput;
+ alignment = &tescOutputAlignment;
+ }
+
+ if (teseInVars.contains(index)) {
+
+ if (!matches(teseInVars[index], tescOutVar)) {
+ qWarning() << "mismatched tessellation control output -> tesssellation evaluation input at location" << index;
+ qWarning() << " tesc out:" << tescOutVar;
+ qWarning() << " tese in:" << teseInVars[index];
+ }
+
+ if (binding != -1) {
+ addVertexAttribute(tescOutVar, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment);
+ usedBuffers << binding;
+ } else {
+ qWarning() << "baked tessellation control shader missing output buffer binding information";
+ addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment);
+ }
+
+ } else {
+ qWarning() << "missing tessellation evaluation input for tessellation control output:" << tescOutVar;
+ addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment);
+ }
+
+ teseInVars.remove(tescOutVar.location);
+ }
+
+ for (const QShaderDescription::InOutVariable &teseInVar : teseInVars)
+ qWarning() << "missing tessellation control output for tessellation evaluation input:" << teseInVar;
+
+ // tesc output builtins in ascending location order
+ QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> tescOutBuiltins;
+ for (const auto &tescOutBuiltin : compTesc.desc.outputBuiltinVariables())
+ tescOutBuiltins[tescOutBuiltin.type] = tescOutBuiltin;
+
+ // tese input builtins in ascending location order
+ QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> teseInBuiltins;
+ for (const auto &teseInBuiltin : vertTese.desc.inputBuiltinVariables())
+ teseInBuiltins[teseInBuiltin.type] = teseInBuiltin;
+
+ const bool trianglesMode = vertTese.desc.tessellationMode() == QShaderDescription::TrianglesTessellationMode;
+ bool tessLevelAdded = false;
+
+ for (const QShaderDescription::BuiltinVariable &builtin : tescOutBuiltins) {
+
+ QShaderDescription::InOutVariable variable;
+ int binding = -1;
+ quint32 *offset = nullptr;
+ quint32 *alignment = nullptr;
+
+ switch (builtin.type) {
+ case QShaderDescription::BuiltinType::PositionBuiltin:
+ variable.type = QShaderDescription::VariableType::Vec4;
+ binding = tescOutputBufferBinding;
+ offset = &offsetInTescOutput;
+ alignment = &tescOutputAlignment;
+ break;
+ case QShaderDescription::BuiltinType::PointSizeBuiltin:
+ variable.type = QShaderDescription::VariableType::Float;
+ binding = tescOutputBufferBinding;
+ offset = &offsetInTescOutput;
+ alignment = &tescOutputAlignment;
+ break;
+ case QShaderDescription::BuiltinType::ClipDistanceBuiltin:
+ variable.type = QShaderDescription::VariableType::Float;
+ variable.arrayDims = builtin.arrayDims;
+ binding = tescOutputBufferBinding;
+ offset = &offsetInTescOutput;
+ alignment = &tescOutputAlignment;
+ break;
+ case QShaderDescription::BuiltinType::TessLevelOuterBuiltin:
+ variable.type = QShaderDescription::VariableType::Half4;
+ binding = tessFactorBufferBinding;
+ offset = &offsetInTessFactorBuffer;
+ tessLevelAdded = trianglesMode;
+ alignment = &tessFactorAlignment;
+ break;
+ case QShaderDescription::BuiltinType::TessLevelInnerBuiltin:
+ if (trianglesMode) {
+ if (!tessLevelAdded) {
+ variable.type = QShaderDescription::VariableType::Half4;
+ binding = tessFactorBufferBinding;
+ offsetInTessFactorBuffer = 0;
+ offset = &offsetInTessFactorBuffer;
+ alignment = &tessFactorAlignment;
+ tessLevelAdded = true;
+ } else {
+ teseInBuiltins.remove(builtin.type);
+ continue;
+ }
+ } else {
+ variable.type = QShaderDescription::VariableType::Half2;
+ binding = tessFactorBufferBinding;
+ offsetInTessFactorBuffer = 8;
+ offset = &offsetInTessFactorBuffer;
+ alignment = &tessFactorAlignment;
+ }
+ break;
+ default:
+ Q_UNREACHABLE();
+ break;
+ }
+
+ if (teseInBuiltins.contains(builtin.type)) {
+ if (binding != -1) {
+ int index = nextAttributeIndex(indices);
+ addVertexAttribute(variable, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment);
+ usedBuffers << binding;
+ } else {
+ qWarning() << "baked tessellation control shader missing output buffer binding information";
+ addUnusedVertexAttribute(variable, rhiD, *offset, *alignment);
+ }
+ } else {
+ addUnusedVertexAttribute(variable, rhiD, *offset, *alignment);
+ }
+
+ teseInBuiltins.remove(builtin.type);
+ }
+
+ for (const QShaderDescription::BuiltinVariable &builtin : teseInBuiltins) {
+ switch (builtin.type) {
+ case QShaderDescription::BuiltinType::PositionBuiltin:
+ case QShaderDescription::BuiltinType::PointSizeBuiltin:
+ case QShaderDescription::BuiltinType::ClipDistanceBuiltin:
+ qWarning() << "missing tessellation control output for tessellation evaluation builtin input:" << builtin;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (usedBuffers.contains(tescOutputBufferBinding)) {
+ vertexDesc.layouts[tescOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatchControlPoint;
+ vertexDesc.layouts[tescOutputBufferBinding].stride = aligned(offsetInTescOutput, tescOutputAlignment);
+ }
+
+ if (usedBuffers.contains(tescPatchOutputBufferBinding)) {
+ vertexDesc.layouts[tescPatchOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch;
+ vertexDesc.layouts[tescPatchOutputBufferBinding].stride = aligned(offsetInTescPatchOutput, tescPatchOutputAlignment);
+ }
+
+ if (usedBuffers.contains(tessFactorBufferBinding)) {
+ vertexDesc.layouts[tessFactorBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch;
+ vertexDesc.layouts[tessFactorBufferBinding].stride = trianglesMode ? sizeof(MTLTriangleTessellationFactorsHalf) : sizeof(MTLQuadTessellationFactorsHalf);
+ }
+
+ rpDesc.vertexDescriptor = vertexDesc;
+ rpDesc.vertexFunction = vertTese.func;
+ rpDesc.fragmentFunction = pipeline->d->fs.func;
+
+ // The portable, cross-API approach is to use CCW, the results are then
+ // identical (assuming the applied clipSpaceCorrMatrix) for all the 3D
+ // APIs. The tess.eval. GLSL shader is thus expected to specify ccw. If it
+ // doesn't, things may not work as expected.
+ rpDesc.tessellationOutputWindingOrder = toMetalTessellationWindingOrder(vertTese.desc.tessellationWindingOrder());
+
+ rpDesc.tessellationPartitionMode = toMetalTessellationPartitionMode(vertTese.desc.tessellationPartitioning());
+
+ QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, pipeline->renderPassDescriptor());
+ pipeline->setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD);
+
+ rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc);
+
+ if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ rhiD->d->addRenderPipelineToBinaryArchive(rpDesc);
+
+ NSError *err = nil;
+ id<MTLRenderPipelineState> ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
[rpDesc release];
+ if (!ps) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ qWarning("Failed to create render pipeline state for tessellation: %s", qPrintable(msg));
+ } else {
+ // ps is stored in the QMetalGraphicsPipelineData so the end result in this
+ // regard is no different from what createVertexFragmentPipeline does
+ pipeline->d->ps = ps;
+ }
+ return ps;
+}
- MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
- dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways;
- dsDesc.depthWriteEnabled = m_depthWrite;
- if (m_stencilTest) {
- dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init];
- dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp);
- dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp);
- dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp);
- dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp);
- dsDesc.frontFaceStencil.readMask = m_stencilReadMask;
- dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask;
+QMetalBuffer *QMetalGraphicsPipelineData::ExtraBufferManager::acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type)
+{
+ QVector<QMetalBuffer *> *workBuffers = type == WorkBufType::DeviceLocal ? &deviceLocalWorkBuffers : &hostVisibleWorkBuffers;
- dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init];
- dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp);
- dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp);
- dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp);
- dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp);
- dsDesc.backFaceStencil.readMask = m_stencilReadMask;
- dsDesc.backFaceStencil.writeMask = m_stencilWriteMask;
+ // Check if something is reusable as-is.
+ for (QMetalBuffer *workBuf : *workBuffers) {
+ if (workBuf && workBuf->lastActiveFrameSlot == -1 && workBuf->size() >= size) {
+ workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot;
+ return workBuf;
+ }
+ }
+
+ // Once the pool is above a certain threshold, see if there is something
+ // unused (but too small) and recreate that our size.
+ if (workBuffers->count() > QMTL_FRAMES_IN_FLIGHT * 8) {
+ for (QMetalBuffer *workBuf : *workBuffers) {
+ if (workBuf && workBuf->lastActiveFrameSlot == -1) {
+ workBuf->setSize(size);
+ if (workBuf->create()) {
+ workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot;
+ return workBuf;
+ }
+ }
+ }
+ }
+
+ // Add a new buffer to the pool.
+ QMetalBuffer *buf;
+ if (type == WorkBufType::DeviceLocal) {
+ // for GPU->GPU data (non-slotted, not necessarily host writable)
+ buf = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size);
+ } else {
+ // for CPU->GPU (non-slotted, host writable/coherent)
+ buf = new QMetalBuffer(rhiD, QRhiBuffer::Dynamic, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size);
+ }
+ if (buf->create()) {
+ buf->lastActiveFrameSlot = rhiD->currentFrameSlot;
+ workBuffers->append(buf);
+ return buf;
+ }
+
+ qWarning("Failed to acquire work buffer of size %u", size);
+ return nullptr;
+}
+
+bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert, const QShader &tesc, const QShader &tese, const QShader &tessFrag)
+{
+ QRHI_RES_RHI(QRhiMetal);
+ QString error;
+ QByteArray entryPoint;
+ QShaderKey activeKey;
+
+ const QShaderDescription tescDesc = tesc.description();
+ const QShaderDescription teseDesc = tese.description();
+ d->tess.inControlPointCount = uint(m_patchControlPointCount);
+ d->tess.outControlPointCount = tescDesc.tessellationOutputVertexCount();
+ if (!d->tess.outControlPointCount)
+ d->tess.outControlPointCount = teseDesc.tessellationOutputVertexCount();
+
+ if (!d->tess.outControlPointCount) {
+ qWarning("Failed to determine output vertex count from the tessellation control or evaluation shader, cannot tessellate");
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+
+ if (m_multiViewCount >= 2)
+ qWarning("Multiview is not supported with tessellation");
+
+ // Now the vertex shader is a compute shader.
+ // It should have three dedicated *VertexAsComputeShader variants.
+ // What the requested variant was (Standard or Batchable) plays no role here.
+ // (the Qt Quick scenegraph does not use tessellation with its materials)
+ // Create all three versions.
+
+ bool variantsPresent[3] = {};
+ const QVector<QShaderKey> tessVertKeys = tessVert.availableShaders();
+ for (const QShaderKey &k : tessVertKeys) {
+ switch (k.sourceVariant()) {
+ case QShader::NonIndexedVertexAsComputeShader:
+ variantsPresent[0] = true;
+ break;
+ case QShader::UInt32IndexedVertexAsComputeShader:
+ variantsPresent[1] = true;
+ break;
+ case QShader::UInt16IndexedVertexAsComputeShader:
+ variantsPresent[2] = true;
+ break;
+ default:
+ break;
+ }
+ }
+ if (!(variantsPresent[0] && variantsPresent[1] && variantsPresent[2])) {
+ qWarning("Vertex shader is not prepared for Metal tessellation. Cannot tessellate. "
+ "Perhaps the relevant variants (UInt32IndexedVertexAsComputeShader et al) were not generated? "
+ "Try passing --msltess to qsb.");
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+
+ int varIndex = 0; // Will map NonIndexed as 0, UInt32 as 1, UInt16 as 2. Do not change this ordering.
+ for (QShader::Variant variant : {
+ QShader::NonIndexedVertexAsComputeShader,
+ QShader::UInt32IndexedVertexAsComputeShader,
+ QShader::UInt16IndexedVertexAsComputeShader })
+ {
+ id<MTLLibrary> lib = rhiD->d->createMetalLib(tessVert, variant, &error, &entryPoint, &activeKey);
+ if (!lib) {
+ qWarning("MSL shader compilation failed for vertex-as-compute shader %d: %s", int(variant), qPrintable(error));
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint);
+ if (!func) {
+ qWarning("MSL function for entry point %s not found", entryPoint.constData());
+ [lib release];
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ QMetalShader &compVs(d->tess.compVs[varIndex]);
+ compVs.lib = lib;
+ compVs.func = func;
+ compVs.desc = tessVert.description();
+ compVs.nativeResourceBindingMap = tessVert.nativeResourceBindingMap(activeKey);
+ compVs.nativeShaderInfo = tessVert.nativeShaderInfo(activeKey);
+
+ // pre-create all three MTLComputePipelineStates
+ if (!d->tess.vsCompPipeline(rhiD, variant)) {
+ qWarning("Failed to pre-generate compute pipeline for vertex compute shader (tessellation variant %d)", int(variant));
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+
+ ++varIndex;
+ }
+
+ // Pipeline #2 is a compute that runs the tessellation control (compute) shader
+ id<MTLLibrary> tessControlLib = rhiD->d->createMetalLib(tesc, QShader::StandardShader, &error, &entryPoint, &activeKey);
+ if (!tessControlLib) {
+ qWarning("MSL shader compilation failed for tessellation control compute shader: %s", qPrintable(error));
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ id<MTLFunction> tessControlFunc = rhiD->d->createMSLShaderFunction(tessControlLib, entryPoint);
+ if (!tessControlFunc) {
+ qWarning("MSL function for entry point %s not found", entryPoint.constData());
+ [tessControlLib release];
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ d->tess.compTesc.lib = tessControlLib;
+ d->tess.compTesc.func = tessControlFunc;
+ d->tess.compTesc.desc = tesc.description();
+ d->tess.compTesc.nativeResourceBindingMap = tesc.nativeResourceBindingMap(activeKey);
+ d->tess.compTesc.nativeShaderInfo = tesc.nativeShaderInfo(activeKey);
+ if (!d->tess.tescCompPipeline(rhiD)) {
+ qWarning("Failed to pre-generate compute pipeline for tessellation control shader");
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
}
+ // Pipeline #3 is a render pipeline with the tessellation evaluation (vertex) + the fragment shader
+ id<MTLLibrary> tessEvalLib = rhiD->d->createMetalLib(tese, QShader::StandardShader, &error, &entryPoint, &activeKey);
+ if (!tessEvalLib) {
+ qWarning("MSL shader compilation failed for tessellation evaluation vertex shader: %s", qPrintable(error));
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ id<MTLFunction> tessEvalFunc = rhiD->d->createMSLShaderFunction(tessEvalLib, entryPoint);
+ if (!tessEvalFunc) {
+ qWarning("MSL function for entry point %s not found", entryPoint.constData());
+ [tessEvalLib release];
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ d->tess.vertTese.lib = tessEvalLib;
+ d->tess.vertTese.func = tessEvalFunc;
+ d->tess.vertTese.desc = tese.description();
+ d->tess.vertTese.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey);
+ d->tess.vertTese.nativeShaderInfo = tese.nativeShaderInfo(activeKey);
+
+ id<MTLLibrary> fragLib = rhiD->d->createMetalLib(tessFrag, QShader::StandardShader, &error, &entryPoint, &activeKey);
+ if (!fragLib) {
+ qWarning("MSL shader compilation failed for fragment shader: %s", qPrintable(error));
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ id<MTLFunction> fragFunc = rhiD->d->createMSLShaderFunction(fragLib, entryPoint);
+ if (!fragFunc) {
+ qWarning("MSL function for entry point %s not found", entryPoint.constData());
+ [fragLib release];
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+ d->fs.lib = fragLib;
+ d->fs.func = fragFunc;
+ d->fs.desc = tessFrag.description();
+ d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey);
+ d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey);
+
+ if (!d->tess.teseFragRenderPipeline(rhiD, this)) {
+ qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader");
+ d->tess.enabled = false;
+ d->tess.failed = true;
+ return false;
+ }
+
+ MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
+ setupMetalDepthStencilDescriptor(dsDesc);
d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc];
[dsDesc release];
- d->primitiveType = toMetalPrimitiveType(m_topology);
- d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise;
- d->cullMode = toMetalCullMode(m_cullMode);
- d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode);
- d->depthBias = float(m_depthBias);
- d->slopeScaledDepthBias = m_slopeScaledDepthBias;
+ // no primitiveType
+ mapStates();
+
+ return true;
+}
+
+bool QMetalGraphicsPipeline::create()
+{
+ destroy(); // no early test, always invoke and leave it to destroy to decide what to clean up
+
+ QRHI_RES_RHI(QRhiMetal);
+ rhiD->pipelineCreationStart();
+ if (!rhiD->sanityCheckGraphicsPipeline(this))
+ return false;
+
+ // See if tessellation is involved. Things will be very different, if so.
+ QShader tessVert;
+ QShader tesc;
+ QShader tese;
+ QShader tessFrag;
+ for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
+ switch (shaderStage.type()) {
+ case QRhiShaderStage::Vertex:
+ tessVert = shaderStage.shader();
+ break;
+ case QRhiShaderStage::TessellationControl:
+ tesc = shaderStage.shader();
+ break;
+ case QRhiShaderStage::TessellationEvaluation:
+ tese = shaderStage.shader();
+ break;
+ case QRhiShaderStage::Fragment:
+ tessFrag = shaderStage.shader();
+ break;
+ default:
+ break;
+ }
+ }
+ d->tess.enabled = tesc.isValid() && tese.isValid() && m_topology == Patches && m_patchControlPointCount > 0;
+ d->tess.failed = false;
+
+ bool ok = d->tess.enabled ? createTessellationPipelines(tessVert, tesc, tese, tessFrag) : createVertexFragmentPipeline();
+ if (!ok)
+ return false;
+
+ // SPIRV-Cross buffer size buffers
+ int buffers = 0;
+ QVarLengthArray<QMetalShader *, 6> shaders;
+ if (d->tess.enabled) {
+ shaders.append(&d->tess.compVs[0]);
+ shaders.append(&d->tess.compVs[1]);
+ shaders.append(&d->tess.compVs[2]);
+ shaders.append(&d->tess.compTesc);
+ shaders.append(&d->tess.vertTese);
+ } else {
+ shaders.append(&d->vs);
+ }
+ shaders.append(&d->fs);
+
+ for (QMetalShader *shader : shaders) {
+ if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
+ const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
+ shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1);
+ int maxNativeBinding = 0;
+ for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks())
+ maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first);
+
+ // we use one buffer to hold data for all graphics shader stages, each with a different offset.
+ // buffer offsets must be 32byte aligned - adjust buffer count accordingly
+ buffers += ((maxNativeBinding + 1 + 7) / 8) * 8;
+ }
+ }
+
+ if (buffers) {
+ if (!d->bufferSizeBuffer)
+ d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
+
+ d->bufferSizeBuffer->setSize(buffers * sizeof(int));
+ d->bufferSizeBuffer->create();
+ }
rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1;
@@ -3982,6 +5912,9 @@ void QMetalComputePipeline::destroy()
if (!d->ps)
return;
+ delete d->bufferSizeBuffer;
+ d->bufferSizeBuffer = nullptr;
+
QRhiMetalData::DeferredReleaseEntry e;
e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline;
e.lastActiveFrameSlot = lastActiveFrameSlot;
@@ -3995,6 +5928,29 @@ void QMetalComputePipeline::destroy()
}
}
+void QRhiMetalData::trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
+{
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ if (binArch) {
+ NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil];
+ cpDesc.binaryArchives = binArchArray;
+ }
+ }
+}
+
+void QRhiMetalData::addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
+{
+ if (@available(macOS 11.0, iOS 14.0, *)) {
+ if (binArch) {
+ NSError *err = nil;
+ if (![binArch addComputePipelineFunctionsWithDescriptor: cpDesc error: &err]) {
+ const QString msg = QString::fromNSString(err.localizedDescription);
+ qWarning("Failed to collect compute pipeline functions to binary archive: %s", qPrintable(msg));
+ }
+ }
+ }
+}
+
bool QMetalComputePipeline::create()
{
if (d->ps)
@@ -4027,6 +5983,14 @@ bool QMetalComputePipeline::create()
d->cs.func = func;
d->cs.localSize = shader.description().computeShaderLocalSize();
d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
+ d->cs.desc = shader.description();
+ d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
+
+ // SPIRV-Cross buffer size buffers
+ if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
+ const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
+ d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1);
+ }
if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) {
for (QMetalShader &s : rhiD->d->shaderCache)
@@ -4041,14 +6005,41 @@ bool QMetalComputePipeline::create()
d->localSize = MTLSizeMake(d->cs.localSize[0], d->cs.localSize[1], d->cs.localSize[2]);
+ MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
+ cpDesc.computeFunction = d->cs.func;
+
+ rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
+
+ if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+ rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
+
NSError *err = nil;
- d->ps = [rhiD->d->dev newComputePipelineStateWithFunction: d->cs.func error: &err];
+ d->ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
+ options: MTLPipelineOptionNone
+ reflection: nil
+ error: &err];
+ [cpDesc release];
if (!d->ps) {
const QString msg = QString::fromNSString(err.localizedDescription);
- qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
+ qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
return false;
}
+ // SPIRV-Cross buffer size buffers
+ if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
+ int buffers = 0;
+ for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks())
+ buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first);
+
+ buffers += 1;
+
+ if (!d->bufferSizeBuffer)
+ d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
+
+ d->bufferSizeBuffer->setSize(buffers * sizeof(int));
+ d->bufferSizeBuffer->create();
+ }
+
rhiD->pipelineCreationEnd();
lastActiveFrameSlot = -1;
generation += 1;
@@ -4081,10 +6072,12 @@ const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles()
return &nativeHandlesStruct;
}
-void QMetalCommandBuffer::resetState()
+void QMetalCommandBuffer::resetState(double lastGpuTime)
{
+ d->lastGpuTime = lastGpuTime;
d->currentRenderPassEncoder = nil;
d->currentComputePassEncoder = nil;
+ d->tessellationComputeEncoder = nil;
d->currentPassRpDesc = nil;
resetPerPassState();
}
@@ -4146,8 +6139,7 @@ void QMetalSwapChain::destroy()
for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
if (d->sem[i]) {
// the semaphores cannot be released if they do not have the initial value
- dispatch_semaphore_wait(d->sem[i], DISPATCH_TIME_FOREVER);
- dispatch_semaphore_signal(d->sem[i]);
+ waitUntilCompleted(i);
dispatch_release(d->sem[i]);
d->sem[i] = nullptr;
@@ -4159,7 +6151,14 @@ void QMetalSwapChain::destroy()
d->msaaTex[i] = nil;
}
+#ifdef Q_OS_MACOS
+ d->liveResizeStartObserver.remove();
+ d->liveResizeEndObserver.remove();
+ d->liveResizeObserverSet = false;
+#endif
+
d->layer = nullptr;
+ m_proxyData = {};
[d->curDrawable release];
d->curDrawable = nil;
@@ -4181,6 +6180,9 @@ QRhiRenderTarget *QMetalSwapChain::currentFrameRenderTarget()
return &rtWrapper;
}
+// view.layer should ideally be called on the main thread, otherwise the UI
+// Thread Checker in Xcode drops a warning. Hence trying to proxy it through
+// QRhiSwapChainProxyData instead of just calling this function directly.
static inline CAMetalLayer *layerForWindow(QWindow *window)
{
Q_ASSERT(window);
@@ -4193,29 +6195,51 @@ static inline CAMetalLayer *layerForWindow(QWindow *window)
return static_cast<CAMetalLayer *>(view.layer);
}
+// If someone calls this, it is hopefully from the main thread, and they will
+// then set the returned data on the QRhiSwapChain, so it won't need to query
+// the layer on its own later on.
+QRhiSwapChainProxyData QRhiMetal::updateSwapChainProxyData(QWindow *window)
+{
+ QRhiSwapChainProxyData d;
+ d.reserved[0] = layerForWindow(window);
+ return d;
+}
+
QSize QMetalSwapChain::surfacePixelSize()
{
Q_ASSERT(m_window);
CAMetalLayer *layer = d->layer;
if (!layer)
- layer = layerForWindow(m_window);
+ layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, m_window, QRhi::Metal, 0);
- CGSize layerSize = layer.bounds.size;
- layerSize.width *= layer.contentsScale;
- layerSize.height *= layer.contentsScale;
- return QSizeF::fromCGSize(layerSize).toSize();
+ Q_ASSERT(layer);
+ int height = (int)layer.bounds.size.height;
+ int width = (int)layer.bounds.size.width;
+ width *= layer.contentsScale;
+ height *= layer.contentsScale;
+ return QSize(width, height);
}
bool QMetalSwapChain::isFormatSupported(Format f)
{
-#ifdef Q_OS_MACOS
- return f == SDR || f == HDRExtendedSrgbLinear;
-#endif
+ if (f == HDRExtendedSrgbLinear) {
+ if (@available(macOS 10.11, iOS 16.0, *))
+ return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
+ else
+ return false;
+ } else if (f == HDRExtendedDisplayP3Linear) {
+ if (@available(macOS 11.0, iOS 14.0, *))
+ return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
+ else
+ return false;
+ }
return f == SDR;
}
QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
{
+ QRHI_RES_RHI(QRhiMetal);
+
chooseFormats(); // ensure colorFormat and similar are filled out
QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
@@ -4226,7 +6250,6 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
#ifdef Q_OS_MACOS
// m_depthStencil may not be built yet so cannot rely on computed fields in it
- QRHI_RES_RHI(QRhiMetal);
rpD->dsFormat = rhiD->d->dev.depth24Stencil8PixelFormatSupported
? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8;
#else
@@ -4234,6 +6257,8 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
#endif
rpD->updateSerializedFormat();
+
+ rhiD->registerResource(rpD, false);
return rpD;
}
@@ -4242,7 +6267,7 @@ void QMetalSwapChain::chooseFormats()
QRHI_RES_RHI(QRhiMetal);
samples = rhiD->effectiveSampleCount(m_sampleCount);
// pick a format that is allowed for CAMetalLayer.pixelFormat
- if (m_format == HDRExtendedSrgbLinear) {
+ if (m_format == HDRExtendedSrgbLinear || m_format == HDRExtendedDisplayP3Linear) {
d->colorFormat = MTLPixelFormatRGBA16Float;
d->rhiColorFormat = QRhiTexture::RGBA16F;
return;
@@ -4251,6 +6276,17 @@ void QMetalSwapChain::chooseFormats()
d->rhiColorFormat = QRhiTexture::BGRA8;
}
+void QMetalSwapChain::waitUntilCompleted(int slot)
+{
+ // wait+signal is the general pattern to ensure the commands for a
+ // given frame slot have completed (if sem is 1, we go 0 then 1; if
+ // sem is 0 we go -1, block, completion increments to 0, then us to 1)
+
+ dispatch_semaphore_t sem = d->sem[slot];
+ dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
+ dispatch_semaphore_signal(sem);
+}
+
bool QMetalSwapChain::createOrResize()
{
Q_ASSERT(m_window);
@@ -4272,19 +6308,24 @@ bool QMetalSwapChain::createOrResize()
return false;
}
- d->layer = layerForWindow(window);
+ d->layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, window, QRhi::Metal, 0);
Q_ASSERT(d->layer);
chooseFormats();
if (d->colorFormat != d->layer.pixelFormat)
d->layer.pixelFormat = d->colorFormat;
-#ifdef Q_OS_MACOS
- // Can't enable this on iOS until wantsExtendedDynamicRangeContent is available
+
if (m_format == HDRExtendedSrgbLinear) {
- d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB);
- d->layer.wantsExtendedDynamicRangeContent = YES;
+ if (@available(macOS 10.11, iOS 16.0, *)) {
+ d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB);
+ d->layer.wantsExtendedDynamicRangeContent = YES;
+ }
+ } else if (m_format == HDRExtendedDisplayP3Linear) {
+ if (@available(macOS 11.0, iOS 16.0, *)) {
+ d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3);
+ d->layer.wantsExtendedDynamicRangeContent = YES;
+ }
}
-#endif
if (m_flags.testFlag(UsedAsTransferSource))
d->layer.framebufferOnly = NO;
@@ -4309,9 +6350,12 @@ bool QMetalSwapChain::createOrResize()
// Now set the layer's drawableSize which will stay set to the same value
// until the next createOrResize(), thus ensuring atomicity with regards to
// the drawable size in frames.
- CGSize layerSize = d->layer.bounds.size;
- layerSize.width *= d->layer.contentsScale;
- layerSize.height *= d->layer.contentsScale;
+ int width = (int)d->layer.bounds.size.width;
+ int height = (int)d->layer.bounds.size.height;
+ CGSize layerSize = CGSizeMake(width, height);
+ const float scaleFactor = d->layer.contentsScale;
+ layerSize.width *= scaleFactor;
+ layerSize.height *= scaleFactor;
d->layer.drawableSize = layerSize;
m_currentPixelSize = QSizeF::fromCGSize(layerSize).toSize();
@@ -4319,10 +6363,39 @@ bool QMetalSwapChain::createOrResize()
[d->layer setDevice: rhiD->d->dev];
+#ifdef Q_OS_MACOS
+ // Can only use presentsWithTransaction (to get smooth resizing) when
+ // presenting from the main (gui) thread. We predict that based on the
+ // thread this function is called on since if the QRhiSwapChain is
+ // initialied on a given thread then that's almost certainly the thread on
+ // which the QRhi renders and presents.
+ const bool canUsePresentsWithTransaction = NSThread.isMainThread;
+
+ // Have an env.var. just in case it turns out presentsWithTransaction is
+ // not desired in some specific case.
+ static bool allowPresentsWithTransaction = !qEnvironmentVariableIntValue("QT_MTL_NO_TRANSACTION");
+
+ if (allowPresentsWithTransaction && canUsePresentsWithTransaction && !d->liveResizeObserverSet) {
+ d->liveResizeObserverSet = true;
+ NSView *view = reinterpret_cast<NSView *>(window->winId());
+ NSWindow *window = view.window;
+ if (window) {
+ qCDebug(QRHI_LOG_INFO, "will set presentsWithTransaction during live resize");
+ d->liveResizeStartObserver = QMacNotificationObserver(window, NSWindowWillStartLiveResizeNotification, [this] {
+ d->layer.presentsWithTransaction = true;
+ });
+ d->liveResizeEndObserver = QMacNotificationObserver(window, NSWindowDidEndLiveResizeNotification, [this] {
+ d->layer.presentsWithTransaction = false;
+ });
+ }
+ }
+#endif
+
[d->curDrawable release];
d->curDrawable = nil;
for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
+ d->lastGpuTime[i] = 0;
if (!d->sem[i])
d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1);
}
@@ -4350,12 +6423,13 @@ bool QMetalSwapChain::createOrResize()
rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget
rtWrapper.d->pixelSize = pixelSize;
- rtWrapper.d->dpr = float(window->devicePixelRatio());
+ rtWrapper.d->dpr = scaleFactor;
rtWrapper.d->sampleCount = samples;
rtWrapper.d->colorAttCount = 1;
rtWrapper.d->dsAttCount = ds ? 1 : 0;
- qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, size %dx%d", pixelSize.width(), pixelSize.height());
+ qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, pixel size %dx%d (scale %.2f)",
+ pixelSize.width(), pixelSize.height(), scaleFactor);
if (samples > 1) {
MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
@@ -4384,21 +6458,28 @@ QRhiSwapChainHdrInfo QMetalSwapChain::hdrInfo()
{
QRhiSwapChainHdrInfo info;
info.limitsType = QRhiSwapChainHdrInfo::ColorComponentValue;
- if (m_format == SDR) {
- info.limits.colorComponentValue.maxColorComponentValue = 1;
- return info;
- }
+ info.limits.colorComponentValue.maxColorComponentValue = 1;
+ info.limits.colorComponentValue.maxPotentialColorComponentValue = 1;
+ info.luminanceBehavior = QRhiSwapChainHdrInfo::DisplayReferred; // 1.0 = SDR white
+ info.sdrWhiteLevel = 200; // typical value, but dummy (don't know the real one); won't matter due to being display-referred
-#ifdef Q_OS_MACOS
- info.isHardCodedDefaults = false;
- NSView *view = reinterpret_cast<NSView *>(window->winId());
- info.limits.colorComponentValue.maxColorComponentValue = view.window.screen.maximumExtendedDynamicRangeColorComponentValue;
-#else
- // ### Fixme: Maybe retrieve the brightness from the screen and if we're not at full brightness we might be able to do more.
- // For now, assume 2, in line with iPhone 12 specs that claim 625 nits max brightness and 1200 nits max HDR brightness.
- info.isHardCodedDefaults = true;
- info.limits.colorComponentValue.maxColorComponentValue = 2;
+ if (m_window) {
+ // Must use m_window, not window, given this may be called before createOrResize().
+#if defined(Q_OS_MACOS)
+ NSView *view = reinterpret_cast<NSView *>(m_window->winId());
+ NSScreen *screen = view.window.screen;
+ info.limits.colorComponentValue.maxColorComponentValue = screen.maximumExtendedDynamicRangeColorComponentValue;
+ info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.maximumPotentialExtendedDynamicRangeColorComponentValue;
+#elif defined(Q_OS_IOS)
+ if (@available(iOS 16.0, *)) {
+ UIView *view = reinterpret_cast<UIView *>(m_window->winId());
+ UIScreen *screen = view.window.windowScene.screen;
+ info.limits.colorComponentValue.maxColorComponentValue = view.window.windowScene.screen.currentEDRHeadroom;
+ info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.potentialEDRHeadroom;
+ }
#endif
+ }
+
return info;
}