1 files changed, 2582 insertions, 501 deletions
diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm
index dc21feb74c..1c7b397193 100644
--- a/src/gui/rhi/qrhimetal.mm
+++ b/src/gui/rhi/qrhimetal.mm
@@ -1,10 +1,18 @@
-// Copyright (C) 2019 The Qt Company Ltd.
+// Copyright (C) 2023 The Qt Company Ltd.
 // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
 
-#include "qrhimetal_p_p.h"
+#include "qrhimetal_p.h"
+#include "qshader_p.h"
 #include <QGuiApplication>
 #include <QWindow>
+#include <QUrl>
+#include <QFile>
+#include <QTemporaryFile>
+#include <QFileInfo>
 #include <qmath.h>
+#include <QOperatingSystemVersion>
+
+#include <QtCore/private/qcore_mac_p.h>
 
 #ifdef Q_OS_MACOS
 #include <AppKit/AppKit.h>
@@ -32,19 +40,28 @@ QT_BEGIN_NAMESPACE
 #error ARC not supported
 #endif
 
-// Note: we expect everything here pass the Metal API validation when running
-// in Debug mode in XCode. Some of the issues that break validation are not
-// obvious and not visible when running outside XCode.
-//
-// An exception is the nextDrawable Called Early blah blah warning, which is
-// plain and simply false.
+// Even though the macOS 13 MTLBinaryArchive problem (QTBUG-106703) seems
+// to be solved in later 13.x releases, we have reports from old Intel hardware
+// and older macOS versions where this causes problems (QTBUG-114338).
+// Thus we no longer do OS version based differentiation, but rather have a
+// single toggle that is currently on, and so QRhi::(set)pipelineCache()
+// does nothing with Metal.
+#define QRHI_METAL_DISABLE_BINARY_ARCHIVE
+
+// We should be able to operate with command buffers that do not automatically
+// retain/release the resources used by them. (since we have logic that mirrors
+// other backends such as the Vulkan one anyway)
+#define QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
 
 /*!
     \class QRhiMetalInitParams
     \inmodule QtRhi
-    \internal
+    \since 6.6
     \brief Metal specific initialization parameters.
 
+    \note This is a RHI API with limited compatibility guarantees, see \l QRhi
+    for details.
+
     A Metal-based QRhi needs no special parameters for initialization.
 
     \badcode
@@ -52,10 +69,13 @@ QT_BEGIN_NAMESPACE
         rhi = QRhi::create(QRhi::Metal, &params);
     \endcode
 
-    \note Metal API validation cannot be enabled by the application. Instead,
-    run the debug build of the application in XCode. Generating a
-    \c{.xcodeproj} file via \c{qmake -spec macx-xcode} provides a convenient
-    way to enable this.
+    \note Metal API validation cannot be enabled programmatically by the QRhi.
+    Instead, either run the debug build of the application in XCode, by
+    generating a \c{.xcodeproj} file via \c{cmake -G Xcode}, or set the
+    environment variable \c{METAL_DEVICE_WRAPPER_TYPE=1}. The variable needs to
+    be set early on in the environment, perferably before starting the process;
+    attempting to set it at QRhi creation time is not functional in practice.
+    (too late probably)
 
     \note QRhiSwapChain can only target QWindow instances that have their
     surface type set to QSurface::MetalSurface.
@@ -74,14 +94,30 @@ QT_BEGIN_NAMESPACE
 /*!
     \class QRhiMetalNativeHandles
     \inmodule QtRhi
-    \internal
+    \since 6.6
     \brief Holds the Metal device used by the QRhi.
+
+    \note This is a RHI API with limited compatibility guarantees, see \l QRhi
+    for details.
  */
 
 /*!
+    \variable QRhiMetalNativeHandles::dev
+
+    Set to a valid MTLDevice to import an existing device.
+*/
+
+/*!
+    \variable QRhiMetalNativeHandles::cmdQueue
+
+    Set to a valid MTLCommandQueue when importing an existing command queue.
+    When \nullptr, QRhi will create a new command queue.
+*/
+
+/*!
     \class QRhiMetalCommandBufferNativeHandles
     \inmodule QtRhi
-    \internal
+    \since 6.6
     \brief Holds the MTLCommandBuffer and MTLRenderCommandEncoder objects that are backing a QRhiCommandBuffer.
 
     \note The command buffer object is only guaranteed to be valid while
@@ -93,14 +129,28 @@ QT_BEGIN_NAMESPACE
     \note The command encoder is only valid while recording a pass, that is,
     between \l{QRhiCommandBuffer::beginPass()} -
     \l{QRhiCommandBuffer::endPass()}.
+
+    \note This is a RHI API with limited compatibility guarantees, see \l QRhi
+    for details.
  */
 
+/*!
+    \variable QRhiMetalCommandBufferNativeHandles::commandBuffer
+*/
+
+/*!
+    \variable QRhiMetalCommandBufferNativeHandles::encoder
+*/
+
 struct QMetalShader
 {
     id<MTLLibrary> lib = nil;
     id<MTLFunction> func = nil;
-    std::array<uint, 3> localSize;
+    std::array<uint, 3> localSize = {};
+    uint outputVertexCount = 0;
+    QShaderDescription desc;
     QShader::NativeResourceBindingMap nativeResourceBindingMap;
+    QShader::NativeShaderInfo nativeShaderInfo;
 
     void destroy() {
         nativeResourceBindingMap.clear();
@@ -113,11 +163,14 @@ struct QMetalShader
 
 struct QRhiMetalData
 {
-    QRhiMetalData(QRhiImplementation *rhi) : ofr(rhi) { }
+    QRhiMetalData(QRhiMetal *rhi) : q(rhi), ofr(rhi) { }
 
+    QRhiMetal *q;
     id<MTLDevice> dev = nil;
     id<MTLCommandQueue> cmdQueue = nil;
+    API_AVAILABLE(macosx(11.0), ios(14.0)) id<MTLBinaryArchive> binArch = nil;
 
+    id<MTLCommandBuffer> newCommandBuffer();
     MTLRenderPassDescriptor *createDefaultRenderPass(bool hasDepthStencil,
                                                      const QColor &colorClearValue,
                                                      const QRhiDepthStencilClearValue &depthStencilClearValue,
@@ -125,6 +178,11 @@ struct QRhiMetalData
     id<MTLLibrary> createMetalLib(const QShader &shader, QShader::Variant shaderVariant,
                                   QString *error, QByteArray *entryPoint, QShaderKey *activeKey);
     id<MTLFunction> createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint);
+    bool setupBinaryArchive(NSURL *sourceFileUrl = nil);
+    void addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc);
+    void trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc);
+    void addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc);
+    void trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc);
 
     struct DeferredReleaseEntry {
         enum Type {
@@ -159,6 +217,8 @@ struct QRhiMetalData
             struct {
                 id<MTLRenderPipelineState> pipelineState;
                 id<MTLDepthStencilState> depthStencilState;
+                std::array<id<MTLComputePipelineState>, 3> tessVertexComputeState;
+                id<MTLComputePipelineState> tessTessControlComputeState;
             } graphicsPipeline;
             struct {
                 id<MTLComputePipelineState> pipelineState;
@@ -170,6 +230,7 @@ struct QRhiMetalData
     struct OffscreenFrame {
         OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { }
         bool active = false;
+        double lastGpuTime = 0;
         QMetalCommandBuffer cbWrapper;
     } ofr;
 
@@ -184,6 +245,17 @@ struct QRhiMetalData
     };
     QVarLengthArray<TextureReadback, 2> activeTextureReadbacks;
 
+    struct BufferReadback
+    {
+        int activeFrameSlot = -1;
+        QRhiReadbackResult *result;
+        quint32 offset;
+        quint32 readSize;
+        id<MTLBuffer> buf;
+    };
+
+    QVarLengthArray<BufferReadback, 2> activeBufferReadbacks;
+
     MTLCaptureManager *captureMgr;
     id<MTLCaptureScope> captureScope = nil;
 
@@ -255,14 +327,16 @@ struct QMetalShaderResourceBindingsData {
         QRhiBatchedBindings<id<MTLTexture> > textureBatches;
         QRhiBatchedBindings<id<MTLSamplerState> > samplerBatches;
     } res[QRhiMetal::SUPPORTED_STAGES];
-    enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2 };
+    enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2, TESSCTRL = 3, TESSEVAL = 4 };
 };
 
 struct QMetalCommandBufferData
 {
     id<MTLCommandBuffer> cb;
+    double lastGpuTime = 0;
     id<MTLRenderCommandEncoder> currentRenderPassEncoder;
     id<MTLComputeCommandEncoder> currentComputePassEncoder;
+    id<MTLComputeCommandEncoder> tessellationComputeEncoder;
     MTLRenderPassDescriptor *currentPassRpDesc;
     int currentFirstVertexBinding;
     QRhiBatchedBindings<id<MTLBuffer> > currentVertexInputsBuffers;
@@ -294,8 +368,11 @@ struct QMetalRenderTargetData
     struct {
         ColorAtt colorAtt[QMetalRenderPassDescriptor::MAX_COLOR_ATTACHMENTS];
         id<MTLTexture> dsTex = nil;
+        id<MTLTexture> dsResolveTex = nil;
         bool hasStencil = false;
         bool depthNeedsStore = false;
+        bool preserveColor = false;
+        bool preserveDs = false;
     } fb;
 
     QRhiRenderTargetAttachmentTracker::ResIdList currentResIdList;
@@ -303,6 +380,7 @@ struct QMetalRenderTargetData
 
 struct QMetalGraphicsPipelineData
 {
+    QMetalGraphicsPipeline *q = nullptr;
     id<MTLRenderPipelineState> ps = nil;
     id<MTLDepthStencilState> ds = nil;
     MTLPrimitiveType primitiveType;
@@ -313,6 +391,54 @@ struct QMetalGraphicsPipelineData
     float slopeScaledDepthBias;
     QMetalShader vs;
     QMetalShader fs;
+    struct ExtraBufferManager {
+        enum class WorkBufType {
+            DeviceLocal,
+            HostVisible
+        };
+        QMetalBuffer *acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type = WorkBufType::DeviceLocal);
+        QVector<QMetalBuffer *> deviceLocalWorkBuffers;
+        QVector<QMetalBuffer *> hostVisibleWorkBuffers;
+    } extraBufMgr;
+    struct Tessellation {
+        QMetalGraphicsPipelineData *q = nullptr;
+        bool enabled = false;
+        bool failed = false;
+        uint inControlPointCount;
+        uint outControlPointCount;
+        QMetalShader compVs[3];
+        std::array<id<MTLComputePipelineState>, 3> vertexComputeState = {};
+        id<MTLComputePipelineState> tessControlComputeState = nil;
+        QMetalShader compTesc;
+        QMetalShader vertTese;
+        quint32 vsCompOutputBufferSize(quint32 vertexOrIndexCount, quint32 instanceCount) const
+        {
+            // max vertex output components = resourceLimit(MaxVertexOutputs) * 4 = 60
+            return vertexOrIndexCount * instanceCount * sizeof(float) * 60;
+        }
+        quint32 tescCompOutputBufferSize(quint32 patchCount) const
+        {
+            return outControlPointCount * patchCount * sizeof(float) * 60;
+        }
+        quint32 tescCompPatchOutputBufferSize(quint32 patchCount) const
+        {
+            // assume maxTessellationControlPerPatchOutputComponents is 128
+            return patchCount * sizeof(float) * 128;
+        }
+        quint32 patchCountForDrawCall(quint32 vertexOrIndexCount, quint32 instanceCount) const
+        {
+            return ((vertexOrIndexCount + inControlPointCount - 1) / inControlPointCount) * instanceCount;
+        }
+        static int vsCompVariantToIndex(QShader::Variant vertexCompVariant);
+        id<MTLComputePipelineState> vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant);
+        id<MTLComputePipelineState> tescCompPipeline(QRhiMetal *rhiD);
+        id<MTLRenderPipelineState> teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline);
+    } tess;
+    void setupVertexInputDescriptor(MTLVertexDescriptor *desc);
+    void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc);
+
+    // SPIRV-Cross buffer size buffers
+    QMetalBuffer *bufferSizeBuffer = nullptr;
 };
 
 struct QMetalComputePipelineData
@@ -320,6 +446,9 @@ struct QMetalComputePipelineData
     id<MTLComputePipelineState> ps = nil;
     QMetalShader cs;
     MTLSize localSize;
+
+    // SPIRV-Cross buffer size buffers
+    QMetalBuffer *bufferSizeBuffer = nullptr;
 };
 
 struct QMetalSwapChainData
@@ -327,10 +456,16 @@ struct QMetalSwapChainData
     CAMetalLayer *layer = nullptr;
     id<CAMetalDrawable> curDrawable = nil;
     dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT];
+    double lastGpuTime[QMTL_FRAMES_IN_FLIGHT];
     MTLRenderPassDescriptor *rp = nullptr;
     id<MTLTexture> msaaTex[QMTL_FRAMES_IN_FLIGHT];
     QRhiTexture::Format rhiColorFormat;
     MTLPixelFormat colorFormat;
+#ifdef Q_OS_MACOS
+    bool liveResizeObserverSet = false;
+    QMacNotificationObserver liveResizeStartObserver;
+    QMacNotificationObserver liveResizeEndObserver;
+#endif
 };
 
 QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *importDevice)
@@ -341,7 +476,7 @@ QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *import
 
     importedDevice = importDevice != nullptr;
     if (importedDevice) {
-        if (d->dev) {
+        if (importDevice->dev) {
             d->dev = (id<MTLDevice>) importDevice->dev;
             importedCmdQueue = importDevice->cmdQueue != nullptr;
             if (importedCmdQueue)
@@ -375,9 +510,44 @@ bool QRhiMetal::probe(QRhiMetalInitParams *params)
     return false;
 }
 
+id<MTLCommandBuffer> QRhiMetalData::newCommandBuffer()
+{
+#ifdef QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES
+    // Do not let the command buffer mess with the refcount of objects. We do
+    // have a proper render loop and will manage lifetimes similarly to other
+    // backends (Vulkan).
+    return [cmdQueue commandBufferWithUnretainedReferences];
+#else
+    return [cmdQueue commandBuffer];
+#endif
+}
+
+bool QRhiMetalData::setupBinaryArchive(NSURL *sourceFileUrl)
+{
+#ifdef QRHI_METAL_DISABLE_BINARY_ARCHIVE
+    return false;
+#endif
+
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        [binArch release];
+        MTLBinaryArchiveDescriptor *binArchDesc = [MTLBinaryArchiveDescriptor new];
+        binArchDesc.url = sourceFileUrl;
+        NSError *err = nil;
+        binArch = [dev newBinaryArchiveWithDescriptor: binArchDesc error: &err];
+        [binArchDesc release];
+        if (!binArch) {
+            const QString msg = QString::fromNSString(err.localizedDescription);
+            qWarning("newBinaryArchiveWithDescriptor failed: %s", qPrintable(msg));
+            return false;
+        }
+        return true;
+    }
+    return false;
+}
+
 bool QRhiMetal::create(QRhi::Flags flags)
 {
-    Q_UNUSED(flags);
+    rhiFlags = flags;
 
     if (importedDevice)
         [d->dev retain];
@@ -392,10 +562,12 @@ bool QRhiMetal::create(QRhi::Flags flags)
     const QString deviceName = QString::fromNSString([d->dev name]);
     qCDebug(QRHI_LOG_INFO, "Metal device: %s", qPrintable(deviceName));
     driverInfoStruct.deviceName = deviceName.toUtf8();
-    driverInfoStruct.deviceId = [d->dev registryID];
-#ifdef Q_OS_IOS
-    driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice;
-#else
+
+    // deviceId and vendorId stay unset for now. Note that registryID is not
+    // suitable as deviceId because it does not seem stable on macOS and can
+    // apparently change when the system is rebooted.
+
+#ifdef Q_OS_MACOS
     if (@available(macOS 10.15, *)) {
         const MTLDeviceLocation deviceLocation = [d->dev location];
         switch (deviceLocation) {
@@ -412,8 +584,14 @@ bool QRhiMetal::create(QRhi::Flags flags)
             break;
         }
     }
+#else
+    driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice;
 #endif
 
+    const QOperatingSystemVersion ver = QOperatingSystemVersion::current();
+    osMajor = ver.majorVersion();
+    osMinor = ver.minorVersion();
+
     if (importedCmdQueue)
         [d->cmdQueue retain];
     else
@@ -433,6 +611,7 @@ bool QRhiMetal::create(QRhi::Flags flags)
     if (@available(macOS 10.15, *))
         caps.isAppleGPU = [d->dev supportsFamily:MTLGPUFamilyApple7];
     caps.maxThreadGroupSize = 1024;
+    caps.multiView = true;
 #elif defined(Q_OS_TVOS)
     if ([d->dev supportsFeatureSet: MTLFeatureSet(30003)]) // MTLFeatureSet_tvOS_GPUFamily2_v1
         caps.maxTextureSize = 16384;
@@ -459,8 +638,10 @@ bool QRhiMetal::create(QRhi::Flags flags)
     }
     caps.isAppleGPU = true;
     if (@available(iOS 13, *)) {
-        if ([d->dev supportsFamily:MTLGPUFamilyApple4])
+        if ([d->dev supportsFamily: MTLGPUFamilyApple4])
             caps.maxThreadGroupSize = 1024;
+        if ([d->dev supportsFamily: MTLGPUFamilyApple5])
+            caps.multiView = true;
     }
 #endif
 
@@ -470,6 +651,9 @@ bool QRhiMetal::create(QRhi::Flags flags)
             caps.supportedSampleCounts.append(sampleCount);
     }
 
+    if (rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+        d->setupBinaryArchive();
+
     nativeHandlesStruct.dev = (MTLDevice *) d->dev;
     nativeHandlesStruct.cmdQueue = (MTLCommandQueue *) d->cmdQueue;
 
@@ -488,6 +672,11 @@ void QRhiMetal::destroy()
     [d->captureScope release];
     d->captureScope = nil;
 
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        [d->binArch release];
+        d->binArch = nil;
+    }
+
     [d->cmdQueue release];
     if (!importedCmdQueue)
         d->cmdQueue = nil;
@@ -502,17 +691,6 @@ QVector<int> QRhiMetal::supportedSampleCounts() const
     return caps.supportedSampleCounts;
 }
 
-int QRhiMetal::effectiveSampleCount(int sampleCount) const
-{
-    // Stay compatible with QSurfaceFormat and friends where samples == 0 means the same as 1.
-    const int s = qBound(1, sampleCount, 64);
-    if (!supportedSampleCounts().contains(s)) {
-        qWarning("Attempted to set unsupported sample count %d", sampleCount);
-        return 1;
-    }
-    return s;
-}
-
 QRhiSwapChain *QRhiMetal::createSwapChain()
 {
     return new QMetalSwapChain(this);
@@ -600,7 +778,7 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
     case QRhi::DebugMarkers:
         return true;
     case QRhi::Timestamps:
-        return false;
+        return true;
     case QRhi::Instancing:
         return true;
     case QRhi::CustomInstanceStepRate:
@@ -644,7 +822,12 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
     case QRhi::ReadBackAnyTextureFormat:
         return true;
     case QRhi::PipelineCacheDataLoadSave:
-        return false;
+    {
+        if (@available(macOS 11.0, iOS 14.0, *))
+            return true;
+        else
+            return false;
+    }
     case QRhi::ImageDataStride:
         return true;
     case QRhi::RenderBufferImport:
@@ -656,13 +839,29 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const
     case QRhi::TextureArrays:
         return true;
     case QRhi::Tessellation:
-        return false;
+        return true;
     case QRhi::GeometryShader:
         return false;
     case QRhi::TextureArrayRange:
         return false;
     case QRhi::NonFillPolygonMode:
         return true;
+    case QRhi::OneDimensionalTextures:
+        return true;
+    case QRhi::OneDimensionalTextureMipmaps:
+        return false;
+    case QRhi::HalfAttributes:
+        return true;
+    case QRhi::RenderToOneDimensionalTexture:
+        return false;
+    case QRhi::ThreeDimensionalTextureMipmaps:
+        return true;
+    case QRhi::MultiView:
+        return caps.multiView;
+    case QRhi::TextureViewFormat:
+        return false;
+    case QRhi::ResolveDepthStencil:
+        return true;
     default:
         Q_UNREACHABLE();
         return false;
@@ -742,14 +941,133 @@ bool QRhiMetal::isDeviceLost() const
     return false;
 }
 
+struct QMetalPipelineCacheDataHeader
+{
+    quint32 rhiId;
+    quint32 arch;
+    quint32 dataSize;
+    quint32 osMajor;
+    quint32 osMinor;
+    char driver[236];
+};
+
 QByteArray QRhiMetal::pipelineCacheData()
 {
-    return QByteArray();
+    Q_STATIC_ASSERT(sizeof(QMetalPipelineCacheDataHeader) == 256);
+    QByteArray data;
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        if (!d->binArch || !rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+            return data;
+
+        QTemporaryFile tmp;
+        if (!tmp.open()) {
+            qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal");
+            return data;
+        }
+        tmp.close(); // the file exists until the tmp dtor runs
+
+        const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath();
+        NSURL *url = QUrl::fromLocalFile(fn).toNSURL();
+        NSError *err = nil;
+        if (![d->binArch serializeToURL: url error: &err]) {
+            const QString msg = QString::fromNSString(err.localizedDescription);
+            // Some of these "errors" are not actual errors. (think of "Nothing to serialize")
+            qCDebug(QRHI_LOG_INFO, "Failed to serialize MTLBinaryArchive: %s", qPrintable(msg));
+            return data;
+        }
+
+        QFile f(fn);
+        if (!f.open(QIODevice::ReadOnly)) {
+            qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to reopen temporary file");
+            return data;
+        }
+        const QByteArray blob = f.readAll();
+        f.close();
+
+        const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader);
+        const quint32 dataSize = quint32(blob.size());
+
+        data.resize(headerSize + dataSize);
+
+        QMetalPipelineCacheDataHeader header = {};
+        header.rhiId = pipelineCacheRhiId();
+        header.arch = quint32(sizeof(void*));
+        header.dataSize = quint32(dataSize);
+        header.osMajor = osMajor;
+        header.osMinor = osMinor;
+        const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length()));
+        if (driverStrLen)
+            memcpy(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen);
+        header.driver[driverStrLen] = '\0';
+
+        memcpy(data.data(), &header, headerSize);
+        memcpy(data.data() + headerSize, blob.constData(), dataSize);
+    }
+    return data;
 }
 
 void QRhiMetal::setPipelineCacheData(const QByteArray &data)
 {
-    Q_UNUSED(data);
+    if (data.isEmpty())
+        return;
+
+    const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader);
+    if (data.size() < qsizetype(headerSize)) {
+        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (header incomplete)");
+        return;
+    }
+
+    const size_t dataOffset = headerSize;
+    QMetalPipelineCacheDataHeader header;
+    memcpy(&header, data.constData(), headerSize);
+
+    const quint32 rhiId = pipelineCacheRhiId();
+    if (header.rhiId != rhiId) {
+        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: The data is for a different QRhi version or backend (%u, %u)",
+                rhiId, header.rhiId);
+        return;
+    }
+
+    const quint32 arch = quint32(sizeof(void*));
+    if (header.arch != arch) {
+        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Architecture does not match (%u, %u)",
+                arch, header.arch);
+        return;
+    }
+
+    if (header.osMajor != osMajor || header.osMinor != osMinor) {
+        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: OS version does not match (%u.%u, %u.%u)",
+                osMajor, osMinor, header.osMajor, header.osMinor);
+        return;
+    }
+
+    const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length()));
+    if (strncmp(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen)) {
+        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Metal device name does not match");
+        return;
+    }
+
+    if (data.size() < qsizetype(dataOffset + header.dataSize)) {
+        qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (data incomplete)");
+        return;
+    }
+
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        const char *p = data.constData() + dataOffset;
+
+        QTemporaryFile tmp;
+        if (!tmp.open()) {
+            qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal");
+            return;
+        }
+        tmp.write(p, header.dataSize);
+        tmp.close(); // the file exists until the tmp dtor runs
+
+        const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath();
+        NSURL *url = QUrl::fromLocalFile(fn).toNSURL();
+        if (d->setupBinaryArchive(url))
+            qCDebug(QRHI_LOG_INFO, "Created MTLBinaryArchive with initial data of %u bytes", header.dataSize);
+    }
 }
 
 QRhiRenderBuffer *QRhiMetal::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize,
@@ -819,6 +1137,136 @@ static inline int mapBinding(int binding,
     return -1;
 }
 
+static inline void bindStageBuffers(QMetalCommandBuffer *cbD,
+                                    int stage,
+                                    const QRhiBatchedBindings<id<MTLBuffer>>::Batch &bufferBatch,
+                                    const QRhiBatchedBindings<NSUInteger>::Batch &offsetBatch)
+{
+    switch (stage) {
+    case QMetalShaderResourceBindingsData::VERTEX:
+        [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData()
+          offsets: offsetBatch.resources.constData()
+          withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::FRAGMENT:
+        [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData()
+          offsets: offsetBatch.resources.constData()
+          withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::COMPUTE:
+        [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData()
+          offsets: offsetBatch.resources.constData()
+          withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::TESSCTRL:
+    case QMetalShaderResourceBindingsData::TESSEVAL:
+        // do nothing.  These are used later for tessellation
+        break;
+    default:
+        Q_UNREACHABLE();
+        break;
+    }
+}
+
+static inline void bindStageTextures(QMetalCommandBuffer *cbD,
+                                     int stage,
+                                     const QRhiBatchedBindings<id<MTLTexture>>::Batch &textureBatch)
+{
+    switch (stage) {
+    case QMetalShaderResourceBindingsData::VERTEX:
+        [cbD->d->currentRenderPassEncoder setVertexTextures: textureBatch.resources.constData()
+          withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::FRAGMENT:
+        [cbD->d->currentRenderPassEncoder setFragmentTextures: textureBatch.resources.constData()
+          withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::COMPUTE:
+        [cbD->d->currentComputePassEncoder setTextures: textureBatch.resources.constData()
+          withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::TESSCTRL:
+    case QMetalShaderResourceBindingsData::TESSEVAL:
+        // do nothing.  These are used later for tessellation
+        break;
+    default:
+        Q_UNREACHABLE();
+        break;
+    }
+}
+
+static inline void bindStageSamplers(QMetalCommandBuffer *cbD,
+                                     int encoderStage,
+                                     const QRhiBatchedBindings<id<MTLSamplerState>>::Batch &samplerBatch)
+{
+    switch (encoderStage) {
+    case QMetalShaderResourceBindingsData::VERTEX:
+        [cbD->d->currentRenderPassEncoder setVertexSamplerStates: samplerBatch.resources.constData()
+          withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::FRAGMENT:
+        [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: samplerBatch.resources.constData()
+          withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::COMPUTE:
+        [cbD->d->currentComputePassEncoder setSamplerStates: samplerBatch.resources.constData()
+          withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))];
+        break;
+    case QMetalShaderResourceBindingsData::TESSCTRL:
+    case QMetalShaderResourceBindingsData::TESSEVAL:
+        // do nothing.  These are used later for tessellation
+        break;
+    default:
+        Q_UNREACHABLE();
+        break;
+    }
+}
+
+// Helper that is not used during the common vertex+fragment and compute
+// pipelines, but is necessary when tessellation is involved and so the
+// graphics pipeline is under the hood a combination of multiple compute and
+// render pipelines. We need to be able to set the buffers, textures, samplers
+// when a switching between render and compute encoders.
+static inline void rebindShaderResources(QMetalCommandBuffer *cbD, int resourceStage, int encoderStage,
+                                         const QMetalShaderResourceBindingsData *customBindingState = nullptr)
+{
+    const QMetalShaderResourceBindingsData *bindingData = customBindingState ? customBindingState : &cbD->d->currentShaderResourceBindingState;
+
+    for (int i = 0, ie = bindingData->res[resourceStage].bufferBatches.batches.count(); i != ie; ++i) {
+        const auto &bufferBatch(bindingData->res[resourceStage].bufferBatches.batches[i]);
+        const auto &offsetBatch(bindingData->res[resourceStage].bufferOffsetBatches.batches[i]);
+        bindStageBuffers(cbD, encoderStage, bufferBatch, offsetBatch);
+    }
+
+    for (int i = 0, ie = bindingData->res[resourceStage].textureBatches.batches.count(); i != ie; ++i) {
+        const auto &batch(bindingData->res[resourceStage].textureBatches.batches[i]);
+        bindStageTextures(cbD, encoderStage, batch);
+    }
+
+    for (int i = 0, ie = bindingData->res[resourceStage].samplerBatches.batches.count(); i != ie; ++i) {
+        const auto &batch(bindingData->res[resourceStage].samplerBatches.batches[i]);
+        bindStageSamplers(cbD, encoderStage, batch);
+    }
+}
+
+static inline QRhiShaderResourceBinding::StageFlag toRhiSrbStage(int stage)
+{
+    switch (stage) {
+    case QMetalShaderResourceBindingsData::VERTEX:
+        return QRhiShaderResourceBinding::StageFlag::VertexStage;
+    case QMetalShaderResourceBindingsData::TESSCTRL:
+        return QRhiShaderResourceBinding::StageFlag::TessellationControlStage;
+    case QMetalShaderResourceBindingsData::TESSEVAL:
+        return QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage;
+    case QMetalShaderResourceBindingsData::FRAGMENT:
+        return QRhiShaderResourceBinding::StageFlag::FragmentStage;
+    case QMetalShaderResourceBindingsData::COMPUTE:
+        return QRhiShaderResourceBinding::StageFlag::ComputeStage;
+    }
+
+    Q_UNREACHABLE_RETURN(QRhiShaderResourceBinding::StageFlag::VertexStage);
+}
+
 void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD,
                                               QMetalCommandBuffer *cbD,
                                               int dynamicOffsetCount,
@@ -828,8 +1276,8 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
 {
     QMetalShaderResourceBindingsData bindingData;
 
-    for (const QRhiShaderResourceBinding &binding : qAsConst(srbD->sortedBindings)) {
-        const QRhiShaderResourceBinding::Data *b = binding.data();
+    for (const QRhiShaderResourceBinding &binding : std::as_const(srbD->sortedBindings)) {
+        const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(binding);
         switch (b->type) {
         case QRhiShaderResourceBinding::UniformBuffer:
         {
@@ -843,20 +1291,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
                     break;
                 }
             }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Buffer);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::VERTEX].buffers.append({ nativeBinding, mtlbuf, offset });
-            }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset });
-            }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Buffer);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset });
+
+            for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+                if (b->stage.testFlag(toRhiSrbStage(stage))) {
+                    const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer);
+                    if (nativeBinding >= 0)
+                        bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset });
+                }
             }
         }
             break;
@@ -868,36 +1309,21 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             for (int elem = 0; elem < data->count; ++elem) {
                 QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.stex.texSamplers[elem].tex);
                 QMetalSampler *samplerD = QRHI_RES(QMetalSampler, b->u.stex.texSamplers[elem].sampler);
-                if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
-                    // Must handle all three cases (combined, separate, separate):
-                    //   first = texture binding, second = sampler binding
-                    //   first = texture binding
-                    //   first = sampler binding (i.e. BindingType::Texture...)
-                    const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture);
-                    const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Sampler)
-                                                                : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture) : -1);
-                    if (textureBinding >= 0 && texD)
-                        bindingData.res[QMetalShaderResourceBindingsData::VERTEX].textures.append({ textureBinding + elem, texD->d->tex });
-                    if (samplerBinding >= 0)
-                        bindingData.res[QMetalShaderResourceBindingsData::VERTEX].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
-                }
-                if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
-                    const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture);
-                    const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Sampler)
-                                                                : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture) : -1);
-                    if (textureBinding >= 0 && texD)
-                        bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].textures.append({ textureBinding + elem, texD->d->tex });
-                    if (samplerBinding >= 0)
-                        bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
-                }
-                if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
-                    const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture);
-                    const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Sampler)
-                                                                : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture) : -1);
-                    if (textureBinding >= 0 && texD)
-                        bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].textures.append({ textureBinding + elem, texD->d->tex });
-                    if (samplerBinding >= 0)
-                        bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
+
+                for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+                    if (b->stage.testFlag(toRhiSrbStage(stage))) {
+                        // Must handle all three cases (combined, separate, separate):
+                        //   first = texture binding, second = sampler binding
+                        //   first = texture binding
+                        //   first = sampler binding (i.e. BindingType::Texture...)
+                        const int textureBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture);
+                        const int samplerBinding = texD && samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Sampler)
+                                                                    : (samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture) : -1);
+                        if (textureBinding >= 0 && texD)
+                            bindingData.res[stage].textures.append({ textureBinding + elem, texD->d->tex });
+                        if (samplerBinding >= 0)
+                            bindingData.res[stage].samplers.append({ samplerBinding + elem, samplerD->d->samplerState });
+                    }
                 }
             }
         }
@@ -908,20 +1334,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
         {
             QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex);
             id<MTLTexture> t = texD->d->viewForLevel(b->u.simage.level);
-            if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::VERTEX].textures.append({ nativeBinding, t });
-            }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].textures.append({ nativeBinding, t });
-            }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].textures.append({ nativeBinding, t });
+
+            for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+                if (b->stage.testFlag(toRhiSrbStage(stage))) {
+                    const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture);
+                    if (nativeBinding >= 0)
+                        bindingData.res[stage].textures.append({ nativeBinding, t });
+                }
             }
         }
             break;
@@ -932,20 +1351,12 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
             id<MTLBuffer> mtlbuf = bufD->d->buf[0];
             quint32 offset = b->u.sbuf.offset;
-            if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Buffer);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::VERTEX].buffers.append({ nativeBinding, mtlbuf, offset });
-            }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset });
-            }
-            if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) {
-                const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Buffer);
-                if (nativeBinding >= 0)
-                    bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset });
+            for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
+                if (b->stage.testFlag(toRhiSrbStage(stage))) {
+                    const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer);
+                    if (nativeBinding >= 0)
+                        bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset });
+                }
             }
         }
             break;
@@ -956,9 +1367,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
     }
 
     for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) {
-        if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT))
+        if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT
+                || stage == QMetalShaderResourceBindingsData::TESSCTRL || stage == QMetalShaderResourceBindingsData::TESSEVAL))
             continue;
-        if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && stage == QMetalShaderResourceBindingsData::COMPUTE)
+        if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && (stage == QMetalShaderResourceBindingsData::COMPUTE))
             continue;
 
         // QRhiBatchedBindings works with the native bindings and expects
@@ -970,7 +1382,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             return a.nativeBinding < b.nativeBinding;
         });
 
-        for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : qAsConst(bindingData.res[stage].buffers)) {
+        for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : std::as_const(bindingData.res[stage].buffers)) {
             bindingData.res[stage].bufferBatches.feed(buf.nativeBinding, buf.mtlbuf);
             bindingData.res[stage].bufferOffsetBatches.feed(buf.nativeBinding, buf.offset);
         }
@@ -989,26 +1401,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             {
                 continue;
             }
-            switch (stage) {
-            case QMetalShaderResourceBindingsData::VERTEX:
-                [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData()
-                  offsets: offsetBatch.resources.constData()
-                  withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
-                break;
-            case QMetalShaderResourceBindingsData::FRAGMENT:
-                [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData()
-                  offsets: offsetBatch.resources.constData()
-                  withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
-                break;
-            case QMetalShaderResourceBindingsData::COMPUTE:
-                [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData()
-                  offsets: offsetBatch.resources.constData()
-                  withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
-                break;
-            default:
-                Q_UNREACHABLE();
-                break;
-            }
+            bindStageBuffers(cbD, stage, bufferBatch, offsetBatch);
         }
 
         if (offsetOnlyChange)
@@ -1022,10 +1415,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             return a.nativeBinding < b.nativeBinding;
         });
 
-        for (const QMetalShaderResourceBindingsData::Stage::Texture &t : qAsConst(bindingData.res[stage].textures))
+        for (const QMetalShaderResourceBindingsData::Stage::Texture &t : std::as_const(bindingData.res[stage].textures))
             bindingData.res[stage].textureBatches.feed(t.nativeBinding, t.mtltex);
 
-        for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : qAsConst(bindingData.res[stage].samplers))
+        for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : std::as_const(bindingData.res[stage].samplers))
             bindingData.res[stage].samplerBatches.feed(s.nativeBinding, s.mtlsampler);
 
         bindingData.res[stage].textureBatches.finish();
@@ -1039,23 +1432,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             {
                 continue;
             }
-            switch (stage) {
-            case QMetalShaderResourceBindingsData::VERTEX:
-                [cbD->d->currentRenderPassEncoder setVertexTextures: batch.resources.constData()
-                  withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
-                break;
-            case QMetalShaderResourceBindingsData::FRAGMENT:
-                [cbD->d->currentRenderPassEncoder setFragmentTextures: batch.resources.constData()
-                  withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
-                break;
-            case QMetalShaderResourceBindingsData::COMPUTE:
-                [cbD->d->currentComputePassEncoder setTextures: batch.resources.constData()
-                  withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
-                break;
-            default:
-                Q_UNREACHABLE();
-                break;
-            }
+            bindStageTextures(cbD, stage, batch);
         }
 
         for (int i = 0, ie = bindingData.res[stage].samplerBatches.batches.count(); i != ie; ++i) {
@@ -1066,65 +1443,68 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD
             {
                 continue;
             }
-            switch (stage) {
-            case QMetalShaderResourceBindingsData::VERTEX:
-                [cbD->d->currentRenderPassEncoder setVertexSamplerStates: batch.resources.constData()
-                  withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
-                break;
-            case QMetalShaderResourceBindingsData::FRAGMENT:
-                [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: batch.resources.constData()
-                  withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
-                break;
-            case QMetalShaderResourceBindingsData::COMPUTE:
-                [cbD->d->currentComputePassEncoder setSamplerStates: batch.resources.constData()
-                  withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))];
-                break;
-            default:
-                Q_UNREACHABLE();
-                break;
-            }
+            bindStageSamplers(cbD, stage, batch);
         }
     }
+
     cbD->d->currentShaderResourceBindingState = bindingData;
 }
 
+void QMetalGraphicsPipeline::makeActiveForCurrentRenderPassEncoder(QMetalCommandBuffer *cbD)
+{
+    [cbD->d->currentRenderPassEncoder setRenderPipelineState: d->ps];
+
+    if (cbD->d->currentDepthStencilState != d->ds) {
+        [cbD->d->currentRenderPassEncoder setDepthStencilState: d->ds];
+        cbD->d->currentDepthStencilState = d->ds;
+    }
+
+    if (cbD->currentCullMode == -1 || d->cullMode != uint(cbD->currentCullMode)) {
+        [cbD->d->currentRenderPassEncoder setCullMode: d->cullMode];
+        cbD->currentCullMode = int(d->cullMode);
+    }
+    if (cbD->currentTriangleFillMode == -1 || d->triangleFillMode != uint(cbD->currentTriangleFillMode)) {
+        [cbD->d->currentRenderPassEncoder setTriangleFillMode: d->triangleFillMode];
+        cbD->currentTriangleFillMode = int(d->triangleFillMode);
+    }
+    if (cbD->currentFrontFaceWinding == -1 || d->winding != uint(cbD->currentFrontFaceWinding)) {
+        [cbD->d->currentRenderPassEncoder setFrontFacingWinding: d->winding];
+        cbD->currentFrontFaceWinding = int(d->winding);
+    }
+    if (!qFuzzyCompare(d->depthBias, cbD->currentDepthBiasValues.first)
+            || !qFuzzyCompare(d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second))
+    {
+        [cbD->d->currentRenderPassEncoder setDepthBias: d->depthBias
+                                                        slopeScale: d->slopeScaledDepthBias
+                                                        clamp: 0.0f];
+        cbD->currentDepthBiasValues = { d->depthBias, d->slopeScaledDepthBias };
+    }
+}
+
 void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps)
 {
     QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
     Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
     QMetalGraphicsPipeline *psD = QRHI_RES(QMetalGraphicsPipeline, ps);
 
-    if (cbD->currentGraphicsPipeline != ps || cbD->currentPipelineGeneration != psD->generation) {
-        cbD->currentGraphicsPipeline = ps;
-        cbD->currentComputePipeline = nullptr;
-        cbD->currentPipelineGeneration = psD->generation;
-
-        [cbD->d->currentRenderPassEncoder setRenderPipelineState: psD->d->ps];
+    if (cbD->currentGraphicsPipeline == psD && cbD->currentPipelineGeneration == psD->generation)
+        return;
 
-        if (cbD->d->currentDepthStencilState != psD->d->ds) {
-            [cbD->d->currentRenderPassEncoder setDepthStencilState: psD->d->ds];
-            cbD->d->currentDepthStencilState = psD->d->ds;
-        }
+    cbD->currentGraphicsPipeline = psD;
+    cbD->currentComputePipeline = nullptr;
+    cbD->currentPipelineGeneration = psD->generation;
 
-        if (cbD->currentCullMode == -1 || psD->d->cullMode != uint(cbD->currentCullMode)) {
-            [cbD->d->currentRenderPassEncoder setCullMode: psD->d->cullMode];
-            cbD->currentCullMode = int(psD->d->cullMode);
-        }
-        if (cbD->currentTriangleFillMode == -1 || psD->d->triangleFillMode != uint(cbD->currentTriangleFillMode)) {
-            [cbD->d->currentRenderPassEncoder setTriangleFillMode: psD->d->triangleFillMode];
-            cbD->currentTriangleFillMode = int(psD->d->triangleFillMode);
-        }
-        if (cbD->currentFrontFaceWinding == -1 || psD->d->winding != uint(cbD->currentFrontFaceWinding)) {
-            [cbD->d->currentRenderPassEncoder setFrontFacingWinding: psD->d->winding];
-            cbD->currentFrontFaceWinding = int(psD->d->winding);
+    if (!psD->d->tess.enabled && !psD->d->tess.failed) {
+        psD->makeActiveForCurrentRenderPassEncoder(cbD);
+    } else {
+        // mark work buffers that can now be safely reused as reusable
+        for (QMetalBuffer *workBuf : psD->d->extraBufMgr.deviceLocalWorkBuffers) {
+            if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot)
+                workBuf->lastActiveFrameSlot = -1;
         }
-        if (!qFuzzyCompare(psD->d->depthBias, cbD->currentDepthBiasValues.first)
-                || !qFuzzyCompare(psD->d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second))
-        {
-            [cbD->d->currentRenderPassEncoder setDepthBias: psD->d->depthBias
-                                                            slopeScale: psD->d->slopeScaledDepthBias
-                                                            clamp: 0.0f];
-            cbD->currentDepthBiasValues = { psD->d->depthBias, psD->d->slopeScaledDepthBias };
+        for (QMetalBuffer *workBuf : psD->d->extraBufMgr.hostVisibleWorkBuffers) {
+            if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot)
+                workBuf->lastActiveFrameSlot = -1;
         }
     }
 
@@ -1137,8 +1517,8 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
 {
     QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
     Q_ASSERT(cbD->recordingPass != QMetalCommandBuffer::NoPass);
-    QMetalGraphicsPipeline *gfxPsD = QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline);
-    QMetalComputePipeline *compPsD = QRHI_RES(QMetalComputePipeline, cbD->currentComputePipeline);
+    QMetalGraphicsPipeline *gfxPsD = cbD->currentGraphicsPipeline;
+    QMetalComputePipeline *compPsD = cbD->currentComputePipeline;
 
     if (!srb) {
         if (gfxPsD)
@@ -1152,9 +1532,15 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
     bool hasDynamicOffsetInSrb = false;
     bool resNeedsRebind = false;
 
+    // SPIRV-Cross buffer size buffers
+    // Need to determine storage buffer sizes here as this is the last opportunity for storage
+    // buffer bindings (offset, size) to be specified before draw / dispatch call
+    const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer);
+    QMap<QRhiShaderResourceBinding::StageFlag, QMap<int, quint32>> storageBufferSizes;
+
     // do buffer writes, figure out if we need to rebind, and mark as in-use
     for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) {
-        const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data();
+        const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->sortedBindings.at(i));
         QMetalShaderResourceBindings::BoundResourceData &bd(srbD->boundResourceData[i]);
         switch (b->type) {
         case QRhiShaderResourceBinding::UniformBuffer:
@@ -1228,6 +1614,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
         {
             QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf);
             Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer));
+
+            if (needsBufferSizeBuffer) {
+                for (int i = 0; i < 6; ++i) {
+                    const QRhiShaderResourceBinding::StageFlag stage =
+                            QRhiShaderResourceBinding::StageFlag(1 << i);
+                    if (b->stage.testFlag(stage)) {
+                        storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size();
+                    }
+                }
+            }
+
             executeBufferHostWritesForCurrentFrame(bufD);
             if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) {
                 resNeedsRebind = true;
@@ -1243,26 +1640,141 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind
         }
     }
 
+    if (needsBufferSizeBuffer) {
+        QMetalBuffer *bufD = nullptr;
+        QVarLengthArray<QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag>, 4> shaders;
+
+        if (compPsD) {
+            bufD = compPsD->d->bufferSizeBuffer;
+            Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
+            shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage));
+        } else {
+            bufD = gfxPsD->d->bufferSizeBuffer;
+            if (gfxPsD->d->tess.enabled) {
+
+                // Assumptions
+                // * We only use one of the compute vertex shader variants in a pipeline at any one time
+                // * The vertex shader variants all have the same storage block bindings
+                // * The vertex shader variants all have the same native resource binding map
+                // * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement
+                // * The vertex shader variants all have the same MslBufferSizeBufferBinding binding
+                // => We only need to use one vertex shader variant to generate the identical shader
+                // resource bindings
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks());
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks());
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
+                         == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)
+                         == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding));
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
+                         == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]
+                         == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]);
+
+                if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+                    shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage));
+
+                if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+                    shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage));
+
+                if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+                    shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage));
+
+            } else {
+                if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+                    shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage));
+            }
+            if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding))
+                shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage));
+        }
+
+        quint32 offset = 0;
+        for (const QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag> &shader : shaders) {
+
+            const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
+
+            // if we don't have a srb entry for the buffer size buffer
+            if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) {
+
+                int maxNativeBinding = 0;
+                for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks())
+                    maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first);
+
+                const int size = (maxNativeBinding + 1) * sizeof(int);
+
+                Q_ASSERT(offset + size <= bufD->size());
+                srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size));
+
+                QMetalShaderResourceBindings::BoundResourceData bd;
+                bd.sbuf.id = bufD->m_id;
+                bd.sbuf.generation = bufD->generation;
+                srbD->boundResourceData.append(bd);
+            }
+
+            // create the buffer size buffer data
+            QVarLengthArray<int, 8> bufferSizeBufferData;
+            Q_ASSERT(storageBufferSizes.contains(shader.second));
+            const QMap<int, quint32> &sizes(storageBufferSizes[shader.second]);
+            for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) {
+                const int index = shader.first->nativeResourceBindingMap[block.binding].first;
+
+                // if the native binding is -1, the buffer is present but not accessed in the shader
+                if (index < 0)
+                    continue;
+
+                if (bufferSizeBufferData.size() <= index)
+                    bufferSizeBufferData.resize(index + 1);
+
+                Q_ASSERT(sizes.contains(block.binding));
+                bufferSizeBufferData[index] = sizes[block.binding];
+            }
+
+            QRhiBufferData data;
+            const quint32 size = bufferSizeBufferData.size() * sizeof(int);
+            data.assign(reinterpret_cast<const char *>(bufferSizeBufferData.constData()), size);
+            Q_ASSERT(offset + size <= bufD->size());
+            bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data });
+
+            // buffer offsets must be 32byte aligned
+            offset += ((size + 31) / 32) * 32;
+        }
+
+        executeBufferHostWritesForCurrentFrame(bufD);
+        bufD->lastActiveFrameSlot = currentFrameSlot;
+    }
+
     // make sure the resources for the correct slot get bound
     const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0;
     if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot)
         resNeedsRebind = true;
 
-    const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb);
+    const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srbD) : (cbD->currentComputeSrb != srbD);
     const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation;
 
     // dynamic uniform buffer offsets always trigger a rebind
     if (hasDynamicOffsetInSrb || resNeedsRebind || srbChanged || srbRebuilt) {
-        const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr };
+        const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr, nullptr, nullptr };
         if (gfxPsD) {
-            cbD->currentGraphicsSrb = srb;
+            cbD->currentGraphicsSrb = srbD;
             cbD->currentComputeSrb = nullptr;
-            resBindMaps[0] = &gfxPsD->d->vs.nativeResourceBindingMap;
-            resBindMaps[1] = &gfxPsD->d->fs.nativeResourceBindingMap;
+            if (gfxPsD->d->tess.enabled) {
+                // If tessellating, we don't know which compVs shader to use until the draw call is
+                // made. They should all have the same native resource binding map, so pick one.
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap);
+                Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap);
+                resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->tess.compVs[0].nativeResourceBindingMap;
+                resBindMaps[QMetalShaderResourceBindingsData::TESSCTRL] = &gfxPsD->d->tess.compTesc.nativeResourceBindingMap;
+                resBindMaps[QMetalShaderResourceBindingsData::TESSEVAL] = &gfxPsD->d->tess.vertTese.nativeResourceBindingMap;
+            } else {
+                resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->vs.nativeResourceBindingMap;
+            }
+            resBindMaps[QMetalShaderResourceBindingsData::FRAGMENT] = &gfxPsD->d->fs.nativeResourceBindingMap;
         } else {
             cbD->currentGraphicsSrb = nullptr;
-            cbD->currentComputeSrb = srb;
-            resBindMaps[2] = &compPsD->d->cs.nativeResourceBindingMap;
+            cbD->currentComputeSrb = srbD;
+            resBindMaps[QMetalShaderResourceBindingsData::COMPUTE] = &compPsD->d->cs.nativeResourceBindingMap;
         }
         cbD->currentSrbGeneration = srbD->generation;
         cbD->currentResSlot = resSlot;
@@ -1293,13 +1805,13 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb,
     offsets.finish();
 
     // same binding space for vertex and constant buffers - work it around
-    QRhiShaderResourceBindings *srb = cbD->currentGraphicsSrb;
+    QMetalShaderResourceBindings *srbD = cbD->currentGraphicsSrb;
     // There's nothing guaranteeing setShaderResources() was called before
     // setVertexInput()... but whatever srb will get bound will have to be
     // layout-compatible anyways so maxBinding is the same.
-    if (!srb)
-        srb = cbD->currentGraphicsPipeline->shaderResourceBindings();
-    const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, srb)->maxBinding + 1;
+    if (!srbD)
+        srbD = QRHI_RES(QMetalShaderResourceBindings, cbD->currentGraphicsPipeline->shaderResourceBindings());
+    const int firstVertexBinding = srbD->maxBinding + 1;
 
     if (firstVertexBinding != cbD->d->currentFirstVertexBinding
             || buffers != cbD->d->currentVertexInputsBuffers
@@ -1323,7 +1835,7 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb,
         QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, indexBuf);
         executeBufferHostWritesForCurrentFrame(ibufD);
         ibufD->lastActiveFrameSlot = currentFrameSlot;
-        cbD->currentIndexBuffer = indexBuf;
+        cbD->currentIndexBuffer = ibufD;
         cbD->currentIndexOffset = indexOffset;
         cbD->currentIndexFormat = indexFormat;
     } else {
@@ -1339,7 +1851,7 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
 
     // x,y is top-left in MTLViewportRect but bottom-left in QRhiViewport
     float x, y, w, h;
-    if (!qrhi_toTopLeftRenderTargetRect(outputSize, viewport.viewport(), &x, &y, &w, &h))
+    if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h))
         return;
 
     MTLViewport vp;
@@ -1352,8 +1864,10 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport)
 
     [cbD->d->currentRenderPassEncoder setViewport: vp];
 
-    if (!QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) {
+    if (cbD->currentGraphicsPipeline
+        && !cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) {
         MTLScissorRect s;
+        qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, viewport.viewport(), &x, &y, &w, &h);
         s.x = NSUInteger(x);
         s.y = NSUInteger(y);
         s.width = NSUInteger(w);
@@ -1366,12 +1880,12 @@ void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor)
 {
     QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
     Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
-    Q_ASSERT(QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor));
+    Q_ASSERT(cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor));
     const QSize outputSize = cbD->currentTarget->pixelSize();
 
     // x,y is top-left in MTLScissorRect but bottom-left in QRhiScissor
     int x, y, w, h;
-    if (!qrhi_toTopLeftRenderTargetRect(outputSize, scissor.scissor(), &x, &y, &w, &h))
+    if (!qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, scissor.scissor(), &x, &y, &w, &h))
         return;
 
     MTLScissorRect s;
@@ -1400,20 +1914,320 @@ void QRhiMetal::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue)
     [cbD->d->currentRenderPassEncoder setStencilReferenceValue: refValue];
 }
 
+static id<MTLComputeCommandEncoder> tessellationComputeEncoder(QMetalCommandBuffer *cbD)
+{
+    if (cbD->d->currentRenderPassEncoder) {
+        [cbD->d->currentRenderPassEncoder endEncoding];
+        cbD->d->currentRenderPassEncoder = nil;
+    }
+
+    if (!cbD->d->tessellationComputeEncoder)
+        cbD->d->tessellationComputeEncoder = [cbD->d->cb computeCommandEncoder];
+
+    return cbD->d->tessellationComputeEncoder;
+}
+
+static void endTessellationComputeEncoding(QMetalCommandBuffer *cbD)
+{
+    if (cbD->d->tessellationComputeEncoder) {
+        [cbD->d->tessellationComputeEncoder endEncoding];
+        cbD->d->tessellationComputeEncoder = nil;
+    }
+
+    QMetalRenderTargetData * rtD = nullptr;
+
+    switch (cbD->currentTarget->resourceType()) {
+    case QRhiResource::SwapChainRenderTarget:
+        rtD = QRHI_RES(QMetalSwapChainRenderTarget, cbD->currentTarget)->d;
+        break;
+    case QRhiResource::TextureRenderTarget:
+        rtD = QRHI_RES(QMetalTextureRenderTarget, cbD->currentTarget)->d;
+        break;
+    default:
+        break;
+    }
+
+    Q_ASSERT(rtD);
+
+    QVarLengthArray<MTLLoadAction, 4> oldColorLoad;
+    for (uint i = 0; i < uint(rtD->colorAttCount); ++i) {
+        oldColorLoad.append(cbD->d->currentPassRpDesc.colorAttachments[i].loadAction);
+        if (cbD->d->currentPassRpDesc.colorAttachments[i].storeAction != MTLStoreActionDontCare)
+            cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad;
+    }
+
+    MTLLoadAction oldDepthLoad;
+    MTLLoadAction oldStencilLoad;
+    if (rtD->dsAttCount) {
+        oldDepthLoad = cbD->d->currentPassRpDesc.depthAttachment.loadAction;
+        if (cbD->d->currentPassRpDesc.depthAttachment.storeAction != MTLStoreActionDontCare)
+            cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad;
+
+        oldStencilLoad = cbD->d->currentPassRpDesc.stencilAttachment.loadAction;
+        if (cbD->d->currentPassRpDesc.stencilAttachment.storeAction != MTLStoreActionDontCare)
+            cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad;
+    }
+
+    cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc];
+    cbD->resetPerPassCachedState();
+
+    for (uint i = 0; i < uint(rtD->colorAttCount); ++i) {
+        cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = oldColorLoad[i];
+    }
+
+    if (rtD->dsAttCount) {
+        cbD->d->currentPassRpDesc.depthAttachment.loadAction = oldDepthLoad;
+        cbD->d->currentPassRpDesc.stencilAttachment.loadAction = oldStencilLoad;
+    }
+
+}
+
+void QRhiMetal::tessellatedDraw(const TessDrawArgs &args)
+{
+    QMetalCommandBuffer *cbD = args.cbD;
+    QMetalGraphicsPipeline *graphicsPipeline = cbD->currentGraphicsPipeline;
+    if (graphicsPipeline->d->tess.failed)
+        return;
+
+    const bool indexed = args.type != TessDrawArgs::NonIndexed;
+    const quint32 instanceCount = indexed ? args.drawIndexed.instanceCount : args.draw.instanceCount;
+    const quint32 vertexOrIndexCount = indexed ? args.drawIndexed.indexCount : args.draw.vertexCount;
+
+    QMetalGraphicsPipelineData::Tessellation &tess(graphicsPipeline->d->tess);
+    QMetalGraphicsPipelineData::ExtraBufferManager &extraBufMgr(graphicsPipeline->d->extraBufMgr);
+    const quint32 patchCount = tess.patchCountForDrawCall(vertexOrIndexCount, instanceCount);
+    QMetalBuffer *vertOutBuf = nullptr;
+    QMetalBuffer *tescOutBuf = nullptr;
+    QMetalBuffer *tescPatchOutBuf = nullptr;
+    QMetalBuffer *tescFactorBuf = nullptr;
+    QMetalBuffer *tescParamsBuf = nullptr;
+    id<MTLComputeCommandEncoder> vertTescComputeEncoder = tessellationComputeEncoder(cbD);
+
+    // Step 1: vertex shader (as compute)
+    {
+        id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder;
+        QShader::Variant shaderVariant = QShader::NonIndexedVertexAsComputeShader;
+        if (args.type == TessDrawArgs::U16Indexed)
+            shaderVariant = QShader::UInt16IndexedVertexAsComputeShader;
+        else if (args.type == TessDrawArgs::U32Indexed)
+            shaderVariant = QShader::UInt32IndexedVertexAsComputeShader;
+        const int varIndex = QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(shaderVariant);
+        id<MTLComputePipelineState> computePipelineState = tess.vsCompPipeline(this, shaderVariant);
+        [computeEncoder setComputePipelineState: computePipelineState];
+
+        // Make uniform buffers, textures, and samplers (meant for the
+        // vertex stage from the client's point of view) visible in the
+        // "vertex as compute" shader
+        cbD->d->currentComputePassEncoder = computeEncoder;
+        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::VERTEX, QMetalShaderResourceBindingsData::COMPUTE);
+        cbD->d->currentComputePassEncoder = nil;
+
+        const QMap<int, int> &ebb(tess.compVs[varIndex].nativeShaderInfo.extraBufferBindings);
+        const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+        const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1);
+
+        if (outputBufferBinding >= 0) {
+            const quint32 workBufSize = tess.vsCompOutputBufferSize(vertexOrIndexCount, instanceCount);
+            vertOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
+            if (!vertOutBuf)
+                return;
+            [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
+        }
+
+        if (indexBufferBinding >= 0)
+            [computeEncoder setBuffer: (id<MTLBuffer>) args.drawIndexed.indexBuffer offset: 0 atIndex: indexBufferBinding];
+
+        for (int i = 0, ie = cbD->d->currentVertexInputsBuffers.batches.count(); i != ie; ++i) {
+            const auto &bufferBatch(cbD->d->currentVertexInputsBuffers.batches[i]);
+            const auto &offsetBatch(cbD->d->currentVertexInputOffsets.batches[i]);
+            [computeEncoder setBuffers: bufferBatch.resources.constData()
+              offsets: offsetBatch.resources.constData()
+              withRange: NSMakeRange(uint(cbD->d->currentFirstVertexBinding) + bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))];
+        }
+
+        if (indexed) {
+            [computeEncoder setStageInRegion: MTLRegionMake2D(args.drawIndexed.vertexOffset, args.drawIndexed.firstInstance,
+                                                              args.drawIndexed.indexCount, args.drawIndexed.instanceCount)];
+        } else {
+            [computeEncoder setStageInRegion: MTLRegionMake2D(args.draw.firstVertex, args.draw.firstInstance,
+                                                              args.draw.vertexCount, args.draw.instanceCount)];
+        }
+
+        [computeEncoder dispatchThreads: MTLSizeMake(vertexOrIndexCount, instanceCount, 1)
+          threadsPerThreadgroup: MTLSizeMake(computePipelineState.threadExecutionWidth, 1, 1)];
+    }
+
+    // Step 2: tessellation control shader (as compute)
+    {
+        id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder;
+        id<MTLComputePipelineState> computePipelineState = tess.tescCompPipeline(this);
+        [computeEncoder setComputePipelineState: computePipelineState];
+
+        cbD->d->currentComputePassEncoder = computeEncoder;
+        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSCTRL, QMetalShaderResourceBindingsData::COMPUTE);
+        cbD->d->currentComputePassEncoder = nil;
+
+        const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings);
+        const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+        const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
+        const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
+        const int paramsBufferBinding = ebb.value(QShaderPrivate::MslTessTescParamsBufferBinding, -1);
+        const int inputBufferBinding = ebb.value(QShaderPrivate::MslTessTescInputBufferBinding, -1);
+
+        if (outputBufferBinding >= 0) {
+            const quint32 workBufSize = tess.tescCompOutputBufferSize(patchCount);
+            tescOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
+            if (!tescOutBuf)
+                return;
+            [computeEncoder setBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
+        }
+
+        if (patchOutputBufferBinding >= 0) {
+            const quint32 workBufSize = tess.tescCompPatchOutputBufferSize(patchCount);
+            tescPatchOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize);
+            if (!tescPatchOutBuf)
+                return;
+            [computeEncoder setBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding];
+        }
+
+        if (tessFactorBufferBinding >= 0) {
+            tescFactorBuf = extraBufMgr.acquireWorkBuffer(this, patchCount * sizeof(MTLQuadTessellationFactorsHalf));
+            [computeEncoder setBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding];
+        }
+
+        if (paramsBufferBinding >= 0) {
+            struct {
+                quint32 inControlPointCount;
+                quint32 patchCount;
+            } params;
+            tescParamsBuf = extraBufMgr.acquireWorkBuffer(this, sizeof(params), QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible);
+            if (!tescParamsBuf)
+                return;
+            params.inControlPointCount = tess.inControlPointCount;
+            params.patchCount = patchCount;
+            id<MTLBuffer> paramsBuf = tescParamsBuf->d->buf[0];
+            char *p = reinterpret_cast<char *>([paramsBuf contents]);
+            memcpy(p, &params, sizeof(params));
+            [computeEncoder setBuffer: paramsBuf offset: 0 atIndex: paramsBufferBinding];
+        }
+
+        if (vertOutBuf && inputBufferBinding >= 0)
+            [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: inputBufferBinding];
+
+        int sgSize = int(computePipelineState.threadExecutionWidth);
+        int wgSize = std::lcm(tess.outControlPointCount, sgSize);
+        while (wgSize > caps.maxThreadGroupSize) {
+            sgSize /= 2;
+            wgSize = std::lcm(tess.outControlPointCount, sgSize);
+        }
+        [computeEncoder dispatchThreads: MTLSizeMake(patchCount * tess.outControlPointCount, 1, 1)
+          threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)];
+    }
+
+    // Much of the state in the QMetalCommandBuffer is going to be reset
+    // when we get a new render encoder. Save what we need. (cheaper than
+    // starting to walk over the srb again)
+    const QMetalShaderResourceBindingsData resourceBindings = cbD->d->currentShaderResourceBindingState;
+
+    endTessellationComputeEncoding(cbD);
+
+    // Step 3: tessellation evaluation (as vertex) + fragment shader
+    {
+        // No need to call tess.teseFragRenderPipeline because it was done
+        // once and we know the result is stored in the standard place
+        // (graphicsPipeline->d->ps).
+
+        graphicsPipeline->makeActiveForCurrentRenderPassEncoder(cbD);
+        id<MTLRenderCommandEncoder> renderEncoder = cbD->d->currentRenderPassEncoder;
+
+        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSEVAL, QMetalShaderResourceBindingsData::VERTEX, &resourceBindings);
+        rebindShaderResources(cbD, QMetalShaderResourceBindingsData::FRAGMENT, QMetalShaderResourceBindingsData::FRAGMENT, &resourceBindings);
+
+        const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings);
+        const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+        const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
+        const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
+
+        if (outputBufferBinding >= 0 && tescOutBuf)
+            [renderEncoder setVertexBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding];
+
+        if (patchOutputBufferBinding >= 0 && tescPatchOutBuf)
+            [renderEncoder setVertexBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding];
+
+        if (tessFactorBufferBinding >= 0 && tescFactorBuf) {
+            [renderEncoder setTessellationFactorBuffer: tescFactorBuf->d->buf[0] offset: 0 instanceStride: 0];
+            [renderEncoder setVertexBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding];
+        }
+
+        [cbD->d->currentRenderPassEncoder drawPatches: tess.outControlPointCount
+                                                       patchStart: 0
+                                                       patchCount: patchCount
+                                                       patchIndexBuffer: nil
+                                                       patchIndexBufferOffset: 0
+                                                       instanceCount: 1
+                                                       baseInstance: 0];
+    }
+}
+
+void QRhiMetal::adjustForMultiViewDraw(quint32 *instanceCount, QRhiCommandBuffer *cb)
+{
+    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
+    const int multiViewCount = cbD->currentGraphicsPipeline->m_multiViewCount;
+    if (multiViewCount <= 1)
+        return;
+
+    const QMap<int, int> &ebb(cbD->currentGraphicsPipeline->d->vs.nativeShaderInfo.extraBufferBindings);
+    const int viewMaskBufBinding = ebb.value(QShaderPrivate::MslMultiViewMaskBufferBinding, -1);
+    if (viewMaskBufBinding == -1) {
+        qWarning("No extra buffer for multiview in the vertex shader; was it built with --view-count specified?");
+        return;
+    }
+    struct {
+        quint32 viewOffset;
+        quint32 viewCount;
+    } multiViewInfo;
+    multiViewInfo.viewOffset = 0;
+    multiViewInfo.viewCount = quint32(multiViewCount);
+    QMetalBuffer *buf = cbD->currentGraphicsPipeline->d->extraBufMgr.acquireWorkBuffer(this, sizeof(multiViewInfo),
+        QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible);
+    if (buf) {
+        id<MTLBuffer> mtlbuf = buf->d->buf[0];
+        char *p = reinterpret_cast<char *>([mtlbuf contents]);
+        memcpy(p, &multiViewInfo, sizeof(multiViewInfo));
+        [cbD->d->currentRenderPassEncoder setVertexBuffer: mtlbuf offset: 0 atIndex: viewMaskBufBinding];
+        // The instance count is adjusted for layered rendering. The vertex shader is expected to contain something like:
+        // uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1];
+        // where spvViewMask is the buffer with multiViewInfo passed in above.
+        *instanceCount *= multiViewCount;
+    }
+}
+
 void QRhiMetal::draw(QRhiCommandBuffer *cb, quint32 vertexCount,
                      quint32 instanceCount, quint32 firstVertex, quint32 firstInstance)
 {
     QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
     Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass);
 
+    if (cbD->currentGraphicsPipeline->d->tess.enabled) {
+        TessDrawArgs a;
+        a.cbD = cbD;
+        a.type = TessDrawArgs::NonIndexed;
+        a.draw.vertexCount = vertexCount;
+        a.draw.instanceCount = instanceCount;
+        a.draw.firstVertex = firstVertex;
+        a.draw.firstInstance = firstInstance;
+        tessellatedDraw(a);
+        return;
+    }
+
+    adjustForMultiViewDraw(&instanceCount, cb);
+
     if (caps.baseVertexAndInstance) {
-        [cbD->d->currentRenderPassEncoder drawPrimitives:
-          QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
-          vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance];
+        [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
+                                                          vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance];
     } else {
-        [cbD->d->currentRenderPassEncoder drawPrimitives:
-          QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
-          vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount];
+        [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
+                                                          vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount];
     }
 }
 
@@ -1429,23 +2243,39 @@ void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount,
     const quint32 indexOffset = cbD->currentIndexOffset + firstIndex * (cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? 2 : 4);
     Q_ASSERT(indexOffset == aligned(indexOffset, 4u));
 
-    QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, cbD->currentIndexBuffer);
-    id<MTLBuffer> mtlbuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0];
+    QMetalBuffer *ibufD = cbD->currentIndexBuffer;
+    id<MTLBuffer> mtlibuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0];
+
+    if (cbD->currentGraphicsPipeline->d->tess.enabled) {
+        TessDrawArgs a;
+        a.cbD = cbD;
+        a.type = cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? TessDrawArgs::U16Indexed : TessDrawArgs::U32Indexed;
+        a.drawIndexed.indexCount = indexCount;
+        a.drawIndexed.instanceCount = instanceCount;
+        a.drawIndexed.firstIndex = firstIndex;
+        a.drawIndexed.vertexOffset = vertexOffset;
+        a.drawIndexed.firstInstance = firstInstance;
+        a.drawIndexed.indexBuffer = mtlibuf;
+        tessellatedDraw(a);
+        return;
+    }
+
+    adjustForMultiViewDraw(&instanceCount, cb);
 
     if (caps.baseVertexAndInstance) {
-        [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
+        [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
           indexCount: indexCount
           indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
-          indexBuffer: mtlbuf
+          indexBuffer: mtlibuf
           indexBufferOffset: indexOffset
           instanceCount: instanceCount
           baseVertex: vertexOffset
           baseInstance: firstInstance];
     } else {
-        [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType
+        [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType
           indexCount: indexCount
           indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32
-          indexBuffer: mtlbuf
+          indexBuffer: mtlibuf
           indexBufferOffset: indexOffset
           instanceCount: instanceCount];
     }
@@ -1502,34 +2332,39 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb)
     cbD->resetPerPassCachedState();
 }
 
+double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb)
+{
+    QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
+    return cbD->d->lastGpuTime;
+}
+
 QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
 {
     Q_UNUSED(flags);
 
     QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain);
+    currentSwapChain = swapChainD;
+    currentFrameSlot = swapChainD->currentFrameSlot;
 
-    // This is a bit messed up since for this swapchain we want to wait for the
-    // commands+present to complete, while for others just for the commands
-    // (for this same frame slot) but not sure how to do that in a sane way so
-    // wait for full cb completion for now.
-    for (QMetalSwapChain *sc : qAsConst(swapchains)) {
-        dispatch_semaphore_t sem = sc->d->sem[swapChainD->currentFrameSlot];
-        dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
+    // If we are too far ahead, block. This is also what ensures that any
+    // resource used in the previous frame for this slot is now not in use
+    // anymore by the GPU.
+    dispatch_semaphore_wait(swapChainD->d->sem[currentFrameSlot], DISPATCH_TIME_FOREVER);
+
+    // Do this also for any other swapchain's commands with the same frame slot
+    // While this reduces concurrency, it keeps resource usage safe: swapchain
+    // A starting its frame 0, followed by swapchain B starting its own frame 0
+    // will make B wait for A's frame 0 commands, so if a resource is written
+    // in B's frame or when B checks for pending resource releases, that won't
+    // mess up A's in-flight commands (as they are not in flight anymore).
+    for (QMetalSwapChain *sc : std::as_const(swapchains)) {
         if (sc != swapChainD)
-            dispatch_semaphore_signal(sem);
+            sc->waitUntilCompleted(currentFrameSlot); // wait+signal
     }
 
-    currentSwapChain = swapChainD;
-    currentFrameSlot = swapChainD->currentFrameSlot;
-    if (swapChainD->ds)
-        swapChainD->ds->lastActiveFrameSlot = currentFrameSlot;
-
     [d->captureScope beginScope];
 
-    // Do not let the command buffer mess with the refcount of objects. We do
-    // have a proper render loop and will manage lifetimes similarly to other
-    // backends (Vulkan).
-    swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
+    swapChainD->cbWrapper.d->cb = d->newCommandBuffer();
 
     QMetalRenderTargetData::ColorAtt colorAtt;
     if (swapChainD->samples > 1) {
@@ -1541,11 +2376,16 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF
 
     swapChainD->rtWrapper.d->fb.colorAtt[0] = colorAtt;
     swapChainD->rtWrapper.d->fb.dsTex = swapChainD->ds ? swapChainD->ds->d->tex : nil;
+    swapChainD->rtWrapper.d->fb.dsResolveTex = nil;
     swapChainD->rtWrapper.d->fb.hasStencil = swapChainD->ds ? true : false;
     swapChainD->rtWrapper.d->fb.depthNeedsStore = false;
 
+    if (swapChainD->ds)
+        swapChainD->ds->lastActiveFrameSlot = currentFrameSlot;
+
     executeDeferredReleases();
-    swapChainD->cbWrapper.resetState();
+    swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]);
+    swapChainD->d->lastGpuTime[currentFrameSlot] = 0;
     finishActiveReadbacks();
 
     return QRhi::FrameOpSuccess;
@@ -1556,26 +2396,30 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
     QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain);
     Q_ASSERT(currentSwapChain == swapChainD);
 
+    __block int thisFrameSlot = currentFrameSlot;
+    [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer> cb) {
+        swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
+        dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]);
+    }];
+
     const bool needsPresent = !flags.testFlag(QRhi::SkipPresent);
-    if (needsPresent) {
-        // beginFrame-endFrame without a render pass inbetween means there is no
-        // drawable, handle this gracefully because presentDrawable does not like
-        // null arguments.
-        if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) {
-            // QTBUG-103415: while the docs suggest the following two approaches are
-            // equivalent, there is a difference in case a frame is recorded earlier than
-            // (i.e. not in response to) the next CVDisplayLink callback. Therefore, stick
-            // with presentDrawable, which gives results identical to OpenGL, and all other
-            // platforms, i.e. throttles to vsync as expected, meaning constant 15-17 ms with
-            // a 60 Hz screen, no jumps with smaller intervals, regardless of when the frame
-            // is submitted by the app)
-#if 1
+    const bool presentsWithTransaction = swapChainD->d->layer.presentsWithTransaction;
+    if (!presentsWithTransaction && needsPresent) {
+        // beginFrame-endFrame without a render pass inbetween means there is no drawable.
+        if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable)
             [swapChainD->cbWrapper.d->cb presentDrawable: drawable];
-#else
-            [swapChainD->cbWrapper.d->cb addScheduledHandler:^(id<MTLCommandBuffer>) {
-                [drawable present];
-            }];
-#endif
+    }
+
+    [swapChainD->cbWrapper.d->cb commit];
+
+    if (presentsWithTransaction && needsPresent) {
+        // beginFrame-endFrame without a render pass inbetween means there is no drawable.
+        if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) {
+            // The layer has presentsWithTransaction set to true to avoid flicker on resizing,
+            // so here it is important to follow what the Metal docs say when it comes to the
+            // issuing the present.
+            [swapChainD->cbWrapper.d->cb waitUntilScheduled];
+            [drawable present];
         }
     }
 
@@ -1583,13 +2427,6 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
     [swapChainD->d->curDrawable release];
     swapChainD->d->curDrawable = nil;
 
-    __block int thisFrameSlot = currentFrameSlot;
-    [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer>) {
-        dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]);
-    }];
-
-    [swapChainD->cbWrapper.d->cb commit];
-
     [d->captureScope endScope];
 
     if (needsPresent)
@@ -1605,23 +2442,17 @@ QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi:
     Q_UNUSED(flags);
 
     currentFrameSlot = (currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT;
-    if (swapchains.count() > 1) {
-        for (QMetalSwapChain *sc : qAsConst(swapchains)) {
-            // wait+signal is the general pattern to ensure the commands for a
-            // given frame slot have completed (if sem is 1, we go 0 then 1; if
-            // sem is 0 we go -1, block, completion increments to 0, then us to 1)
-            dispatch_semaphore_t sem = sc->d->sem[currentFrameSlot];
-            dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
-            dispatch_semaphore_signal(sem);
-        }
-    }
+
+    for (QMetalSwapChain *sc : std::as_const(swapchains))
+        sc->waitUntilCompleted(currentFrameSlot);
 
     d->ofr.active = true;
     *cb = &d->ofr.cbWrapper;
-    d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
+    d->ofr.cbWrapper.d->cb = d->newCommandBuffer();
 
     executeDeferredReleases();
-    d->ofr.cbWrapper.resetState();
+    d->ofr.cbWrapper.resetState(d->ofr.lastGpuTime);
+    d->ofr.lastGpuTime = 0;
     finishActiveReadbacks();
 
     return QRhi::FrameOpSuccess;
@@ -1633,10 +2464,13 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags)
     Q_ASSERT(d->ofr.active);
     d->ofr.active = false;
 
-    [d->ofr.cbWrapper.d->cb commit];
+    id<MTLCommandBuffer> cb = d->ofr.cbWrapper.d->cb;
+    [cb commit];
 
     // offscreen frames wait for completion, unlike swapchain ones
-    [d->ofr.cbWrapper.d->cb waitUntilCompleted];
+    [cb waitUntilCompleted];
+
+    d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime;
 
     finishActiveReadbacks(true);
 
@@ -1660,16 +2494,14 @@ QRhi::FrameOpResult QRhiMetal::finish()
         }
     }
 
-    for (QMetalSwapChain *sc : qAsConst(swapchains)) {
+    for (QMetalSwapChain *sc : std::as_const(swapchains)) {
         for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
             if (currentSwapChain && sc == currentSwapChain && i == currentFrameSlot) {
                 // no wait as this is the thing we're going to be commit below and
                 // beginFrame decremented sem already and going to be signaled by endFrame
                 continue;
             }
-            dispatch_semaphore_t sem = sc->d->sem[i];
-            dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
-            dispatch_semaphore_signal(sem);
+            sc->waitUntilCompleted(i);
         }
     }
 
@@ -1679,10 +2511,13 @@ QRhi::FrameOpResult QRhiMetal::finish()
     }
 
     if (inFrame) {
-        if (d->ofr.active)
-            d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
-        else
-            swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences];
+        if (d->ofr.active) {
+            d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime;
+            d->ofr.cbWrapper.d->cb = d->newCommandBuffer();
+        } else {
+            swapChainD->d->lastGpuTime[currentFrameSlot] += cb.GPUEndTime - cb.GPUStartTime;
+            swapChainD->cbWrapper.d->cb = d->newCommandBuffer();
+        }
     }
 
     executeDeferredReleases(true);
@@ -1860,6 +2695,15 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
     QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb);
     QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates);
 
+    id<MTLBlitCommandEncoder> blitEnc = nil;
+    auto ensureBlit = [&blitEnc, cbD, this]() {
+        if (!blitEnc) {
+            blitEnc = [cbD->d->cb blitCommandEncoder];
+            if (debugMarkers)
+                [blitEnc pushDebugGroup: @"Texture upload/copy"];
+        }
+    };
+
     for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) {
         const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]);
         if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) {
@@ -1882,25 +2726,33 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
             QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf);
             executeBufferHostWritesForCurrentFrame(bufD);
             const int idx = bufD->d->slotted ? currentFrameSlot : 0;
-            char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
-            if (p) {
-                u.result->data.resize(u.readSize);
-                memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
+            if (bufD->m_type == QRhiBuffer::Dynamic) {
+                char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]);
+                if (p) {
+                    u.result->data.resize(u.readSize);
+                    memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize));
+                }
+                if (u.result->completed)
+                    u.result->completed();
+            } else {
+                QRhiMetalData::BufferReadback readback;
+                readback.activeFrameSlot = idx;
+                readback.buf = bufD->d->buf[idx];
+                readback.offset = u.offset;
+                readback.readSize = u.readSize;
+                readback.result = u.result;
+                d->activeBufferReadbacks.append(readback);
+#ifdef Q_OS_MACOS
+                if (bufD->d->managed) {
+                    // On non-Apple Silicon, manually synchronize memory from GPU to CPU
+                    ensureBlit();
+                    [blitEnc synchronizeResource:readback.buf];
+                }
+#endif
             }
-            if (u.result->completed)
-                u.result->completed();
         }
     }
 
-    id<MTLBlitCommandEncoder> blitEnc = nil;
-    auto ensureBlit = [&blitEnc, cbD, this] {
-        if (!blitEnc) {
-            blitEnc = [cbD->d->cb blitCommandEncoder];
-            if (debugMarkers)
-                [blitEnc pushDebugGroup: @"Texture upload/copy"];
-        }
-    };
-
     for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) {
         const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]);
         if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) {
@@ -1908,7 +2760,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
             qsizetype stagingSize = 0;
             for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) {
                 for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
-                    for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level]))
+                    for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level]))
                         stagingSize += subresUploadByteSize(subresDesc);
                 }
             }
@@ -1922,7 +2774,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate
             qsizetype curOfs = 0;
             for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) {
                 for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) {
-                    for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level]))
+                    for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level]))
                         enqueueSubresUpload(utexD, mp, blitEnc, layer, level, subresDesc, &curOfs);
                 }
             }
@@ -2035,7 +2887,7 @@ void QRhiMetal::executeBufferHostWritesForSlot(QMetalBuffer *bufD, int slot)
     void *p = [bufD->d->buf[slot] contents];
     quint32 changeBegin = UINT32_MAX;
     quint32 changeEnd = 0;
-    for (const QMetalBufferData::BufferUpdate &u : qAsConst(bufD->d->pendingUpdates[slot])) {
+    for (const QMetalBufferData::BufferUpdate &u : std::as_const(bufD->d->pendingUpdates[slot])) {
         memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.data.size()));
         if (u.offset < changeBegin)
             changeBegin = u.offset;
@@ -2111,21 +2963,24 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
         if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(rtTex->description(), rtD->currentResIdList))
             rtTex->create();
         cbD->d->currentPassRpDesc = d->createDefaultRenderPass(rtD->dsAttCount, colorClearValue, depthStencilClearValue, rtD->colorAttCount);
-        if (rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents)) {
+        if (rtD->fb.preserveColor) {
             for (uint i = 0; i < uint(rtD->colorAttCount); ++i)
                 cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad;
         }
-        if (rtD->dsAttCount && rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents)) {
+        if (rtD->dsAttCount && rtD->fb.preserveDs) {
             cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad;
             cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad;
         }
         for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments();
              it != itEnd; ++it)
         {
-            if (it->texture())
+            if (it->texture()) {
                 QRHI_RES(QMetalTexture, it->texture())->lastActiveFrameSlot = currentFrameSlot;
-            else if (it->renderBuffer())
+                if (it->multiViewCount() >= 2)
+                    cbD->d->currentPassRpDesc.renderTargetArrayLength = NSUInteger(it->multiViewCount());
+            } else if (it->renderBuffer()) {
                 QRHI_RES(QMetalRenderBuffer, it->renderBuffer())->lastActiveFrameSlot = currentFrameSlot;
+            }
             if (it->resolveTexture())
                 QRHI_RES(QMetalTexture, it->resolveTexture())->lastActiveFrameSlot = currentFrameSlot;
         }
@@ -2133,6 +2988,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
             QRHI_RES(QMetalRenderBuffer, rtTex->m_desc.depthStencilBuffer())->lastActiveFrameSlot = currentFrameSlot;
         if (rtTex->m_desc.depthTexture())
             QRHI_RES(QMetalTexture, rtTex->m_desc.depthTexture())->lastActiveFrameSlot = currentFrameSlot;
+        if (rtTex->m_desc.depthResolveTexture())
+            QRHI_RES(QMetalTexture, rtTex->m_desc.depthResolveTexture())->lastActiveFrameSlot = currentFrameSlot;
     }
         break;
     default:
@@ -2146,7 +3003,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
         cbD->d->currentPassRpDesc.colorAttachments[i].depthPlane = NSUInteger(rtD->fb.colorAtt[i].slice);
         cbD->d->currentPassRpDesc.colorAttachments[i].level = NSUInteger(rtD->fb.colorAtt[i].level);
         if (rtD->fb.colorAtt[i].resolveTex) {
-            cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = MTLStoreActionMultisampleResolve;
+            cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = rtD->fb.preserveColor ? MTLStoreActionStoreAndMultisampleResolve
+                                                                                              : MTLStoreActionMultisampleResolve;
             cbD->d->currentPassRpDesc.colorAttachments[i].resolveTexture = rtD->fb.colorAtt[i].resolveTex;
             cbD->d->currentPassRpDesc.colorAttachments[i].resolveSlice = NSUInteger(rtD->fb.colorAtt[i].resolveLayer);
             cbD->d->currentPassRpDesc.colorAttachments[i].resolveLevel = NSUInteger(rtD->fb.colorAtt[i].resolveLevel);
@@ -2159,6 +3017,15 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb,
         cbD->d->currentPassRpDesc.stencilAttachment.texture = rtD->fb.hasStencil ? rtD->fb.dsTex : nil;
         if (rtD->fb.depthNeedsStore) // Depth/Stencil is set to DontCare by default, override if  needed
             cbD->d->currentPassRpDesc.depthAttachment.storeAction = MTLStoreActionStore;
+        if (rtD->fb.dsResolveTex) {
+            cbD->d->currentPassRpDesc.depthAttachment.storeAction = rtD->fb.depthNeedsStore ? MTLStoreActionStoreAndMultisampleResolve
+                                                                                            : MTLStoreActionMultisampleResolve;
+            cbD->d->currentPassRpDesc.depthAttachment.resolveTexture = rtD->fb.dsResolveTex;
+            if (rtD->fb.hasStencil) {
+                cbD->d->currentPassRpDesc.stencilAttachment.resolveTexture = rtD->fb.dsResolveTex;
+                cbD->d->currentPassRpDesc.stencilAttachment.storeAction = cbD->d->currentPassRpDesc.depthAttachment.storeAction;
+            }
+        }
     }
 
     cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc];
@@ -2216,9 +3083,9 @@ void QRhiMetal::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *p
     Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass);
     QMetalComputePipeline *psD = QRHI_RES(QMetalComputePipeline, ps);
 
-    if (cbD->currentComputePipeline != ps || cbD->currentPipelineGeneration != psD->generation) {
+    if (cbD->currentComputePipeline != psD || cbD->currentPipelineGeneration != psD->generation) {
         cbD->currentGraphicsPipeline = nullptr;
-        cbD->currentComputePipeline = ps;
+        cbD->currentComputePipeline = psD;
         cbD->currentPipelineGeneration = psD->generation;
 
         [cbD->d->currentComputePassEncoder setComputePipelineState: psD->d->ps];
@@ -2284,8 +3151,12 @@ void QRhiMetal::executeDeferredReleases(bool forced)
                 [e.stagingBuffer.buffer release];
                 break;
             case QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline:
-                [e.graphicsPipeline.depthStencilState release];
                 [e.graphicsPipeline.pipelineState release];
+                [e.graphicsPipeline.depthStencilState release];
+                [e.graphicsPipeline.tessVertexComputeState[0] release];
+                [e.graphicsPipeline.tessVertexComputeState[1] release];
+                [e.graphicsPipeline.tessVertexComputeState[2] release];
+                [e.graphicsPipeline.tessTessControlComputeState release];
                 break;
             case QRhiMetalData::DeferredReleaseEntry::ComputePipeline:
                 [e.computePipeline.pipelineState release];
@@ -2315,7 +3186,23 @@ void QRhiMetal::finishActiveReadbacks(bool forced)
             if (readback.result->completed)
                 completedCallbacks.append(readback.result->completed);
 
-            d->activeTextureReadbacks.removeLast();
+            d->activeTextureReadbacks.remove(i);
+        }
+    }
+
+    for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) {
+        const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]);
+        if (forced || currentFrameSlot == readback.activeFrameSlot
+                || readback.activeFrameSlot < 0) {
+            readback.result->data.resize(readback.readSize);
+            char *p = reinterpret_cast<char *>([readback.buf contents]);
+            Q_ASSERT(p);
+            memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize));
+
+            if (readback.result->completed)
+                completedCallbacks.append(readback.result->completed);
+
+            d->activeBufferReadbacks.remove(i);
         }
     }
 
@@ -2388,6 +3275,9 @@ bool QMetalBuffer::create()
     // Static maps to on macOS) is not safe when another frame reading from the
     // same buffer is still in flight.
     d->slotted = !m_usage.testFlag(QRhiBuffer::StorageBuffer); // except for SSBOs written in the shader
+    // and a special case for internal work buffers
+    if (int(m_usage) == WorkBufPoolUsage)
+        d->slotted = false;
 
     for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
         if (i == 0 || d->slotted) {
@@ -2863,11 +3753,14 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize)
     if (d->tex)
         destroy();
 
-    const QSize size = m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize;
     const bool isCube = m_flags.testFlag(CubeMap);
     const bool is3D = m_flags.testFlag(ThreeDimensional);
     const bool isArray = m_flags.testFlag(TextureArray);
     const bool hasMipMaps = m_flags.testFlag(MipMapped);
+    const bool is1D = m_flags.testFlag(OneDimensional);
+
+    const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1)
+                            : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize);
 
     QRHI_RES_RHI(QRhiMetal);
     d->format = toMetalTextureFormat(m_format, m_flags, rhiD);
@@ -2895,12 +3788,18 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize)
         qWarning("Texture cannot be both array and 3D");
         return false;
     }
-    m_depth = qMax(1, m_depth);
+    if (is1D && is3D) {
+        qWarning("Texture cannot be both 1D and 3D");
+        return false;
+    }
+    if (is1D && isCube) {
+        qWarning("Texture cannot be both 1D and cube");
+        return false;
+    }
     if (m_depth > 1 && !is3D) {
         qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth);
         return false;
     }
-    m_arraySize = qMax(0, m_arraySize);
     if (m_arraySize > 0 && !isArray) {
         qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize);
         return false;
@@ -2927,17 +3826,20 @@ bool QMetalTexture::create()
     const bool isCube = m_flags.testFlag(CubeMap);
     const bool is3D = m_flags.testFlag(ThreeDimensional);
     const bool isArray = m_flags.testFlag(TextureArray);
+    const bool is1D = m_flags.testFlag(OneDimensional);
     if (isCube) {
         desc.textureType = MTLTextureTypeCube;
     } else if (is3D) {
         desc.textureType = MTLTextureType3D;
+    } else if (is1D) {
+        desc.textureType = isArray ? MTLTextureType1DArray : MTLTextureType1D;
     } else if (isArray) {
 #ifdef Q_OS_IOS
-        if (samples > 1) {
-            // would be available on iOS 14.0+ but cannot test for that with a 13 SDK
-            qWarning("Multisample 2D texture array is not supported on iOS");
+        if (@available(iOS 14, *)) {
+            desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray;
+        } else {
+            desc.textureType = MTLTextureType2DArray;
         }
-        desc.textureType = MTLTextureType2DArray;
 #else
         desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray;
 #endif
@@ -2947,12 +3849,12 @@ bool QMetalTexture::create()
     desc.pixelFormat = d->format;
     desc.width = NSUInteger(size.width());
     desc.height = NSUInteger(size.height());
-    desc.depth = is3D ? m_depth : 1;
+    desc.depth = is3D ? qMax(1, m_depth) : 1;
     desc.mipmapLevelCount = NSUInteger(mipLevelCount);
     if (samples > 1)
         desc.sampleCount = NSUInteger(samples);
     if (isArray)
-        desc.arrayLength = NSUInteger(m_arraySize);
+        desc.arrayLength = NSUInteger(qMax(0, m_arraySize));
     desc.resourceOptions = MTLResourceStorageModePrivate;
     desc.storageMode = MTLStorageModePrivate;
     desc.usage = MTLTextureUsageShaderRead;
@@ -3011,7 +3913,8 @@ id<MTLTexture> QMetalTextureData::viewForLevel(int level)
     const bool isCube = q->m_flags.testFlag(QRhiTexture::CubeMap);
     const bool isArray = q->m_flags.testFlag(QRhiTexture::TextureArray);
     id<MTLTexture> view = [tex newTextureViewWithPixelFormat: format textureType: type
-            levels: NSMakeRange(NSUInteger(level), 1) slices: NSMakeRange(0, isCube ? 6 : (isArray ? q->m_arraySize : 1))];
+            levels: NSMakeRange(NSUInteger(level), 1)
+            slices: NSMakeRange(0, isCube ? 6 : (isArray ? qMax(0, q->m_arraySize) : 1))];
 
     perLevelViews[level] = view;
     return view;
@@ -3156,7 +4059,9 @@ QMetalRenderPassDescriptor::~QMetalRenderPassDescriptor()
 
 void QMetalRenderPassDescriptor::destroy()
 {
-    // nothing to do here
+    QRHI_RES_RHI(QRhiMetal);
+    if (rhiD)
+        rhiD->unregisterResource(this);
 }
 
 bool QMetalRenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const
@@ -3199,13 +4104,17 @@ void QMetalRenderPassDescriptor::updateSerializedFormat()
 
 QRhiRenderPassDescriptor *QMetalRenderPassDescriptor::newCompatibleRenderPassDescriptor() const
 {
-    QMetalRenderPassDescriptor *rp = new QMetalRenderPassDescriptor(m_rhi);
-    rp->colorAttachmentCount = colorAttachmentCount;
-    rp->hasDepthStencil = hasDepthStencil;
-    memcpy(rp->colorFormat, colorFormat, sizeof(colorFormat));
-    rp->dsFormat = dsFormat;
-    rp->updateSerializedFormat();
-    return rp;
+    QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
+    rpD->colorAttachmentCount = colorAttachmentCount;
+    rpD->hasDepthStencil = hasDepthStencil;
+    memcpy(rpD->colorFormat, colorFormat, sizeof(colorFormat));
+    rpD->dsFormat = dsFormat;
+
+    rpD->updateSerializedFormat();
+
+    QRHI_RES_RHI(QRhiMetal);
+    rhiD->registerResource(rpD, false);
+    return rpD;
 }
 
 QVector<quint32> QMetalRenderPassDescriptor::serializedFormat() const
@@ -3261,12 +4170,14 @@ QMetalTextureRenderTarget::~QMetalTextureRenderTarget()
 
 void QMetalTextureRenderTarget::destroy()
 {
-    // nothing to do here
+    QRHI_RES_RHI(QRhiMetal);
+    if (rhiD)
+        rhiD->unregisterResource(this);
 }
 
 QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDescriptor()
 {
-    const int colorAttachmentCount = m_desc.cendColorAttachments() - m_desc.cbeginColorAttachments();
+    const int colorAttachmentCount = int(m_desc.colorAttachmentCount());
     QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
     rpD->colorAttachmentCount = colorAttachmentCount;
     rpD->hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
@@ -3284,14 +4195,16 @@ QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDesc
         rpD->dsFormat = int(QRHI_RES(QMetalRenderBuffer, m_desc.depthStencilBuffer())->d->format);
 
     rpD->updateSerializedFormat();
+
+    QRHI_RES_RHI(QRhiMetal);
+    rhiD->registerResource(rpD, false);
     return rpD;
 }
 
 bool QMetalTextureRenderTarget::create()
 {
     QRHI_RES_RHI(QRhiMetal);
-    const bool hasColorAttachments = m_desc.cbeginColorAttachments() != m_desc.cendColorAttachments();
-    Q_ASSERT(hasColorAttachments || m_desc.depthTexture());
+    Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture());
     Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture());
     const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture();
 
@@ -3335,8 +4248,9 @@ bool QMetalTextureRenderTarget::create()
         if (m_desc.depthTexture()) {
             QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, m_desc.depthTexture());
             d->fb.dsTex = depthTexD->d->tex;
-            d->fb.hasStencil = false;
-            d->fb.depthNeedsStore = true;
+            d->fb.hasStencil = rhiD->isStencilSupportingFormat(depthTexD->format());
+            d->fb.depthNeedsStore = !m_flags.testFlag(DoNotStoreDepthStencilContents) && !m_desc.depthResolveTexture();
+            d->fb.preserveDs = m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents);
             if (d->colorAttCount == 0) {
                 d->pixelSize = depthTexD->pixelSize();
                 d->sampleCount = depthTexD->samples;
@@ -3346,18 +4260,27 @@ bool QMetalTextureRenderTarget::create()
             d->fb.dsTex = depthRbD->d->tex;
             d->fb.hasStencil = true;
             d->fb.depthNeedsStore = false;
+            d->fb.preserveDs = false;
             if (d->colorAttCount == 0) {
                 d->pixelSize = depthRbD->pixelSize();
                 d->sampleCount = depthRbD->samples;
             }
         }
+        if (m_desc.depthResolveTexture()) {
+            QMetalTexture *depthResolveTexD = QRHI_RES(QMetalTexture, m_desc.depthResolveTexture());
+            d->fb.dsResolveTex = depthResolveTexD->d->tex;
+        }
         d->dsAttCount = 1;
     } else {
         d->dsAttCount = 0;
     }
 
+    if (d->colorAttCount > 0)
+        d->fb.preserveColor = m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents);
+
     QRhiRenderTargetAttachmentTracker::updateResIdList<QMetalTexture, QMetalRenderBuffer>(m_desc, &d->currentResIdList);
 
+    rhiD->registerResource(this, false);
     return true;
 }
 
@@ -3393,6 +4316,10 @@ void QMetalShaderResourceBindings::destroy()
 {
     sortedBindings.clear();
     maxBinding = -1;
+
+    QRHI_RES_RHI(QRhiMetal);
+    if (rhiD)
+        rhiD->unregisterResource(this);
 }
 
 bool QMetalShaderResourceBindings::create()
@@ -3407,13 +4334,9 @@ bool QMetalShaderResourceBindings::create()
     rhiD->updateLayoutDesc(this);
 
     std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings));
-    std::sort(sortedBindings.begin(), sortedBindings.end(),
-              [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b)
-    {
-        return a.data()->binding < b.data()->binding;
-    });
+    std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan);
     if (!sortedBindings.isEmpty())
-        maxBinding = sortedBindings.last().data()->binding;
+        maxBinding = QRhiImplementation::shaderResourceBindingData(sortedBindings.last())->binding;
     else
         maxBinding = -1;
 
@@ -3423,6 +4346,7 @@ bool QMetalShaderResourceBindings::create()
         memset(&bd, 0, sizeof(BoundResourceData));
 
     generation += 1;
+    rhiD->registerResource(this, false);
     return true;
 }
 
@@ -3430,13 +4354,8 @@ void QMetalShaderResourceBindings::updateResources(UpdateFlags flags)
 {
     sortedBindings.clear();
     std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings));
-    if (!flags.testFlag(BindingsAreSorted)) {
-        std::sort(sortedBindings.begin(), sortedBindings.end(),
-                  [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b)
-        {
-            return a.data()->binding < b.data()->binding;
-        });
-    }
+    if (!flags.testFlag(BindingsAreSorted))
+        std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan);
 
     for (BoundResourceData &bd : boundResourceData)
         memset(&bd, 0, sizeof(BoundResourceData));
@@ -3448,6 +4367,8 @@ QMetalGraphicsPipeline::QMetalGraphicsPipeline(QRhiImplementation *rhi)
     : QRhiGraphicsPipeline(rhi),
       d(new QMetalGraphicsPipelineData)
 {
+    d->q = this;
+    d->tess.q = d;
 }
 
 QMetalGraphicsPipeline::~QMetalGraphicsPipeline()
@@ -3461,16 +4382,39 @@ void QMetalGraphicsPipeline::destroy()
     d->vs.destroy();
     d->fs.destroy();
 
-    if (!d->ps)
+    d->tess.compVs[0].destroy();
+    d->tess.compVs[1].destroy();
+    d->tess.compVs[2].destroy();
+
+    d->tess.compTesc.destroy();
+    d->tess.vertTese.destroy();
+
+    qDeleteAll(d->extraBufMgr.deviceLocalWorkBuffers);
+    d->extraBufMgr.deviceLocalWorkBuffers.clear();
+    qDeleteAll(d->extraBufMgr.hostVisibleWorkBuffers);
+    d->extraBufMgr.hostVisibleWorkBuffers.clear();
+
+    delete d->bufferSizeBuffer;
+    d->bufferSizeBuffer = nullptr;
+
+    if (!d->ps && !d->ds
+            && !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2]
+            && !d->tess.tessControlComputeState)
+    {
         return;
+    }
 
     QRhiMetalData::DeferredReleaseEntry e;
     e.type = QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline;
     e.lastActiveFrameSlot = lastActiveFrameSlot;
-    e.graphicsPipeline.depthStencilState = d->ds;
     e.graphicsPipeline.pipelineState = d->ps;
-    d->ds = nil;
+    e.graphicsPipeline.depthStencilState = d->ds;
+    e.graphicsPipeline.tessVertexComputeState = d->tess.vertexComputeState;
+    e.graphicsPipeline.tessTessControlComputeState = d->tess.tessControlComputeState;
     d->ps = nil;
+    d->ds = nil;
+    d->tess.vertexComputeState = {};
+    d->tess.tessControlComputeState = nil;
 
     QRHI_RES_RHI(QRhiMetal);
     if (rhiD) {
@@ -3512,6 +4456,30 @@ static inline MTLVertexFormat toMetalAttributeFormat(QRhiVertexInputAttribute::F
         return MTLVertexFormatInt2;
     case QRhiVertexInputAttribute::SInt:
         return MTLVertexFormatInt;
+    case QRhiVertexInputAttribute::Half4:
+        return MTLVertexFormatHalf4;
+    case QRhiVertexInputAttribute::Half3:
+        return MTLVertexFormatHalf3;
+    case QRhiVertexInputAttribute::Half2:
+        return MTLVertexFormatHalf2;
+    case QRhiVertexInputAttribute::Half:
+        return MTLVertexFormatHalf;
+    case QRhiVertexInputAttribute::UShort4:
+        return MTLVertexFormatUShort4;
+    case QRhiVertexInputAttribute::UShort3:
+        return MTLVertexFormatUShort3;
+    case QRhiVertexInputAttribute::UShort2:
+        return MTLVertexFormatUShort2;
+    case QRhiVertexInputAttribute::UShort:
+        return MTLVertexFormatUShort;
+    case QRhiVertexInputAttribute::SShort4:
+        return MTLVertexFormatShort4;
+    case QRhiVertexInputAttribute::SShort3:
+        return MTLVertexFormatShort3;
+    case QRhiVertexInputAttribute::SShort2:
+        return MTLVertexFormatShort2;
+    case QRhiVertexInputAttribute::SShort:
+        return MTLVertexFormatShort;
     default:
         Q_UNREACHABLE();
         return MTLVertexFormatFloat4;
@@ -3667,6 +4635,24 @@ static inline MTLPrimitiveType toMetalPrimitiveType(QRhiGraphicsPipeline::Topolo
     }
 }
 
+static inline MTLPrimitiveTopologyClass toMetalPrimitiveTopologyClass(QRhiGraphicsPipeline::Topology t)
+{
+    switch (t) {
+    case QRhiGraphicsPipeline::Triangles:
+    case QRhiGraphicsPipeline::TriangleStrip:
+    case QRhiGraphicsPipeline::TriangleFan:
+        return MTLPrimitiveTopologyClassTriangle;
+    case QRhiGraphicsPipeline::Lines:
+    case QRhiGraphicsPipeline::LineStrip:
+        return MTLPrimitiveTopologyClassLine;
+    case QRhiGraphicsPipeline::Points:
+        return MTLPrimitiveTopologyClassPoint;
+    default:
+        Q_UNREACHABLE();
+        return MTLPrimitiveTopologyClassTriangle;
+    }
+}
+
 static inline MTLCullMode toMetalCullMode(QRhiGraphicsPipeline::CullMode c)
 {
     switch (c) {
@@ -3695,15 +4681,67 @@ static inline MTLTriangleFillMode toMetalTriangleFillMode(QRhiGraphicsPipeline::
     }
 }
 
+static inline MTLWinding toMetalTessellationWindingOrder(QShaderDescription::TessellationWindingOrder w)
+{
+    switch (w) {
+    case QShaderDescription::CwTessellationWindingOrder:
+        return MTLWindingClockwise;
+    case QShaderDescription::CcwTessellationWindingOrder:
+        return MTLWindingCounterClockwise;
+    default:
+        // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown
+        return MTLWindingCounterClockwise;
+    }
+}
+
+static inline MTLTessellationPartitionMode toMetalTessellationPartitionMode(QShaderDescription::TessellationPartitioning p)
+{
+    switch (p) {
+    case QShaderDescription::EqualTessellationPartitioning:
+        return MTLTessellationPartitionModePow2;
+    case QShaderDescription::FractionalEvenTessellationPartitioning:
+        return MTLTessellationPartitionModeFractionalEven;
+    case QShaderDescription::FractionalOddTessellationPartitioning:
+        return MTLTessellationPartitionModeFractionalOdd;
+    default:
+        // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown
+        return MTLTessellationPartitionModePow2;
+    }
+}
+
+static inline MTLLanguageVersion toMetalLanguageVersion(const QShaderVersion &version)
+{
+    int v = version.version();
+    return MTLLanguageVersion(((v / 10) << 16) + (v % 10));
+}
+
 id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Variant shaderVariant,
                                              QString *error, QByteArray *entryPoint, QShaderKey *activeKey)
 {
-    QShaderKey key = { QShader::MetalLibShader, 20, shaderVariant };
-    QShaderCode mtllib = shader.shader(key);
-    if (mtllib.shader().isEmpty()) {
-        key.setSourceVersion(12);
-        mtllib = shader.shader(key);
+    QVarLengthArray<int, 8> versions;
+    if (@available(macOS 13, iOS 16, *))
+        versions << 30;
+    if (@available(macOS 12, iOS 15, *))
+        versions << 24;
+    if (@available(macOS 11, iOS 14, *))
+        versions << 23;
+    if (@available(macOS 10.15, iOS 13, *))
+        versions << 22;
+    if (@available(macOS 10.14, iOS 12, *))
+        versions << 21;
+    versions << 20 << 12;
+
+    const QList<QShaderKey> shaders = shader.availableShaders();
+
+    QShaderKey key;
+
+    for (const int &version : versions) {
+        key = { QShader::Source::MetalLibShader, version, shaderVariant };
+        if (shaders.contains(key))
+            break;
     }
+
+    QShaderCode mtllib = shader.shader(key);
     if (!mtllib.shader().isEmpty()) {
         dispatch_data_t data = dispatch_data_create(mtllib.shader().constData(),
                                                     size_t(mtllib.shader().size()),
@@ -3722,12 +4760,13 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var
         }
     }
 
-    key = { QShader::MslShader, 20, shaderVariant };
-    QShaderCode mslSource = shader.shader(key);
-    if (mslSource.shader().isEmpty()) {
-        key.setSourceVersion(12);
-        mslSource = shader.shader(key);
+    for (const int &version : versions) {
+        key = { QShader::Source::MslShader, version, shaderVariant };
+        if (shaders.contains(key))
+            break;
     }
+
+    QShaderCode mslSource = shader.shader(key);
     if (mslSource.shader().isEmpty()) {
         qWarning() << "No MSL 2.0 or 1.2 code found in baked shader" << shader;
         return nil;
@@ -3735,7 +4774,7 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var
 
     NSString *src = [NSString stringWithUTF8String: mslSource.shader().constData()];
     MTLCompileOptions *opts = [[MTLCompileOptions alloc] init];
-    opts.languageVersion = key.sourceVersion() == 20 ? MTLLanguageVersion2_0 : MTLLanguageVersion1_2;
+    opts.languageVersion = toMetalLanguageVersion(key.sourceVersion());
     NSError *err = nil;
     id<MTLLibrary> lib = [dev newLibraryWithSource: src options: opts error: &err];
     [opts release];
@@ -3757,56 +4796,192 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var
 
 id<MTLFunction> QRhiMetalData::createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint)
 {
-    NSString *name = [NSString stringWithUTF8String: entryPoint.constData()];
-    id<MTLFunction> f = [lib newFunctionWithName: name];
-    [name release];
-    return f;
+    return [lib newFunctionWithName:[NSString stringWithUTF8String:entryPoint.constData()]];
 }
 
-bool QMetalGraphicsPipeline::create()
+void QMetalGraphicsPipeline::setupAttachmentsInMetalRenderPassDescriptor(void *metalRpDesc, QMetalRenderPassDescriptor *rpD)
 {
-    if (d->ps)
-        destroy();
+    MTLRenderPipelineDescriptor *rpDesc = reinterpret_cast<MTLRenderPipelineDescriptor *>(metalRpDesc);
+
+    if (rpD->colorAttachmentCount) {
+        // defaults when no targetBlends are provided
+        rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]);
+        rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll;
+        rpDesc.colorAttachments[0].blendingEnabled = false;
+
+        Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount
+                 || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1));
+
+        for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) {
+            const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]);
+            rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]);
+            rpDesc.colorAttachments[i].blendingEnabled = b.enable;
+            rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor);
+            rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor);
+            rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor);
+            rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha);
+            rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha);
+            rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha);
+            rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite);
+        }
+    }
+
+    if (rpD->hasDepthStencil) {
+        // Must only be set when a depth-stencil buffer will actually be bound,
+        // validation blows up otherwise.
+        MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat);
+        rpDesc.depthAttachmentPixelFormat = fmt;
+#if defined(Q_OS_MACOS)
+        if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float)
+#else
+        if (fmt != MTLPixelFormatDepth32Float)
+#endif
+            rpDesc.stencilAttachmentPixelFormat = fmt;
+    }
 
     QRHI_RES_RHI(QRhiMetal);
-    rhiD->pipelineCreationStart();
-    if (!rhiD->sanityCheckGraphicsPipeline(this))
-        return false;
+    rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount));
+}
 
+void QMetalGraphicsPipeline::setupMetalDepthStencilDescriptor(void *metalDsDesc)
+{
+    MTLDepthStencilDescriptor *dsDesc = reinterpret_cast<MTLDepthStencilDescriptor *>(metalDsDesc);
+
+    dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways;
+    dsDesc.depthWriteEnabled = m_depthWrite;
+    if (m_stencilTest) {
+        dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init];
+        dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp);
+        dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp);
+        dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp);
+        dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp);
+        dsDesc.frontFaceStencil.readMask = m_stencilReadMask;
+        dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask;
+
+        dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init];
+        dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp);
+        dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp);
+        dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp);
+        dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp);
+        dsDesc.backFaceStencil.readMask = m_stencilReadMask;
+        dsDesc.backFaceStencil.writeMask = m_stencilWriteMask;
+    }
+}
+
+void QMetalGraphicsPipeline::mapStates()
+{
+    d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise;
+    d->cullMode = toMetalCullMode(m_cullMode);
+    d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode);
+    d->depthBias = float(m_depthBias);
+    d->slopeScaledDepthBias = m_slopeScaledDepthBias;
+}
+
+void QMetalGraphicsPipelineData::setupVertexInputDescriptor(MTLVertexDescriptor *desc)
+{
     // same binding space for vertex and constant buffers - work it around
-    const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, m_shaderResourceBindings)->maxBinding + 1;
+    // should be in native resource binding not SPIR-V, but this will work anyway
+    const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1;
 
-    MTLVertexDescriptor *inputLayout = [MTLVertexDescriptor vertexDescriptor];
-    for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes();
+    QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout();
+    for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes();
          it != itEnd; ++it)
     {
         const uint loc = uint(it->location());
-        inputLayout.attributes[loc].format = toMetalAttributeFormat(it->format());
-        inputLayout.attributes[loc].offset = NSUInteger(it->offset());
-        inputLayout.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding());
+        desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format()));
+        desc.attributes[loc].offset = NSUInteger(it->offset());
+        desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding());
     }
     int bindingIndex = 0;
-    for (auto it = m_vertexInputLayout.cbeginBindings(), itEnd = m_vertexInputLayout.cendBindings();
+    const NSUInteger viewCount = qMax<NSUInteger>(1, q->multiViewCount());
+    for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings();
          it != itEnd; ++it, ++bindingIndex)
     {
         const uint layoutIdx = uint(firstVertexBinding + bindingIndex);
-        inputLayout.layouts[layoutIdx].stepFunction =
-                it->classification() == QRhiVertexInputBinding::PerInstance
-                ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
-        inputLayout.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate());
-        inputLayout.layouts[layoutIdx].stride = it->stride();
+        desc.layouts[layoutIdx].stepFunction =
+                    it->classification() == QRhiVertexInputBinding::PerInstance
+                    ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
+        desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate());
+        if (desc.layouts[layoutIdx].stepFunction == MTLVertexStepFunctionPerInstance)
+            desc.layouts[layoutIdx].stepRate *= viewCount;
+        desc.layouts[layoutIdx].stride = it->stride();
     }
+}
 
-    MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
+void QMetalGraphicsPipelineData::setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc)
+{
+    // same binding space for vertex and constant buffers - work it around
+    // should be in native resource binding not SPIR-V, but this will work anyway
+    const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1;
+
+    QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout();
+    for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes();
+         it != itEnd; ++it)
+    {
+        const uint loc = uint(it->location());
+        desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format()));
+        desc.attributes[loc].offset = NSUInteger(it->offset());
+        desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding());
+    }
+    int bindingIndex = 0;
+    for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings();
+         it != itEnd; ++it, ++bindingIndex)
+    {
+        const uint layoutIdx = uint(firstVertexBinding + bindingIndex);
+        if (desc.indexBufferIndex) {
+            desc.layouts[layoutIdx].stepFunction =
+                    it->classification() == QRhiVertexInputBinding::PerInstance
+                    ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridXIndexed;
+        } else {
+            desc.layouts[layoutIdx].stepFunction =
+                    it->classification() == QRhiVertexInputBinding::PerInstance
+                    ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridX;
+        }
+        desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate());
+        desc.layouts[layoutIdx].stride = it->stride();
+    }
+}
+
+void QRhiMetalData::trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
+{
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        if (binArch)  {
+            NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil];
+            rpDesc.binaryArchives = binArchArray;
+        }
+    }
+}
+
+void QRhiMetalData::addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc)
+{
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        if (binArch) {
+            NSError *err = nil;
+            if (![binArch addRenderPipelineFunctionsWithDescriptor: rpDesc error: &err]) {
+                const QString msg = QString::fromNSString(err.localizedDescription);
+                qWarning("Failed to collect render pipeline functions to binary archive: %s", qPrintable(msg));
+            }
+        }
+    }
+}
 
-    rpDesc.vertexDescriptor = inputLayout;
+bool QMetalGraphicsPipeline::createVertexFragmentPipeline()
+{
+    QRHI_RES_RHI(QRhiMetal);
+
+    MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor];
+    d->setupVertexInputDescriptor(vertexDesc);
+
+    MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
+    rpDesc.vertexDescriptor = vertexDesc;
 
-    // mutability cannot be determined (slotted buffers could be set as
+    // Mutability cannot be determined (slotted buffers could be set as
     // MTLMutabilityImmutable, but then we potentially need a different
     // descriptor for each buffer combination as this depends on the actual
-    // buffers not just the resource binding layout) so leave it at the default
+    // buffers not just the resource binding layout), so leave
+    // rpDesc.vertex/fragmentBuffers at the defaults.
 
-    for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) {
+    for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
         auto cacheIt = rhiD->d->shaderCache.constFind(shaderStage);
         if (cacheIt != rhiD->d->shaderCache.constEnd()) {
             switch (shaderStage.type()) {
@@ -3853,6 +5028,8 @@ bool QMetalGraphicsPipeline::create()
                 d->vs.lib = lib;
                 d->vs.func = func;
                 d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
+                d->vs.desc = shader.description();
+                d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
                 rhiD->d->shaderCache.insert(shaderStage, d->vs);
                 [d->vs.lib retain];
                 [d->vs.func retain];
@@ -3862,6 +5039,8 @@ bool QMetalGraphicsPipeline::create()
                 d->fs.lib = lib;
                 d->fs.func = func;
                 d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
+                d->fs.desc = shader.description();
+                d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
                 rhiD->d->shaderCache.insert(shaderStage, d->fs);
                 [d->fs.lib retain];
                 [d->fs.func retain];
@@ -3876,85 +5055,836 @@ bool QMetalGraphicsPipeline::create()
     }
 
     QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, m_renderPassDesc);
+    setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD);
 
-    if (rpD->colorAttachmentCount) {
-        // defaults when no targetBlends are provided
-        rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]);
-        rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll;
-        rpDesc.colorAttachments[0].blendingEnabled = false;
+    if (m_multiViewCount >= 2)
+        rpDesc.inputPrimitiveTopology = toMetalPrimitiveTopologyClass(m_topology);
 
-        Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount
-                 || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1));
+    rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc);
 
-        for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) {
-            const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]);
-            rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]);
-            rpDesc.colorAttachments[i].blendingEnabled = b.enable;
-            rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor);
-            rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor);
-            rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor);
-            rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha);
-            rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha);
-            rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha);
-            rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite);
+    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+        rhiD->d->addRenderPipelineToBinaryArchive(rpDesc);
+
+    NSError *err = nil;
+    d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
+    [rpDesc release];
+    if (!d->ps) {
+        const QString msg = QString::fromNSString(err.localizedDescription);
+        qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
+        return false;
+    }
+
+    MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
+    setupMetalDepthStencilDescriptor(dsDesc);
+    d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc];
+    [dsDesc release];
+
+    d->primitiveType = toMetalPrimitiveType(m_topology);
+    mapStates();
+
+    return true;
+}
+
+int QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(QShader::Variant vertexCompVariant)
+{
+    switch (vertexCompVariant) {
+    case QShader::NonIndexedVertexAsComputeShader:
+        return 0;
+    case QShader::UInt32IndexedVertexAsComputeShader:
+        return 1;
+    case QShader::UInt16IndexedVertexAsComputeShader:
+        return 2;
+    default:
+        break;
+    }
+    return -1;
+}
+
+id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant)
+{
+    const int varIndex = vsCompVariantToIndex(vertexCompVariant);
+    if (varIndex >= 0 && vertexComputeState[varIndex])
+        return vertexComputeState[varIndex];
+
+    id<MTLFunction> func = nil;
+    if (varIndex >= 0)
+        func = compVs[varIndex].func;
+
+    if (!func) {
+        qWarning("No compute function found for vertex shader translated for tessellation, this should not happen");
+        return nil;
+    }
+
+    const QMap<int, int> &ebb(compVs[varIndex].nativeShaderInfo.extraBufferBindings);
+    const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1);
+
+    MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
+    cpDesc.computeFunction = func;
+    cpDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES;
+    cpDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor];
+    if (indexBufferBinding >= 0) {
+        if (vertexCompVariant == QShader::UInt32IndexedVertexAsComputeShader) {
+            cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32;
+            cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding;
+        } else if (vertexCompVariant == QShader::UInt16IndexedVertexAsComputeShader) {
+            cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16;
+            cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding;
         }
     }
+    q->setupStageInputDescriptor(cpDesc.stageInputDescriptor);
 
-    if (rpD->hasDepthStencil) {
-        // Must only be set when a depth-stencil buffer will actually be bound,
-        // validation blows up otherwise.
-        MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat);
-        rpDesc.depthAttachmentPixelFormat = fmt;
-#if defined(Q_OS_MACOS)
-        if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float)
-#else
-        if (fmt != MTLPixelFormatDepth32Float)
-#endif
-            rpDesc.stencilAttachmentPixelFormat = fmt;
+    rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
+
+    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+        rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
+
+    NSError *err = nil;
+    id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
+            options: MTLPipelineOptionNone
+            reflection: nil
+            error: &err];
+    [cpDesc release];
+    if (!ps) {
+        const QString msg = QString::fromNSString(err.localizedDescription);
+        qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
+    } else {
+        vertexComputeState[varIndex] = ps;
     }
+    // not retained, the only owner is vertexComputeState and so the QRhiGraphicsPipeline
+    return ps;
+}
 
-    rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount));
+id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::tescCompPipeline(QRhiMetal *rhiD)
+{
+    if (tessControlComputeState)
+        return tessControlComputeState;
+
+    MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
+    cpDesc.computeFunction = compTesc.func;
+
+    rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
+
+    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+        rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
 
     NSError *err = nil;
-    d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
-    if (!d->ps) {
+    id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
+            options: MTLPipelineOptionNone
+            reflection: nil
+            error: &err];
+    [cpDesc release];
+    if (!ps) {
         const QString msg = QString::fromNSString(err.localizedDescription);
-        qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
-        [rpDesc release];
-        return false;
+        qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
+    } else {
+        tessControlComputeState = ps;
+    }
+    // not retained, the only owner is tessControlComputeState and so the QRhiGraphicsPipeline
+    return ps;
+}
+
+static inline bool indexTaken(quint32 index, quint64 indices)
+{
+    return (indices >> index) & 0x1;
+}
+
+static inline void takeIndex(quint32 index, quint64 &indices)
+{
+    indices |= 1 << index;
+}
+
+static inline int nextAttributeIndex(quint64 indices)
+{
+    // Maximum number of vertex attributes per vertex descriptor. There does
+    // not appear to be a way to query this from the implementation.
+    // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf indicates
+    // that all GPU families have a value of 31.
+    static const int maxVertexAttributes = 31;
+
+    for (int index = 0; index < maxVertexAttributes; ++index) {
+        if (!indexTaken(index, indices))
+            return index;
+    }
+
+    Q_UNREACHABLE_RETURN(-1);
+}
+
+static inline int aligned(quint32 offset, quint32 alignment)
+{
+    return ((offset + alignment - 1) / alignment) * alignment;
+}
+
+template<typename T>
+static void addUnusedVertexAttribute(const T &variable, QRhiMetal *rhiD, quint32 &offset, quint32 &vertexAlignment)
+{
+
+    int elements = 1;
+    for (const int dim : variable.arrayDims)
+        elements *= dim;
+
+    if (variable.type == QShaderDescription::VariableType::Struct) {
+        for (int element = 0; element < elements; ++element) {
+            for (const auto &member : variable.structMembers) {
+                addUnusedVertexAttribute(member, rhiD, offset, vertexAlignment);
+            }
+        }
+    } else {
+        const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type);
+        const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format);
+
+        // MSL specification 3.0 says alignment = size for non packed scalars and vectors
+        const quint32 alignment = size;
+        vertexAlignment = std::max(vertexAlignment, alignment);
+
+        for (int element = 0; element < elements; ++element) {
+            // adjust alignment
+            offset = aligned(offset, alignment);
+            offset += size;
+        }
     }
+}
+
+template<typename T>
+static void addVertexAttribute(const T &variable, int binding, QRhiMetal *rhiD, int &index, quint32 &offset, MTLVertexAttributeDescriptorArray *attributes, quint64 &indices, quint32 &vertexAlignment)
+{
+
+    int elements = 1;
+    for (const int dim : variable.arrayDims)
+        elements *= dim;
+
+    if (variable.type == QShaderDescription::VariableType::Struct) {
+        for (int element = 0; element < elements; ++element) {
+            for (const auto &member : variable.structMembers) {
+                addVertexAttribute(member, binding, rhiD, index, offset, attributes, indices, vertexAlignment);
+            }
+        }
+    } else {
+        const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type);
+        const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format);
+
+        // MSL specification 3.0 says alignment = size for non packed scalars and vectors
+        const quint32 alignment = size;
+        vertexAlignment = std::max(vertexAlignment, alignment);
+
+        for (int element = 0; element < elements; ++element) {
+            Q_ASSERT(!indexTaken(index, indices));
+
+            // adjust alignment
+            offset = aligned(offset, alignment);
+
+            attributes[index].bufferIndex = binding;
+            attributes[index].format = toMetalAttributeFormat(format);
+            attributes[index].offset = offset;
+
+            takeIndex(index, indices);
+            index++;
+            if (indexTaken(index, indices))
+                index = nextAttributeIndex(indices);
+
+            offset += size;
+        }
+    }
+}
+
+static inline bool matches(const QList<QShaderDescription::BlockVariable> &a, const QList<QShaderDescription::BlockVariable> &b)
+{
+    if (a.size() == b.size()) {
+        bool match = true;
+        for (int i = 0; i < a.size() && match; ++i) {
+            match &= a[i].type == b[i].type
+                    && a[i].arrayDims == b[i].arrayDims
+                    && matches(a[i].structMembers, b[i].structMembers);
+        }
+        return match;
+    }
+
+    return false;
+}
+
+static inline bool matches(const QShaderDescription::InOutVariable &a, const QShaderDescription::InOutVariable &b)
+{
+    return a.location == b.location
+            && a.type == b.type
+            && a.perPatch == b.perPatch
+            && matches(a.structMembers, b.structMembers);
+}
+
+//
+// Create the tessellation evaluation render pipeline state
+//
+// The tesc runs as a compute shader in a compute pipeline and writes per patch and per patch
+// control point data into separate storage buffers. The tese runs as a vertex shader in a render
+// pipeline. Our task is to generate a render pipeline descriptor for the tese that pulls vertices
+// from these buffers.
+//
+// As the buffers we are pulling vertices from are written by a compute pipeline, they follow the
+// MSL alignment conventions which we must take into account when generating our
+// MTLVertexDescriptor. We must include the user defined tese input attributes, and any builtins
+// that were used.
+//
+// SPIRV-Cross generates the MSL tese shader code with input attribute indices that reflect the
+// specified GLSL locations. Interface blocks are flattened with each member having an incremented
+// attribute index. SPIRV-Cross reports an error on compilation if there are clashes in the index
+// address space.
+//
+// After the user specified attributes are processed, SPIRV-Cross places the in-use builtins at the
+// next available (lowest value) attribute index. Tese builtins are processed in the following
+// order:
+//
+// in gl_PerVertex
+// {
+//   vec4 gl_Position;
+//   float gl_PointSize;
+//   float gl_ClipDistance[];
+// };
+//
+// patch in float gl_TessLevelOuter[4];
+// patch in float gl_TessLevelInner[2];
+//
+// Enumerations in QShaderDescription::BuiltinType are defined in this order.
+//
+// For quads, SPIRV-Cross places MTLQuadTessellationFactorsHalf per patch in the tessellation
+// factor buffer. For triangles it uses MTLTriangleTessellationFactorsHalf.
+//
+// It should be noted that SPIRV-Cross handles the following builtin inputs internally, with no
+// host side support required.
+//
+// in vec3 gl_TessCoord;
+// in int gl_PatchVerticesIn;
+// in int gl_PrimitiveID;
+//
+id<MTLRenderPipelineState> QMetalGraphicsPipelineData::Tessellation::teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline)
+{
+    if (pipeline->d->ps)
+        return pipeline->d->ps;
+
+    MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init];
+    MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor];
+
+    // tesc output buffers
+    const QMap<int, int> &ebb(compTesc.nativeShaderInfo.extraBufferBindings);
+    const int tescOutputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1);
+    const int tescPatchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1);
+    const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1);
+    quint32 offsetInTescOutput = 0;
+    quint32 offsetInTescPatchOutput = 0;
+    quint32 offsetInTessFactorBuffer = 0;
+    quint32 tescOutputAlignment = 0;
+    quint32 tescPatchOutputAlignment = 0;
+    quint32 tessFactorAlignment = 0;
+    QSet<int> usedBuffers;
+
+    // tesc output variables in ascending location order
+    QMap<int, QShaderDescription::InOutVariable> tescOutVars;
+    for (const auto &tescOutVar : compTesc.desc.outputVariables())
+        tescOutVars[tescOutVar.location] = tescOutVar;
+
+    // tese input variables in ascending location order
+    QMap<int, QShaderDescription::InOutVariable> teseInVars;
+    for (const auto &teseInVar : vertTese.desc.inputVariables())
+        teseInVars[teseInVar.location] = teseInVar;
+
+    // bit mask tracking usage of vertex attribute indices
+    quint64 indices = 0;
+
+    for (QShaderDescription::InOutVariable &tescOutVar : tescOutVars) {
+
+        int index = tescOutVar.location;
+        int binding = -1;
+        quint32 *offset = nullptr;
+        quint32 *alignment = nullptr;
+
+        if (tescOutVar.perPatch) {
+            binding = tescPatchOutputBufferBinding;
+            offset = &offsetInTescPatchOutput;
+            alignment = &tescPatchOutputAlignment;
+        } else {
+            tescOutVar.arrayDims.removeLast();
+            binding = tescOutputBufferBinding;
+            offset = &offsetInTescOutput;
+            alignment = &tescOutputAlignment;
+        }
+
+        if (teseInVars.contains(index)) {
+
+            if (!matches(teseInVars[index], tescOutVar)) {
+                qWarning() << "mismatched tessellation control output -> tesssellation evaluation input at location" << index;
+                qWarning() << "    tesc out:" << tescOutVar;
+                qWarning() << "    tese in:" << teseInVars[index];
+            }
+
+            if (binding != -1) {
+                addVertexAttribute(tescOutVar, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment);
+                usedBuffers << binding;
+            } else {
+                qWarning() << "baked tessellation control shader missing output buffer binding information";
+                addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment);
+            }
+
+        } else {
+            qWarning() << "missing tessellation evaluation input for tessellation control output:" << tescOutVar;
+            addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment);
+        }
+
+        teseInVars.remove(tescOutVar.location);
+    }
+
+    for (const QShaderDescription::InOutVariable &teseInVar : teseInVars)
+        qWarning() << "missing tessellation control output for tessellation evaluation input:" << teseInVar;
+
+    // tesc output builtins in ascending location order
+    QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> tescOutBuiltins;
+    for (const auto &tescOutBuiltin : compTesc.desc.outputBuiltinVariables())
+        tescOutBuiltins[tescOutBuiltin.type] = tescOutBuiltin;
+
+    // tese input builtins in ascending location order
+    QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> teseInBuiltins;
+    for (const auto &teseInBuiltin : vertTese.desc.inputBuiltinVariables())
+        teseInBuiltins[teseInBuiltin.type] = teseInBuiltin;
+
+    const bool trianglesMode = vertTese.desc.tessellationMode() == QShaderDescription::TrianglesTessellationMode;
+    bool tessLevelAdded = false;
+
+    for (const QShaderDescription::BuiltinVariable &builtin : tescOutBuiltins) {
+
+        QShaderDescription::InOutVariable variable;
+        int binding = -1;
+        quint32 *offset = nullptr;
+        quint32 *alignment = nullptr;
+
+        switch (builtin.type) {
+        case QShaderDescription::BuiltinType::PositionBuiltin:
+            variable.type = QShaderDescription::VariableType::Vec4;
+            binding = tescOutputBufferBinding;
+            offset = &offsetInTescOutput;
+            alignment = &tescOutputAlignment;
+            break;
+        case QShaderDescription::BuiltinType::PointSizeBuiltin:
+            variable.type = QShaderDescription::VariableType::Float;
+            binding = tescOutputBufferBinding;
+            offset = &offsetInTescOutput;
+            alignment = &tescOutputAlignment;
+            break;
+        case QShaderDescription::BuiltinType::ClipDistanceBuiltin:
+            variable.type = QShaderDescription::VariableType::Float;
+            variable.arrayDims = builtin.arrayDims;
+            binding = tescOutputBufferBinding;
+            offset = &offsetInTescOutput;
+            alignment = &tescOutputAlignment;
+            break;
+        case QShaderDescription::BuiltinType::TessLevelOuterBuiltin:
+            variable.type = QShaderDescription::VariableType::Half4;
+            binding = tessFactorBufferBinding;
+            offset = &offsetInTessFactorBuffer;
+            tessLevelAdded = trianglesMode;
+            alignment = &tessFactorAlignment;
+            break;
+        case QShaderDescription::BuiltinType::TessLevelInnerBuiltin:
+            if (trianglesMode) {
+                if (!tessLevelAdded) {
+                    variable.type = QShaderDescription::VariableType::Half4;
+                    binding = tessFactorBufferBinding;
+                    offsetInTessFactorBuffer = 0;
+                    offset = &offsetInTessFactorBuffer;
+                    alignment = &tessFactorAlignment;
+                    tessLevelAdded = true;
+                } else {
+                    teseInBuiltins.remove(builtin.type);
+                    continue;
+                }
+            } else {
+                variable.type = QShaderDescription::VariableType::Half2;
+                binding = tessFactorBufferBinding;
+                offsetInTessFactorBuffer = 8;
+                offset = &offsetInTessFactorBuffer;
+                alignment = &tessFactorAlignment;
+            }
+            break;
+        default:
+            Q_UNREACHABLE();
+            break;
+        }
+
+        if (teseInBuiltins.contains(builtin.type)) {
+            if (binding != -1) {
+                int index = nextAttributeIndex(indices);
+                addVertexAttribute(variable, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment);
+                usedBuffers << binding;
+            } else {
+                qWarning() << "baked tessellation control shader missing output buffer binding information";
+                addUnusedVertexAttribute(variable, rhiD, *offset, *alignment);
+            }
+        } else {
+            addUnusedVertexAttribute(variable, rhiD, *offset, *alignment);
+        }
+
+        teseInBuiltins.remove(builtin.type);
+    }
+
+    for (const QShaderDescription::BuiltinVariable &builtin : teseInBuiltins) {
+        switch (builtin.type) {
+        case QShaderDescription::BuiltinType::PositionBuiltin:
+        case QShaderDescription::BuiltinType::PointSizeBuiltin:
+        case QShaderDescription::BuiltinType::ClipDistanceBuiltin:
+            qWarning() << "missing tessellation control output for tessellation evaluation builtin input:" << builtin;
+            break;
+        default:
+            break;
+        }
+    }
+
+    if (usedBuffers.contains(tescOutputBufferBinding)) {
+        vertexDesc.layouts[tescOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatchControlPoint;
+        vertexDesc.layouts[tescOutputBufferBinding].stride = aligned(offsetInTescOutput, tescOutputAlignment);
+    }
+
+    if (usedBuffers.contains(tescPatchOutputBufferBinding)) {
+        vertexDesc.layouts[tescPatchOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch;
+        vertexDesc.layouts[tescPatchOutputBufferBinding].stride = aligned(offsetInTescPatchOutput, tescPatchOutputAlignment);
+    }
+
+    if (usedBuffers.contains(tessFactorBufferBinding)) {
+        vertexDesc.layouts[tessFactorBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch;
+        vertexDesc.layouts[tessFactorBufferBinding].stride = trianglesMode ? sizeof(MTLTriangleTessellationFactorsHalf) : sizeof(MTLQuadTessellationFactorsHalf);
+    }
+
+    rpDesc.vertexDescriptor = vertexDesc;
+    rpDesc.vertexFunction = vertTese.func;
+    rpDesc.fragmentFunction = pipeline->d->fs.func;
+
+    // The portable, cross-API approach is to use CCW, the results are then
+    // identical (assuming the applied clipSpaceCorrMatrix) for all the 3D
+    // APIs. The tess.eval. GLSL shader is thus expected to specify ccw. If it
+    // doesn't, things may not work as expected.
+    rpDesc.tessellationOutputWindingOrder = toMetalTessellationWindingOrder(vertTese.desc.tessellationWindingOrder());
+
+    rpDesc.tessellationPartitionMode = toMetalTessellationPartitionMode(vertTese.desc.tessellationPartitioning());
+
+    QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, pipeline->renderPassDescriptor());
+    pipeline->setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD);
+
+    rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc);
+
+    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+        rhiD->d->addRenderPipelineToBinaryArchive(rpDesc);
+
+    NSError *err = nil;
+    id<MTLRenderPipelineState> ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err];
     [rpDesc release];
+    if (!ps) {
+        const QString msg = QString::fromNSString(err.localizedDescription);
+        qWarning("Failed to create render pipeline state for tessellation: %s", qPrintable(msg));
+    } else {
+        // ps is stored in the QMetalGraphicsPipelineData so the end result in this
+        // regard is no different from what createVertexFragmentPipeline does
+        pipeline->d->ps = ps;
+    }
+    return ps;
+}
 
-    MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
-    dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways;
-    dsDesc.depthWriteEnabled = m_depthWrite;
-    if (m_stencilTest) {
-        dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init];
-        dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp);
-        dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp);
-        dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp);
-        dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp);
-        dsDesc.frontFaceStencil.readMask = m_stencilReadMask;
-        dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask;
+QMetalBuffer *QMetalGraphicsPipelineData::ExtraBufferManager::acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type)
+{
+    QVector<QMetalBuffer *> *workBuffers = type == WorkBufType::DeviceLocal ? &deviceLocalWorkBuffers : &hostVisibleWorkBuffers;
 
-        dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init];
-        dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp);
-        dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp);
-        dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp);
-        dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp);
-        dsDesc.backFaceStencil.readMask = m_stencilReadMask;
-        dsDesc.backFaceStencil.writeMask = m_stencilWriteMask;
+    // Check if something is reusable as-is.
+    for (QMetalBuffer *workBuf : *workBuffers) {
+        if (workBuf && workBuf->lastActiveFrameSlot == -1 && workBuf->size() >= size) {
+            workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot;
+            return workBuf;
+        }
+    }
+
+    // Once the pool is above a certain threshold, see if there is something
+    // unused (but too small) and recreate that our size.
+    if (workBuffers->count() > QMTL_FRAMES_IN_FLIGHT * 8) {
+        for (QMetalBuffer *workBuf : *workBuffers) {
+            if (workBuf && workBuf->lastActiveFrameSlot == -1) {
+                workBuf->setSize(size);
+                if (workBuf->create()) {
+                    workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot;
+                    return workBuf;
+                }
+            }
+        }
+    }
+
+    // Add a new buffer to the pool.
+    QMetalBuffer *buf;
+    if (type == WorkBufType::DeviceLocal) {
+        // for GPU->GPU data (non-slotted, not necessarily host writable)
+        buf = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size);
+    } else {
+        // for CPU->GPU (non-slotted, host writable/coherent)
+        buf = new QMetalBuffer(rhiD, QRhiBuffer::Dynamic, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size);
+    }
+    if (buf->create()) {
+        buf->lastActiveFrameSlot = rhiD->currentFrameSlot;
+        workBuffers->append(buf);
+        return buf;
+    }
+
+    qWarning("Failed to acquire work buffer of size %u", size);
+    return nullptr;
+}
+
+bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert, const QShader &tesc, const QShader &tese, const QShader &tessFrag)
+{
+    QRHI_RES_RHI(QRhiMetal);
+    QString error;
+    QByteArray entryPoint;
+    QShaderKey activeKey;
+
+    const QShaderDescription tescDesc = tesc.description();
+    const QShaderDescription teseDesc = tese.description();
+    d->tess.inControlPointCount = uint(m_patchControlPointCount);
+    d->tess.outControlPointCount = tescDesc.tessellationOutputVertexCount();
+    if (!d->tess.outControlPointCount)
+        d->tess.outControlPointCount = teseDesc.tessellationOutputVertexCount();
+
+    if (!d->tess.outControlPointCount) {
+        qWarning("Failed to determine output vertex count from the tessellation control or evaluation shader, cannot tessellate");
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+
+    if (m_multiViewCount >= 2)
+        qWarning("Multiview is not supported with tessellation");
+
+    // Now the vertex shader is a compute shader.
+    // It should have three dedicated *VertexAsComputeShader variants.
+    // What the requested variant was (Standard or Batchable) plays no role here.
+    // (the Qt Quick scenegraph does not use tessellation with its materials)
+    // Create all three versions.
+
+    bool variantsPresent[3] = {};
+    const QVector<QShaderKey> tessVertKeys = tessVert.availableShaders();
+    for (const QShaderKey &k : tessVertKeys) {
+        switch (k.sourceVariant()) {
+        case QShader::NonIndexedVertexAsComputeShader:
+            variantsPresent[0] = true;
+            break;
+        case QShader::UInt32IndexedVertexAsComputeShader:
+            variantsPresent[1] = true;
+            break;
+        case QShader::UInt16IndexedVertexAsComputeShader:
+            variantsPresent[2] = true;
+            break;
+        default:
+            break;
+        }
+    }
+    if (!(variantsPresent[0] && variantsPresent[1] && variantsPresent[2])) {
+        qWarning("Vertex shader is not prepared for Metal tessellation. Cannot tessellate. "
+                 "Perhaps the relevant variants (UInt32IndexedVertexAsComputeShader et al) were not generated? "
+                 "Try passing --msltess to qsb.");
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+
+    int varIndex = 0; // Will map NonIndexed as 0, UInt32 as 1, UInt16 as 2. Do not change this ordering.
+    for (QShader::Variant variant : {
+         QShader::NonIndexedVertexAsComputeShader,
+         QShader::UInt32IndexedVertexAsComputeShader,
+         QShader::UInt16IndexedVertexAsComputeShader })
+    {
+        id<MTLLibrary> lib = rhiD->d->createMetalLib(tessVert, variant, &error, &entryPoint, &activeKey);
+        if (!lib) {
+            qWarning("MSL shader compilation failed for vertex-as-compute shader %d: %s", int(variant), qPrintable(error));
+            d->tess.enabled = false;
+            d->tess.failed = true;
+            return false;
+        }
+        id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint);
+        if (!func) {
+            qWarning("MSL function for entry point %s not found", entryPoint.constData());
+            [lib release];
+            d->tess.enabled = false;
+            d->tess.failed = true;
+            return false;
+        }
+        QMetalShader &compVs(d->tess.compVs[varIndex]);
+        compVs.lib = lib;
+        compVs.func = func;
+        compVs.desc = tessVert.description();
+        compVs.nativeResourceBindingMap = tessVert.nativeResourceBindingMap(activeKey);
+        compVs.nativeShaderInfo = tessVert.nativeShaderInfo(activeKey);
+
+        // pre-create all three MTLComputePipelineStates
+        if (!d->tess.vsCompPipeline(rhiD, variant)) {
+            qWarning("Failed to pre-generate compute pipeline for vertex compute shader (tessellation variant %d)", int(variant));
+            d->tess.enabled = false;
+            d->tess.failed = true;
+            return false;
+        }
+
+        ++varIndex;
+    }
+
+    // Pipeline #2 is a compute that runs the tessellation control (compute) shader
+    id<MTLLibrary> tessControlLib = rhiD->d->createMetalLib(tesc, QShader::StandardShader, &error, &entryPoint, &activeKey);
+    if (!tessControlLib) {
+        qWarning("MSL shader compilation failed for tessellation control compute shader: %s", qPrintable(error));
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+    id<MTLFunction> tessControlFunc = rhiD->d->createMSLShaderFunction(tessControlLib, entryPoint);
+    if (!tessControlFunc) {
+        qWarning("MSL function for entry point %s not found", entryPoint.constData());
+        [tessControlLib release];
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+    d->tess.compTesc.lib = tessControlLib;
+    d->tess.compTesc.func = tessControlFunc;
+    d->tess.compTesc.desc = tesc.description();
+    d->tess.compTesc.nativeResourceBindingMap = tesc.nativeResourceBindingMap(activeKey);
+    d->tess.compTesc.nativeShaderInfo = tesc.nativeShaderInfo(activeKey);
+    if (!d->tess.tescCompPipeline(rhiD)) {
+        qWarning("Failed to pre-generate compute pipeline for tessellation control shader");
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
     }
 
+    // Pipeline #3 is a render pipeline with the tessellation evaluation (vertex) + the fragment shader
+    id<MTLLibrary> tessEvalLib = rhiD->d->createMetalLib(tese, QShader::StandardShader, &error, &entryPoint, &activeKey);
+    if (!tessEvalLib) {
+        qWarning("MSL shader compilation failed for tessellation evaluation vertex shader: %s", qPrintable(error));
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+    id<MTLFunction> tessEvalFunc = rhiD->d->createMSLShaderFunction(tessEvalLib, entryPoint);
+    if (!tessEvalFunc) {
+        qWarning("MSL function for entry point %s not found", entryPoint.constData());
+        [tessEvalLib release];
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+    d->tess.vertTese.lib = tessEvalLib;
+    d->tess.vertTese.func = tessEvalFunc;
+    d->tess.vertTese.desc = tese.description();
+    d->tess.vertTese.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey);
+    d->tess.vertTese.nativeShaderInfo = tese.nativeShaderInfo(activeKey);
+
+    id<MTLLibrary> fragLib = rhiD->d->createMetalLib(tessFrag, QShader::StandardShader, &error, &entryPoint, &activeKey);
+    if (!fragLib) {
+        qWarning("MSL shader compilation failed for fragment shader: %s", qPrintable(error));
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+    id<MTLFunction> fragFunc = rhiD->d->createMSLShaderFunction(fragLib, entryPoint);
+    if (!fragFunc) {
+        qWarning("MSL function for entry point %s not found", entryPoint.constData());
+        [fragLib release];
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+    d->fs.lib = fragLib;
+    d->fs.func = fragFunc;
+    d->fs.desc = tessFrag.description();
+    d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey);
+    d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey);
+
+    if (!d->tess.teseFragRenderPipeline(rhiD, this)) {
+        qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader");
+        d->tess.enabled = false;
+        d->tess.failed = true;
+        return false;
+    }
+
+    MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init];
+    setupMetalDepthStencilDescriptor(dsDesc);
     d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc];
     [dsDesc release];
 
-    d->primitiveType = toMetalPrimitiveType(m_topology);
-    d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise;
-    d->cullMode = toMetalCullMode(m_cullMode);
-    d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode);
-    d->depthBias = float(m_depthBias);
-    d->slopeScaledDepthBias = m_slopeScaledDepthBias;
+    // no primitiveType
+    mapStates();
+
+    return true;
+}
+
+bool QMetalGraphicsPipeline::create()
+{
+    destroy(); // no early test, always invoke and leave it to destroy to decide what to clean up
+
+    QRHI_RES_RHI(QRhiMetal);
+    rhiD->pipelineCreationStart();
+    if (!rhiD->sanityCheckGraphicsPipeline(this))
+        return false;
+
+    // See if tessellation is involved. Things will be very different, if so.
+    QShader tessVert;
+    QShader tesc;
+    QShader tese;
+    QShader tessFrag;
+    for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) {
+        switch (shaderStage.type()) {
+        case QRhiShaderStage::Vertex:
+            tessVert = shaderStage.shader();
+            break;
+        case QRhiShaderStage::TessellationControl:
+            tesc = shaderStage.shader();
+            break;
+        case QRhiShaderStage::TessellationEvaluation:
+            tese = shaderStage.shader();
+            break;
+        case QRhiShaderStage::Fragment:
+            tessFrag = shaderStage.shader();
+            break;
+        default:
+            break;
+        }
+    }
+    d->tess.enabled = tesc.isValid() && tese.isValid() && m_topology == Patches && m_patchControlPointCount > 0;
+    d->tess.failed = false;
+
+    bool ok = d->tess.enabled ? createTessellationPipelines(tessVert, tesc, tese, tessFrag) : createVertexFragmentPipeline();
+    if (!ok)
+        return false;
+
+    // SPIRV-Cross buffer size buffers
+    int buffers = 0;
+    QVarLengthArray<QMetalShader *, 6> shaders;
+    if (d->tess.enabled) {
+        shaders.append(&d->tess.compVs[0]);
+        shaders.append(&d->tess.compVs[1]);
+        shaders.append(&d->tess.compVs[2]);
+        shaders.append(&d->tess.compTesc);
+        shaders.append(&d->tess.vertTese);
+    } else {
+        shaders.append(&d->vs);
+    }
+    shaders.append(&d->fs);
+
+    for (QMetalShader *shader : shaders) {
+        if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
+            const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
+            shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1);
+            int maxNativeBinding = 0;
+            for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks())
+                maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first);
+
+            // we use one buffer to hold data for all graphics shader stages, each with a different offset.
+            // buffer offsets must be 32byte aligned - adjust buffer count accordingly
+            buffers += ((maxNativeBinding + 1 + 7) / 8) * 8;
+        }
+    }
+
+    if (buffers) {
+        if (!d->bufferSizeBuffer)
+            d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
+
+        d->bufferSizeBuffer->setSize(buffers * sizeof(int));
+        d->bufferSizeBuffer->create();
+    }
 
     rhiD->pipelineCreationEnd();
     lastActiveFrameSlot = -1;
@@ -3982,6 +5912,9 @@ void QMetalComputePipeline::destroy()
     if (!d->ps)
         return;
 
+    delete d->bufferSizeBuffer;
+    d->bufferSizeBuffer = nullptr;
+
     QRhiMetalData::DeferredReleaseEntry e;
     e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline;
     e.lastActiveFrameSlot = lastActiveFrameSlot;
@@ -3995,6 +5928,29 @@ void QMetalComputePipeline::destroy()
     }
 }
 
+void QRhiMetalData::trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
+{
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        if (binArch)  {
+            NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil];
+            cpDesc.binaryArchives = binArchArray;
+        }
+    }
+}
+
+void QRhiMetalData::addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc)
+{
+    if (@available(macOS 11.0, iOS 14.0, *)) {
+        if (binArch) {
+            NSError *err = nil;
+            if (![binArch addComputePipelineFunctionsWithDescriptor: cpDesc error: &err]) {
+                const QString msg = QString::fromNSString(err.localizedDescription);
+                qWarning("Failed to collect compute pipeline functions to binary archive: %s", qPrintable(msg));
+            }
+        }
+    }
+}
+
 bool QMetalComputePipeline::create()
 {
     if (d->ps)
@@ -4027,6 +5983,14 @@ bool QMetalComputePipeline::create()
         d->cs.func = func;
         d->cs.localSize = shader.description().computeShaderLocalSize();
         d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey);
+        d->cs.desc = shader.description();
+        d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey);
+
+        // SPIRV-Cross buffer size buffers
+        if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
+            const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding];
+            d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1);
+        }
 
         if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) {
             for (QMetalShader &s : rhiD->d->shaderCache)
@@ -4041,14 +6005,41 @@ bool QMetalComputePipeline::create()
 
     d->localSize = MTLSizeMake(d->cs.localSize[0], d->cs.localSize[1], d->cs.localSize[2]);
 
+    MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new];
+    cpDesc.computeFunction = d->cs.func;
+
+    rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc);
+
+    if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave))
+        rhiD->d->addComputePipelineToBinaryArchive(cpDesc);
+
     NSError *err = nil;
-    d->ps = [rhiD->d->dev newComputePipelineStateWithFunction: d->cs.func error: &err];
+    d->ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc
+            options: MTLPipelineOptionNone
+            reflection: nil
+            error: &err];
+    [cpDesc release];
     if (!d->ps) {
         const QString msg = QString::fromNSString(err.localizedDescription);
-        qWarning("Failed to create render pipeline state: %s", qPrintable(msg));
+        qWarning("Failed to create compute pipeline state: %s", qPrintable(msg));
         return false;
     }
 
+    // SPIRV-Cross buffer size buffers
+    if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) {
+        int buffers = 0;
+        for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks())
+            buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first);
+
+        buffers += 1;
+
+        if (!d->bufferSizeBuffer)
+            d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int));
+
+        d->bufferSizeBuffer->setSize(buffers * sizeof(int));
+        d->bufferSizeBuffer->create();
+    }
+
     rhiD->pipelineCreationEnd();
     lastActiveFrameSlot = -1;
     generation += 1;
@@ -4081,10 +6072,12 @@ const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles()
     return &nativeHandlesStruct;
 }
 
-void QMetalCommandBuffer::resetState()
+void QMetalCommandBuffer::resetState(double lastGpuTime)
 {
+    d->lastGpuTime = lastGpuTime;
     d->currentRenderPassEncoder = nil;
     d->currentComputePassEncoder = nil;
+    d->tessellationComputeEncoder = nil;
     d->currentPassRpDesc = nil;
     resetPerPassState();
 }
@@ -4146,8 +6139,7 @@ void QMetalSwapChain::destroy()
     for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
         if (d->sem[i]) {
             // the semaphores cannot be released if they do not have the initial value
-            dispatch_semaphore_wait(d->sem[i], DISPATCH_TIME_FOREVER);
-            dispatch_semaphore_signal(d->sem[i]);
+            waitUntilCompleted(i);
 
             dispatch_release(d->sem[i]);
             d->sem[i] = nullptr;
@@ -4159,7 +6151,14 @@ void QMetalSwapChain::destroy()
         d->msaaTex[i] = nil;
     }
 
+#ifdef Q_OS_MACOS
+    d->liveResizeStartObserver.remove();
+    d->liveResizeEndObserver.remove();
+    d->liveResizeObserverSet = false;
+#endif
+
     d->layer = nullptr;
+    m_proxyData = {};
 
     [d->curDrawable release];
     d->curDrawable = nil;
@@ -4181,6 +6180,9 @@ QRhiRenderTarget *QMetalSwapChain::currentFrameRenderTarget()
     return &rtWrapper;
 }
 
+// view.layer should ideally be called on the main thread, otherwise the UI
+// Thread Checker in Xcode drops a warning. Hence trying to proxy it through
+// QRhiSwapChainProxyData instead of just calling this function directly.
 static inline CAMetalLayer *layerForWindow(QWindow *window)
 {
     Q_ASSERT(window);
@@ -4193,29 +6195,51 @@ static inline CAMetalLayer *layerForWindow(QWindow *window)
     return static_cast<CAMetalLayer *>(view.layer);
 }
 
+// If someone calls this, it is hopefully from the main thread, and they will
+// then set the returned data on the QRhiSwapChain, so it won't need to query
+// the layer on its own later on.
+QRhiSwapChainProxyData QRhiMetal::updateSwapChainProxyData(QWindow *window)
+{
+    QRhiSwapChainProxyData d;
+    d.reserved[0] = layerForWindow(window);
+    return d;
+}
+
 QSize QMetalSwapChain::surfacePixelSize()
 {
     Q_ASSERT(m_window);
     CAMetalLayer *layer = d->layer;
     if (!layer)
-        layer = layerForWindow(m_window);
+        layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, m_window, QRhi::Metal, 0);
 
-    CGSize layerSize = layer.bounds.size;
-    layerSize.width *= layer.contentsScale;
-    layerSize.height *= layer.contentsScale;
-    return QSizeF::fromCGSize(layerSize).toSize();
+    Q_ASSERT(layer);
+    int height = (int)layer.bounds.size.height;
+    int width = (int)layer.bounds.size.width;
+    width *= layer.contentsScale;
+    height *= layer.contentsScale;
+    return QSize(width, height);
 }
 
 bool QMetalSwapChain::isFormatSupported(Format f)
 {
-#ifdef Q_OS_MACOS
-    return f == SDR || f == HDRExtendedSrgbLinear;
-#endif
+    if (f == HDRExtendedSrgbLinear) {
+        if (@available(macOS 10.11, iOS 16.0, *))
+            return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
+        else
+            return false;
+    } else if (f == HDRExtendedDisplayP3Linear) {
+        if (@available(macOS 11.0, iOS 14.0, *))
+            return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f;
+        else
+            return false;
+    }
     return f == SDR;
 }
 
 QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
 {
+    QRHI_RES_RHI(QRhiMetal);
+
     chooseFormats(); // ensure colorFormat and similar are filled out
 
     QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi);
@@ -4226,7 +6250,6 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
 
 #ifdef Q_OS_MACOS
     // m_depthStencil may not be built yet so cannot rely on computed fields in it
-    QRHI_RES_RHI(QRhiMetal);
     rpD->dsFormat = rhiD->d->dev.depth24Stencil8PixelFormatSupported
             ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8;
 #else
@@ -4234,6 +6257,8 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor()
 #endif
 
     rpD->updateSerializedFormat();
+
+    rhiD->registerResource(rpD, false);
     return rpD;
 }
 
@@ -4242,7 +6267,7 @@ void QMetalSwapChain::chooseFormats()
     QRHI_RES_RHI(QRhiMetal);
     samples = rhiD->effectiveSampleCount(m_sampleCount);
     // pick a format that is allowed for CAMetalLayer.pixelFormat
-    if (m_format == HDRExtendedSrgbLinear) {
+    if (m_format == HDRExtendedSrgbLinear || m_format == HDRExtendedDisplayP3Linear) {
         d->colorFormat = MTLPixelFormatRGBA16Float;
         d->rhiColorFormat = QRhiTexture::RGBA16F;
         return;
@@ -4251,6 +6276,17 @@ void QMetalSwapChain::chooseFormats()
     d->rhiColorFormat = QRhiTexture::BGRA8;
 }
 
+void QMetalSwapChain::waitUntilCompleted(int slot)
+{
+    // wait+signal is the general pattern to ensure the commands for a
+    // given frame slot have completed (if sem is 1, we go 0 then 1; if
+    // sem is 0 we go -1, block, completion increments to 0, then us to 1)
+
+    dispatch_semaphore_t sem = d->sem[slot];
+    dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
+    dispatch_semaphore_signal(sem);
+}
+
 bool QMetalSwapChain::createOrResize()
 {
     Q_ASSERT(m_window);
@@ -4272,19 +6308,24 @@ bool QMetalSwapChain::createOrResize()
         return false;
     }
 
-    d->layer = layerForWindow(window);
+    d->layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, window, QRhi::Metal, 0);
     Q_ASSERT(d->layer);
 
     chooseFormats();
     if (d->colorFormat != d->layer.pixelFormat)
         d->layer.pixelFormat = d->colorFormat;
-#ifdef Q_OS_MACOS
-    // Can't enable this on iOS until wantsExtendedDynamicRangeContent is available
+
     if (m_format == HDRExtendedSrgbLinear) {
-        d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB);
-        d->layer.wantsExtendedDynamicRangeContent = YES;
+        if (@available(macOS 10.11, iOS 16.0, *)) {
+            d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB);
+            d->layer.wantsExtendedDynamicRangeContent = YES;
+        }
+    } else if (m_format == HDRExtendedDisplayP3Linear) {
+        if (@available(macOS 11.0, iOS 16.0, *)) {
+            d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3);
+            d->layer.wantsExtendedDynamicRangeContent = YES;
+        }
     }
-#endif
 
     if (m_flags.testFlag(UsedAsTransferSource))
         d->layer.framebufferOnly = NO;
@@ -4309,9 +6350,12 @@ bool QMetalSwapChain::createOrResize()
     // Now set the layer's drawableSize which will stay set to the same value
     // until the next createOrResize(), thus ensuring atomicity with regards to
     // the drawable size in frames.
-    CGSize layerSize = d->layer.bounds.size;
-    layerSize.width *= d->layer.contentsScale;
-    layerSize.height *= d->layer.contentsScale;
+    int width = (int)d->layer.bounds.size.width;
+    int height = (int)d->layer.bounds.size.height;
+    CGSize layerSize = CGSizeMake(width, height);
+    const float scaleFactor = d->layer.contentsScale;
+    layerSize.width *= scaleFactor;
+    layerSize.height *= scaleFactor;
     d->layer.drawableSize = layerSize;
 
     m_currentPixelSize = QSizeF::fromCGSize(layerSize).toSize();
@@ -4319,10 +6363,39 @@ bool QMetalSwapChain::createOrResize()
 
     [d->layer setDevice: rhiD->d->dev];
 
+#ifdef Q_OS_MACOS
+    // Can only use presentsWithTransaction (to get smooth resizing) when
+    // presenting from the main (gui) thread. We predict that based on the
+    // thread this function is called on since if the QRhiSwapChain is
+    // initialied on a given thread then that's almost certainly the thread on
+    // which the QRhi renders and presents.
+    const bool canUsePresentsWithTransaction = NSThread.isMainThread;
+
+    // Have an env.var. just in case it turns out presentsWithTransaction is
+    // not desired in some specific case.
+    static bool allowPresentsWithTransaction = !qEnvironmentVariableIntValue("QT_MTL_NO_TRANSACTION");
+
+    if (allowPresentsWithTransaction && canUsePresentsWithTransaction && !d->liveResizeObserverSet) {
+        d->liveResizeObserverSet = true;
+        NSView *view = reinterpret_cast<NSView *>(window->winId());
+        NSWindow *window = view.window;
+        if (window) {
+            qCDebug(QRHI_LOG_INFO, "will set presentsWithTransaction during live resize");
+            d->liveResizeStartObserver = QMacNotificationObserver(window, NSWindowWillStartLiveResizeNotification, [this] {
+                d->layer.presentsWithTransaction = true;
+            });
+            d->liveResizeEndObserver = QMacNotificationObserver(window, NSWindowDidEndLiveResizeNotification, [this] {
+                d->layer.presentsWithTransaction = false;
+            });
+        }
+    }
+#endif
+
     [d->curDrawable release];
     d->curDrawable = nil;
 
     for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) {
+        d->lastGpuTime[i] = 0;
         if (!d->sem[i])
             d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1);
     }
@@ -4350,12 +6423,13 @@ bool QMetalSwapChain::createOrResize()
 
     rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget
     rtWrapper.d->pixelSize = pixelSize;
-    rtWrapper.d->dpr = float(window->devicePixelRatio());
+    rtWrapper.d->dpr = scaleFactor;
     rtWrapper.d->sampleCount = samples;
     rtWrapper.d->colorAttCount = 1;
     rtWrapper.d->dsAttCount = ds ? 1 : 0;
 
-    qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, size %dx%d", pixelSize.width(), pixelSize.height());
+    qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, pixel size %dx%d (scale %.2f)",
+            pixelSize.width(), pixelSize.height(), scaleFactor);
 
     if (samples > 1) {
         MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
@@ -4384,21 +6458,28 @@ QRhiSwapChainHdrInfo QMetalSwapChain::hdrInfo()
 {
     QRhiSwapChainHdrInfo info;
     info.limitsType = QRhiSwapChainHdrInfo::ColorComponentValue;
-    if (m_format == SDR) {
-        info.limits.colorComponentValue.maxColorComponentValue = 1;
-        return info;
-    }
+    info.limits.colorComponentValue.maxColorComponentValue = 1;
+    info.limits.colorComponentValue.maxPotentialColorComponentValue = 1;
+    info.luminanceBehavior = QRhiSwapChainHdrInfo::DisplayReferred; // 1.0 = SDR white
+    info.sdrWhiteLevel = 200; // typical value, but dummy (don't know the real one); won't matter due to being display-referred
 
-#ifdef Q_OS_MACOS
-    info.isHardCodedDefaults = false;
-    NSView *view = reinterpret_cast<NSView *>(window->winId());
-    info.limits.colorComponentValue.maxColorComponentValue = view.window.screen.maximumExtendedDynamicRangeColorComponentValue;
-#else
-    // ### Fixme: Maybe retrieve the brightness from the screen and if we're not at full brightness we might be able to do more.
-    // For now, assume 2, in line with iPhone 12 specs that claim 625 nits max brightness and 1200 nits max HDR brightness.
-    info.isHardCodedDefaults = true;
-    info.limits.colorComponentValue.maxColorComponentValue = 2;
+    if (m_window) {
+        // Must use m_window, not window, given this may be called before createOrResize().
+#if defined(Q_OS_MACOS)
+        NSView *view = reinterpret_cast<NSView *>(m_window->winId());
+        NSScreen *screen = view.window.screen;
+        info.limits.colorComponentValue.maxColorComponentValue = screen.maximumExtendedDynamicRangeColorComponentValue;
+        info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.maximumPotentialExtendedDynamicRangeColorComponentValue;
+#elif defined(Q_OS_IOS)
+        if (@available(iOS 16.0, *)) {
+            UIView *view = reinterpret_cast<UIView *>(m_window->winId());
+            UIScreen *screen = view.window.windowScene.screen;
+            info.limits.colorComponentValue.maxColorComponentValue = view.window.windowScene.screen.currentEDRHeadroom;
+            info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.potentialEDRHeadroom;
+        }
 #endif
+    }
+
     return info;
 }