diff options
Diffstat (limited to 'src/gui/rhi/qrhimetal.mm')
-rw-r--r-- | src/gui/rhi/qrhimetal.mm | 3627 |
1 files changed, 2985 insertions, 642 deletions
diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm index 5219b5c71d..9fadfc15fa 100644 --- a/src/gui/rhi/qrhimetal.mm +++ b/src/gui/rhi/qrhimetal.mm @@ -1,46 +1,18 @@ -/**************************************************************************** -** -** Copyright (C) 2019 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the Qt Gui module -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "qrhimetal_p_p.h" +// Copyright (C) 2023 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#include "qrhimetal_p.h" +#include "qshader_p.h" #include <QGuiApplication> #include <QWindow> +#include <QUrl> +#include <QFile> +#include <QTemporaryFile> +#include <QFileInfo> #include <qmath.h> +#include <QOperatingSystemVersion> + +#include <QtCore/private/qcore_mac_p.h> #ifdef Q_OS_MACOS #include <AppKit/AppKit.h> @@ -68,19 +40,28 @@ QT_BEGIN_NAMESPACE #error ARC not supported #endif -// Note: we expect everything here pass the Metal API validation when running -// in Debug mode in XCode. Some of the issues that break validation are not -// obvious and not visible when running outside XCode. -// -// An exception is the nextDrawable Called Early blah blah warning, which is -// plain and simply false. +// Even though the macOS 13 MTLBinaryArchive problem (QTBUG-106703) seems +// to be solved in later 13.x releases, we have reports from old Intel hardware +// and older macOS versions where this causes problems (QTBUG-114338). +// Thus we no longer do OS version based differentiation, but rather have a +// single toggle that is currently on, and so QRhi::(set)pipelineCache() +// does nothing with Metal. +#define QRHI_METAL_DISABLE_BINARY_ARCHIVE + +// We should be able to operate with command buffers that do not automatically +// retain/release the resources used by them. (since we have logic that mirrors +// other backends such as the Vulkan one anyway) +#define QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES /*! \class QRhiMetalInitParams \inmodule QtRhi - \internal + \since 6.6 \brief Metal specific initialization parameters. + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. + A Metal-based QRhi needs no special parameters for initialization. \badcode @@ -88,10 +69,13 @@ QT_BEGIN_NAMESPACE rhi = QRhi::create(QRhi::Metal, ¶ms); \endcode - \note Metal API validation cannot be enabled by the application. Instead, - run the debug build of the application in XCode. Generating a - \c{.xcodeproj} file via \c{qmake -spec macx-xcode} provides a convenient - way to enable this. + \note Metal API validation cannot be enabled programmatically by the QRhi. + Instead, either run the debug build of the application in XCode, by + generating a \c{.xcodeproj} file via \c{cmake -G Xcode}, or set the + environment variable \c{METAL_DEVICE_WRAPPER_TYPE=1}. The variable needs to + be set early on in the environment, perferably before starting the process; + attempting to set it at QRhi creation time is not functional in practice. + (too late probably) \note QRhiSwapChain can only target QWindow instances that have their surface type set to QSurface::MetalSurface. @@ -110,14 +94,30 @@ QT_BEGIN_NAMESPACE /*! \class QRhiMetalNativeHandles \inmodule QtRhi - \internal + \since 6.6 \brief Holds the Metal device used by the QRhi. + + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. */ /*! + \variable QRhiMetalNativeHandles::dev + + Set to a valid MTLDevice to import an existing device. +*/ + +/*! + \variable QRhiMetalNativeHandles::cmdQueue + + Set to a valid MTLCommandQueue when importing an existing command queue. + When \nullptr, QRhi will create a new command queue. +*/ + +/*! \class QRhiMetalCommandBufferNativeHandles \inmodule QtRhi - \internal + \since 6.6 \brief Holds the MTLCommandBuffer and MTLRenderCommandEncoder objects that are backing a QRhiCommandBuffer. \note The command buffer object is only guaranteed to be valid while @@ -129,14 +129,28 @@ QT_BEGIN_NAMESPACE \note The command encoder is only valid while recording a pass, that is, between \l{QRhiCommandBuffer::beginPass()} - \l{QRhiCommandBuffer::endPass()}. + + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. */ +/*! + \variable QRhiMetalCommandBufferNativeHandles::commandBuffer +*/ + +/*! + \variable QRhiMetalCommandBufferNativeHandles::encoder +*/ + struct QMetalShader { id<MTLLibrary> lib = nil; id<MTLFunction> func = nil; - std::array<uint, 3> localSize; + std::array<uint, 3> localSize = {}; + uint outputVertexCount = 0; + QShaderDescription desc; QShader::NativeResourceBindingMap nativeResourceBindingMap; + QShader::NativeShaderInfo nativeShaderInfo; void destroy() { nativeResourceBindingMap.clear(); @@ -149,11 +163,14 @@ struct QMetalShader struct QRhiMetalData { - QRhiMetalData(QRhiImplementation *rhi) : ofr(rhi) { } + QRhiMetalData(QRhiMetal *rhi) : q(rhi), ofr(rhi) { } + QRhiMetal *q; id<MTLDevice> dev = nil; id<MTLCommandQueue> cmdQueue = nil; + API_AVAILABLE(macosx(11.0), ios(14.0)) id<MTLBinaryArchive> binArch = nil; + id<MTLCommandBuffer> newCommandBuffer(); MTLRenderPassDescriptor *createDefaultRenderPass(bool hasDepthStencil, const QColor &colorClearValue, const QRhiDepthStencilClearValue &depthStencilClearValue, @@ -161,6 +178,11 @@ struct QRhiMetalData id<MTLLibrary> createMetalLib(const QShader &shader, QShader::Variant shaderVariant, QString *error, QByteArray *entryPoint, QShaderKey *activeKey); id<MTLFunction> createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint); + bool setupBinaryArchive(NSURL *sourceFileUrl = nil); + void addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc); + void trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc); + void addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc); + void trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc); struct DeferredReleaseEntry { enum Type { @@ -168,7 +190,9 @@ struct QRhiMetalData RenderBuffer, Texture, Sampler, - StagingBuffer + StagingBuffer, + GraphicsPipeline, + ComputePipeline }; Type type; int lastActiveFrameSlot; // -1 if not used otherwise 0..FRAMES_IN_FLIGHT-1 @@ -190,6 +214,15 @@ struct QRhiMetalData struct { id<MTLBuffer> buffer; } stagingBuffer; + struct { + id<MTLRenderPipelineState> pipelineState; + id<MTLDepthStencilState> depthStencilState; + std::array<id<MTLComputePipelineState>, 3> tessVertexComputeState; + id<MTLComputePipelineState> tessTessControlComputeState; + } graphicsPipeline; + struct { + id<MTLComputePipelineState> pipelineState; + } computePipeline; }; }; QVector<DeferredReleaseEntry> releaseQueue; @@ -197,6 +230,7 @@ struct QRhiMetalData struct OffscreenFrame { OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { } bool active = false; + double lastGpuTime = 0; QMetalCommandBuffer cbWrapper; } ofr; @@ -211,6 +245,17 @@ struct QRhiMetalData }; QVarLengthArray<TextureReadback, 2> activeTextureReadbacks; + struct BufferReadback + { + int activeFrameSlot = -1; + QRhiReadbackResult *result; + quint32 offset; + quint32 readSize; + id<MTLBuffer> buf; + }; + + QVarLengthArray<BufferReadback, 2> activeBufferReadbacks; + MTLCaptureManager *captureMgr; id<MTLCaptureScope> captureScope = nil; @@ -228,7 +273,7 @@ struct QMetalBufferData bool slotted; id<MTLBuffer> buf[QMTL_FRAMES_IN_FLIGHT]; struct BufferUpdate { - int offset; + quint32 offset; QRhiBufferData data; }; QVarLengthArray<BufferUpdate, 16> pendingUpdates[QMTL_FRAMES_IN_FLIGHT]; @@ -259,15 +304,45 @@ struct QMetalSamplerData id<MTLSamplerState> samplerState = nil; }; +struct QMetalShaderResourceBindingsData { + struct Stage { + struct Buffer { + int nativeBinding; + id<MTLBuffer> mtlbuf; + quint32 offset; + }; + struct Texture { + int nativeBinding; + id<MTLTexture> mtltex; + }; + struct Sampler { + int nativeBinding; + id<MTLSamplerState> mtlsampler; + }; + QVarLengthArray<Buffer, 8> buffers; + QVarLengthArray<Texture, 8> textures; + QVarLengthArray<Sampler, 8> samplers; + QRhiBatchedBindings<id<MTLBuffer> > bufferBatches; + QRhiBatchedBindings<NSUInteger> bufferOffsetBatches; + QRhiBatchedBindings<id<MTLTexture> > textureBatches; + QRhiBatchedBindings<id<MTLSamplerState> > samplerBatches; + } res[QRhiMetal::SUPPORTED_STAGES]; + enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2, TESSCTRL = 3, TESSEVAL = 4 }; +}; + struct QMetalCommandBufferData { id<MTLCommandBuffer> cb; + double lastGpuTime = 0; id<MTLRenderCommandEncoder> currentRenderPassEncoder; id<MTLComputeCommandEncoder> currentComputePassEncoder; + id<MTLComputeCommandEncoder> tessellationComputeEncoder; MTLRenderPassDescriptor *currentPassRpDesc; int currentFirstVertexBinding; QRhiBatchedBindings<id<MTLBuffer> > currentVertexInputsBuffers; QRhiBatchedBindings<NSUInteger> currentVertexInputOffsets; + id<MTLDepthStencilState> currentDepthStencilState; + QMetalShaderResourceBindingsData currentShaderResourceBindingState; }; struct QMetalRenderTargetData @@ -293,22 +368,77 @@ struct QMetalRenderTargetData struct { ColorAtt colorAtt[QMetalRenderPassDescriptor::MAX_COLOR_ATTACHMENTS]; id<MTLTexture> dsTex = nil; + id<MTLTexture> dsResolveTex = nil; bool hasStencil = false; bool depthNeedsStore = false; + bool preserveColor = false; + bool preserveDs = false; } fb; + + QRhiRenderTargetAttachmentTracker::ResIdList currentResIdList; }; struct QMetalGraphicsPipelineData { + QMetalGraphicsPipeline *q = nullptr; id<MTLRenderPipelineState> ps = nil; id<MTLDepthStencilState> ds = nil; MTLPrimitiveType primitiveType; MTLWinding winding; MTLCullMode cullMode; + MTLTriangleFillMode triangleFillMode; float depthBias; float slopeScaledDepthBias; QMetalShader vs; QMetalShader fs; + struct ExtraBufferManager { + enum class WorkBufType { + DeviceLocal, + HostVisible + }; + QMetalBuffer *acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type = WorkBufType::DeviceLocal); + QVector<QMetalBuffer *> deviceLocalWorkBuffers; + QVector<QMetalBuffer *> hostVisibleWorkBuffers; + } extraBufMgr; + struct Tessellation { + QMetalGraphicsPipelineData *q = nullptr; + bool enabled = false; + bool failed = false; + uint inControlPointCount; + uint outControlPointCount; + QMetalShader compVs[3]; + std::array<id<MTLComputePipelineState>, 3> vertexComputeState = {}; + id<MTLComputePipelineState> tessControlComputeState = nil; + QMetalShader compTesc; + QMetalShader vertTese; + quint32 vsCompOutputBufferSize(quint32 vertexOrIndexCount, quint32 instanceCount) const + { + // max vertex output components = resourceLimit(MaxVertexOutputs) * 4 = 60 + return vertexOrIndexCount * instanceCount * sizeof(float) * 60; + } + quint32 tescCompOutputBufferSize(quint32 patchCount) const + { + return outControlPointCount * patchCount * sizeof(float) * 60; + } + quint32 tescCompPatchOutputBufferSize(quint32 patchCount) const + { + // assume maxTessellationControlPerPatchOutputComponents is 128 + return patchCount * sizeof(float) * 128; + } + quint32 patchCountForDrawCall(quint32 vertexOrIndexCount, quint32 instanceCount) const + { + return ((vertexOrIndexCount + inControlPointCount - 1) / inControlPointCount) * instanceCount; + } + static int vsCompVariantToIndex(QShader::Variant vertexCompVariant); + id<MTLComputePipelineState> vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant); + id<MTLComputePipelineState> tescCompPipeline(QRhiMetal *rhiD); + id<MTLRenderPipelineState> teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline); + } tess; + void setupVertexInputDescriptor(MTLVertexDescriptor *desc); + void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc); + + // SPIRV-Cross buffer size buffers + QMetalBuffer *bufferSizeBuffer = nullptr; }; struct QMetalComputePipelineData @@ -316,6 +446,9 @@ struct QMetalComputePipelineData id<MTLComputePipelineState> ps = nil; QMetalShader cs; MTLSize localSize; + + // SPIRV-Cross buffer size buffers + QMetalBuffer *bufferSizeBuffer = nullptr; }; struct QMetalSwapChainData @@ -323,10 +456,16 @@ struct QMetalSwapChainData CAMetalLayer *layer = nullptr; id<CAMetalDrawable> curDrawable = nil; dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT]; + double lastGpuTime[QMTL_FRAMES_IN_FLIGHT]; MTLRenderPassDescriptor *rp = nullptr; id<MTLTexture> msaaTex[QMTL_FRAMES_IN_FLIGHT]; QRhiTexture::Format rhiColorFormat; MTLPixelFormat colorFormat; +#ifdef Q_OS_MACOS + bool liveResizeObserverSet = false; + QMacNotificationObserver liveResizeStartObserver; + QMacNotificationObserver liveResizeEndObserver; +#endif }; QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *importDevice) @@ -337,7 +476,7 @@ QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *import importedDevice = importDevice != nullptr; if (importedDevice) { - if (d->dev) { + if (importDevice->dev) { d->dev = (id<MTLDevice>) importDevice->dev; importedCmdQueue = importDevice->cmdQueue != nullptr; if (importedCmdQueue) @@ -360,9 +499,55 @@ inline Int aligned(Int v, Int byteAlign) return (v + byteAlign - 1) & ~(byteAlign - 1); } +bool QRhiMetal::probe(QRhiMetalInitParams *params) +{ + Q_UNUSED(params); + id<MTLDevice> dev = MTLCreateSystemDefaultDevice(); + if (dev) { + [dev release]; + return true; + } + return false; +} + +id<MTLCommandBuffer> QRhiMetalData::newCommandBuffer() +{ +#ifdef QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES + // Do not let the command buffer mess with the refcount of objects. We do + // have a proper render loop and will manage lifetimes similarly to other + // backends (Vulkan). + return [cmdQueue commandBufferWithUnretainedReferences]; +#else + return [cmdQueue commandBuffer]; +#endif +} + +bool QRhiMetalData::setupBinaryArchive(NSURL *sourceFileUrl) +{ +#ifdef QRHI_METAL_DISABLE_BINARY_ARCHIVE + return false; +#endif + + if (@available(macOS 11.0, iOS 14.0, *)) { + [binArch release]; + MTLBinaryArchiveDescriptor *binArchDesc = [MTLBinaryArchiveDescriptor new]; + binArchDesc.url = sourceFileUrl; + NSError *err = nil; + binArch = [dev newBinaryArchiveWithDescriptor: binArchDesc error: &err]; + [binArchDesc release]; + if (!binArch) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("newBinaryArchiveWithDescriptor failed: %s", qPrintable(msg)); + return false; + } + return true; + } + return false; +} + bool QRhiMetal::create(QRhi::Flags flags) { - Q_UNUSED(flags); + rhiFlags = flags; if (importedDevice) [d->dev retain]; @@ -377,10 +562,12 @@ bool QRhiMetal::create(QRhi::Flags flags) const QString deviceName = QString::fromNSString([d->dev name]); qCDebug(QRHI_LOG_INFO, "Metal device: %s", qPrintable(deviceName)); driverInfoStruct.deviceName = deviceName.toUtf8(); - driverInfoStruct.deviceId = [d->dev registryID]; -#ifdef Q_OS_IOS - driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice; -#else + + // deviceId and vendorId stay unset for now. Note that registryID is not + // suitable as deviceId because it does not seem stable on macOS and can + // apparently change when the system is rebooted. + +#ifdef Q_OS_MACOS if (@available(macOS 10.15, *)) { const MTLDeviceLocation deviceLocation = [d->dev location]; switch (deviceLocation) { @@ -397,8 +584,14 @@ bool QRhiMetal::create(QRhi::Flags flags) break; } } +#else + driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice; #endif + const QOperatingSystemVersion ver = QOperatingSystemVersion::current(); + osMajor = ver.majorVersion(); + osMinor = ver.minorVersion(); + if (importedCmdQueue) [d->cmdQueue retain]; else @@ -415,12 +608,17 @@ bool QRhiMetal::create(QRhi::Flags flags) #if defined(Q_OS_MACOS) caps.maxTextureSize = 16384; caps.baseVertexAndInstance = true; + if (@available(macOS 10.15, *)) + caps.isAppleGPU = [d->dev supportsFamily:MTLGPUFamilyApple7]; + caps.maxThreadGroupSize = 1024; + caps.multiView = true; #elif defined(Q_OS_TVOS) if ([d->dev supportsFeatureSet: MTLFeatureSet(30003)]) // MTLFeatureSet_tvOS_GPUFamily2_v1 caps.maxTextureSize = 16384; else caps.maxTextureSize = 8192; caps.baseVertexAndInstance = false; + caps.isAppleGPU = true; #elif defined(Q_OS_IOS) // welcome to feature set hell if ([d->dev supportsFeatureSet: MTLFeatureSet(16)] // MTLFeatureSet_iOS_GPUFamily5_v1 @@ -438,6 +636,13 @@ bool QRhiMetal::create(QRhi::Flags flags) caps.maxTextureSize = 4096; caps.baseVertexAndInstance = false; } + caps.isAppleGPU = true; + if (@available(iOS 13, *)) { + if ([d->dev supportsFamily: MTLGPUFamilyApple4]) + caps.maxThreadGroupSize = 1024; + if ([d->dev supportsFamily: MTLGPUFamilyApple5]) + caps.multiView = true; + } #endif caps.supportedSampleCounts = { 1 }; @@ -446,6 +651,9 @@ bool QRhiMetal::create(QRhi::Flags flags) caps.supportedSampleCounts.append(sampleCount); } + if (rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + d->setupBinaryArchive(); + nativeHandlesStruct.dev = (MTLDevice *) d->dev; nativeHandlesStruct.cmdQueue = (MTLCommandQueue *) d->cmdQueue; @@ -464,6 +672,11 @@ void QRhiMetal::destroy() [d->captureScope release]; d->captureScope = nil; + if (@available(macOS 11.0, iOS 14.0, *)) { + [d->binArch release]; + d->binArch = nil; + } + [d->cmdQueue release]; if (!importedCmdQueue) d->cmdQueue = nil; @@ -478,23 +691,12 @@ QVector<int> QRhiMetal::supportedSampleCounts() const return caps.supportedSampleCounts; } -int QRhiMetal::effectiveSampleCount(int sampleCount) const -{ - // Stay compatible with QSurfaceFormat and friends where samples == 0 means the same as 1. - const int s = qBound(1, sampleCount, 64); - if (!supportedSampleCounts().contains(s)) { - qWarning("Attempted to set unsupported sample count %d", sampleCount); - return 1; - } - return s; -} - QRhiSwapChain *QRhiMetal::createSwapChain() { return new QMetalSwapChain(this); } -QRhiBuffer *QRhiMetal::createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, int size) +QRhiBuffer *QRhiMetal::createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, quint32 size) { return new QMetalBuffer(this, type, usage, size); } @@ -537,16 +739,32 @@ bool QRhiMetal::isTextureFormatSupported(QRhiTexture::Format format, QRhiTexture { Q_UNUSED(flags); + bool supportsFamilyMac2 = false; // needed for BC* formats + bool supportsFamilyApple3 = false; + #ifdef Q_OS_MACOS - if (format >= QRhiTexture::ETC2_RGB8 && format <= QRhiTexture::ETC2_RGBA8) - return false; - if (format >= QRhiTexture::ASTC_4x4 && format <= QRhiTexture::ASTC_12x12) - return false; + supportsFamilyMac2 = true; + if (caps.isAppleGPU) + supportsFamilyApple3 = true; #else - if (format >= QRhiTexture::BC1 && format <= QRhiTexture::BC7) - return false; + supportsFamilyApple3 = true; #endif + // BC5 is not available for any Apple hardare + if (format == QRhiTexture::BC5) + return false; + + if (!supportsFamilyApple3) { + if (format >= QRhiTexture::ETC2_RGB8 && format <= QRhiTexture::ETC2_RGBA8) + return false; + if (format >= QRhiTexture::ASTC_4x4 && format <= QRhiTexture::ASTC_12x12) + return false; + } + + if (!supportsFamilyMac2) + if (format >= QRhiTexture::BC1 && format <= QRhiTexture::BC7) + return false; + return true; } @@ -560,7 +778,7 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const case QRhi::DebugMarkers: return true; case QRhi::Timestamps: - return false; + return true; case QRhi::Instancing: return true; case QRhi::CustomInstanceStepRate: @@ -604,7 +822,12 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const case QRhi::ReadBackAnyTextureFormat: return true; case QRhi::PipelineCacheDataLoadSave: - return false; + { + if (@available(macOS 11.0, iOS 14.0, *)) + return true; + else + return false; + } case QRhi::ImageDataStride: return true; case QRhi::RenderBufferImport: @@ -615,6 +838,30 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const return true; case QRhi::TextureArrays: return true; + case QRhi::Tessellation: + return true; + case QRhi::GeometryShader: + return false; + case QRhi::TextureArrayRange: + return false; + case QRhi::NonFillPolygonMode: + return true; + case QRhi::OneDimensionalTextures: + return true; + case QRhi::OneDimensionalTextureMipmaps: + return false; + case QRhi::HalfAttributes: + return true; + case QRhi::RenderToOneDimensionalTexture: + return false; + case QRhi::ThreeDimensionalTextureMipmaps: + return true; + case QRhi::MultiView: + return caps.multiView; + case QRhi::TextureViewFormat: + return false; + case QRhi::ResolveDepthStencil: + return true; default: Q_UNREACHABLE(); return false; @@ -643,15 +890,15 @@ int QRhiMetal::resourceLimit(QRhi::ResourceLimit limit) const case QRhi::MaxThreadGroupY: Q_FALLTHROUGH(); case QRhi::MaxThreadGroupZ: -#if defined(Q_OS_MACOS) - return 1024; -#else - return 512; -#endif + return caps.maxThreadGroupSize; case QRhi::TextureArraySizeMax: return 2048; case QRhi::MaxUniformBufferRange: return 65536; + case QRhi::MaxVertexInputs: + return 31; + case QRhi::MaxVertexOutputs: + return 15; // use the minimum from MTLGPUFamily1/2/3 default: Q_UNREACHABLE(); return 0; @@ -668,9 +915,11 @@ QRhiDriverInfo QRhiMetal::driverInfo() const return driverInfoStruct; } -void QRhiMetal::sendVMemStatsToProfiler() +QRhiStats QRhiMetal::statistics() { - // nothing to do here + QRhiStats result; + result.totalPipelineCreationTime = totalPipelineCreationTime(); + return result; } bool QRhiMetal::makeThreadLocalNativeContextCurrent() @@ -692,14 +941,133 @@ bool QRhiMetal::isDeviceLost() const return false; } +struct QMetalPipelineCacheDataHeader +{ + quint32 rhiId; + quint32 arch; + quint32 dataSize; + quint32 osMajor; + quint32 osMinor; + char driver[236]; +}; + QByteArray QRhiMetal::pipelineCacheData() { - return QByteArray(); + Q_STATIC_ASSERT(sizeof(QMetalPipelineCacheDataHeader) == 256); + QByteArray data; + if (@available(macOS 11.0, iOS 14.0, *)) { + if (!d->binArch || !rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + return data; + + QTemporaryFile tmp; + if (!tmp.open()) { + qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal"); + return data; + } + tmp.close(); // the file exists until the tmp dtor runs + + const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath(); + NSURL *url = QUrl::fromLocalFile(fn).toNSURL(); + NSError *err = nil; + if (![d->binArch serializeToURL: url error: &err]) { + const QString msg = QString::fromNSString(err.localizedDescription); + // Some of these "errors" are not actual errors. (think of "Nothing to serialize") + qCDebug(QRHI_LOG_INFO, "Failed to serialize MTLBinaryArchive: %s", qPrintable(msg)); + return data; + } + + QFile f(fn); + if (!f.open(QIODevice::ReadOnly)) { + qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to reopen temporary file"); + return data; + } + const QByteArray blob = f.readAll(); + f.close(); + + const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader); + const quint32 dataSize = quint32(blob.size()); + + data.resize(headerSize + dataSize); + + QMetalPipelineCacheDataHeader header = {}; + header.rhiId = pipelineCacheRhiId(); + header.arch = quint32(sizeof(void*)); + header.dataSize = quint32(dataSize); + header.osMajor = osMajor; + header.osMinor = osMinor; + const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length())); + if (driverStrLen) + memcpy(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen); + header.driver[driverStrLen] = '\0'; + + memcpy(data.data(), &header, headerSize); + memcpy(data.data() + headerSize, blob.constData(), dataSize); + } + return data; } void QRhiMetal::setPipelineCacheData(const QByteArray &data) { - Q_UNUSED(data); + if (data.isEmpty()) + return; + + const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader); + if (data.size() < qsizetype(headerSize)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (header incomplete)"); + return; + } + + const size_t dataOffset = headerSize; + QMetalPipelineCacheDataHeader header; + memcpy(&header, data.constData(), headerSize); + + const quint32 rhiId = pipelineCacheRhiId(); + if (header.rhiId != rhiId) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: The data is for a different QRhi version or backend (%u, %u)", + rhiId, header.rhiId); + return; + } + + const quint32 arch = quint32(sizeof(void*)); + if (header.arch != arch) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Architecture does not match (%u, %u)", + arch, header.arch); + return; + } + + if (header.osMajor != osMajor || header.osMinor != osMinor) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: OS version does not match (%u.%u, %u.%u)", + osMajor, osMinor, header.osMajor, header.osMinor); + return; + } + + const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length())); + if (strncmp(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Metal device name does not match"); + return; + } + + if (data.size() < qsizetype(dataOffset + header.dataSize)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (data incomplete)"); + return; + } + + if (@available(macOS 11.0, iOS 14.0, *)) { + const char *p = data.constData() + dataOffset; + + QTemporaryFile tmp; + if (!tmp.open()) { + qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal"); + return; + } + tmp.write(p, header.dataSize); + tmp.close(); // the file exists until the tmp dtor runs + + const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath(); + NSURL *url = QUrl::fromLocalFile(fn).toNSURL(); + if (d->setupBinaryArchive(url)) + qCDebug(QRHI_LOG_INFO, "Created MTLBinaryArchive with initial data of %u bytes", header.dataSize); + } } QRhiRenderBuffer *QRhiMetal::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize, @@ -769,6 +1137,136 @@ static inline int mapBinding(int binding, return -1; } +static inline void bindStageBuffers(QMetalCommandBuffer *cbD, + int stage, + const QRhiBatchedBindings<id<MTLBuffer>>::Batch &bufferBatch, + const QRhiBatchedBindings<NSUInteger>::Batch &offsetBatch) +{ + switch (stage) { + case QMetalShaderResourceBindingsData::VERTEX: + [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::FRAGMENT: + [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::COMPUTE: + [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::TESSCTRL: + case QMetalShaderResourceBindingsData::TESSEVAL: + // do nothing. These are used later for tessellation + break; + default: + Q_UNREACHABLE(); + break; + } +} + +static inline void bindStageTextures(QMetalCommandBuffer *cbD, + int stage, + const QRhiBatchedBindings<id<MTLTexture>>::Batch &textureBatch) +{ + switch (stage) { + case QMetalShaderResourceBindingsData::VERTEX: + [cbD->d->currentRenderPassEncoder setVertexTextures: textureBatch.resources.constData() + withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::FRAGMENT: + [cbD->d->currentRenderPassEncoder setFragmentTextures: textureBatch.resources.constData() + withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::COMPUTE: + [cbD->d->currentComputePassEncoder setTextures: textureBatch.resources.constData() + withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::TESSCTRL: + case QMetalShaderResourceBindingsData::TESSEVAL: + // do nothing. These are used later for tessellation + break; + default: + Q_UNREACHABLE(); + break; + } +} + +static inline void bindStageSamplers(QMetalCommandBuffer *cbD, + int encoderStage, + const QRhiBatchedBindings<id<MTLSamplerState>>::Batch &samplerBatch) +{ + switch (encoderStage) { + case QMetalShaderResourceBindingsData::VERTEX: + [cbD->d->currentRenderPassEncoder setVertexSamplerStates: samplerBatch.resources.constData() + withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::FRAGMENT: + [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: samplerBatch.resources.constData() + withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::COMPUTE: + [cbD->d->currentComputePassEncoder setSamplerStates: samplerBatch.resources.constData() + withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::TESSCTRL: + case QMetalShaderResourceBindingsData::TESSEVAL: + // do nothing. These are used later for tessellation + break; + default: + Q_UNREACHABLE(); + break; + } +} + +// Helper that is not used during the common vertex+fragment and compute +// pipelines, but is necessary when tessellation is involved and so the +// graphics pipeline is under the hood a combination of multiple compute and +// render pipelines. We need to be able to set the buffers, textures, samplers +// when a switching between render and compute encoders. +static inline void rebindShaderResources(QMetalCommandBuffer *cbD, int resourceStage, int encoderStage, + const QMetalShaderResourceBindingsData *customBindingState = nullptr) +{ + const QMetalShaderResourceBindingsData *bindingData = customBindingState ? customBindingState : &cbD->d->currentShaderResourceBindingState; + + for (int i = 0, ie = bindingData->res[resourceStage].bufferBatches.batches.count(); i != ie; ++i) { + const auto &bufferBatch(bindingData->res[resourceStage].bufferBatches.batches[i]); + const auto &offsetBatch(bindingData->res[resourceStage].bufferOffsetBatches.batches[i]); + bindStageBuffers(cbD, encoderStage, bufferBatch, offsetBatch); + } + + for (int i = 0, ie = bindingData->res[resourceStage].textureBatches.batches.count(); i != ie; ++i) { + const auto &batch(bindingData->res[resourceStage].textureBatches.batches[i]); + bindStageTextures(cbD, encoderStage, batch); + } + + for (int i = 0, ie = bindingData->res[resourceStage].samplerBatches.batches.count(); i != ie; ++i) { + const auto &batch(bindingData->res[resourceStage].samplerBatches.batches[i]); + bindStageSamplers(cbD, encoderStage, batch); + } +} + +static inline QRhiShaderResourceBinding::StageFlag toRhiSrbStage(int stage) +{ + switch (stage) { + case QMetalShaderResourceBindingsData::VERTEX: + return QRhiShaderResourceBinding::StageFlag::VertexStage; + case QMetalShaderResourceBindingsData::TESSCTRL: + return QRhiShaderResourceBinding::StageFlag::TessellationControlStage; + case QMetalShaderResourceBindingsData::TESSEVAL: + return QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage; + case QMetalShaderResourceBindingsData::FRAGMENT: + return QRhiShaderResourceBinding::StageFlag::FragmentStage; + case QMetalShaderResourceBindingsData::COMPUTE: + return QRhiShaderResourceBinding::StageFlag::ComputeStage; + } + + Q_UNREACHABLE_RETURN(QRhiShaderResourceBinding::StageFlag::VertexStage); +} + void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD, QMetalCommandBuffer *cbD, int dynamicOffsetCount, @@ -776,38 +1274,16 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD bool offsetOnlyChange, const QShader::NativeResourceBindingMap *nativeResourceBindingMaps[SUPPORTED_STAGES]) { - struct Stage { - struct Buffer { - int nativeBinding; - id<MTLBuffer> mtlbuf; - uint offset; - }; - struct Texture { - int nativeBinding; - id<MTLTexture> mtltex; - }; - struct Sampler { - int nativeBinding; - id<MTLSamplerState> mtlsampler; - }; - QVarLengthArray<Buffer, 8> buffers; - QVarLengthArray<Texture, 8> textures; - QVarLengthArray<Sampler, 8> samplers; - QRhiBatchedBindings<id<MTLBuffer> > bufferBatches; - QRhiBatchedBindings<NSUInteger> bufferOffsetBatches; - QRhiBatchedBindings<id<MTLTexture> > textureBatches; - QRhiBatchedBindings<id<MTLSamplerState> > samplerBatches; - } res[SUPPORTED_STAGES]; - enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2 }; + QMetalShaderResourceBindingsData bindingData; - for (const QRhiShaderResourceBinding &binding : qAsConst(srbD->sortedBindings)) { - const QRhiShaderResourceBinding::Data *b = binding.data(); + for (const QRhiShaderResourceBinding &binding : std::as_const(srbD->sortedBindings)) { + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(binding); switch (b->type) { case QRhiShaderResourceBinding::UniformBuffer: { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.ubuf.buf); id<MTLBuffer> mtlbuf = bufD->d->buf[bufD->d->slotted ? currentFrameSlot : 0]; - uint offset = uint(b->u.ubuf.offset); + quint32 offset = b->u.ubuf.offset; for (int i = 0; i < dynamicOffsetCount; ++i) { const QRhiCommandBuffer::DynamicOffset &dynOfs(dynamicOffsets[i]); if (dynOfs.first == b->binding) { @@ -815,51 +1291,38 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD break; } } - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBinding = mapBinding(b->binding, VERTEX, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - res[VERTEX].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBinding = mapBinding(b->binding, FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - res[FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBinding = mapBinding(b->binding, COMPUTE, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - res[COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset }); + + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer); + if (nativeBinding >= 0) + bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset }); + } } } break; case QRhiShaderResourceBinding::SampledTexture: + case QRhiShaderResourceBinding::Texture: + case QRhiShaderResourceBinding::Sampler: { - const QRhiShaderResourceBinding::Data::SampledTextureData *data = &b->u.stex; + const QRhiShaderResourceBinding::Data::TextureAndOrSamplerData *data = &b->u.stex; for (int elem = 0; elem < data->count; ++elem) { QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.stex.texSamplers[elem].tex); QMetalSampler *samplerD = QRHI_RES(QMetalSampler, b->u.stex.texSamplers[elem].sampler); - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBindingTexture = mapBinding(b->binding, VERTEX, nativeResourceBindingMaps, BindingType::Texture); - const int nativeBindingSampler = mapBinding(b->binding, VERTEX, nativeResourceBindingMaps, BindingType::Sampler); - if (nativeBindingTexture >= 0 && nativeBindingSampler >= 0) { - res[VERTEX].textures.append({ nativeBindingTexture + elem, texD->d->tex }); - res[VERTEX].samplers.append({ nativeBindingSampler + elem, samplerD->d->samplerState }); - } - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBindingTexture = mapBinding(b->binding, FRAGMENT, nativeResourceBindingMaps, BindingType::Texture); - const int nativeBindingSampler = mapBinding(b->binding, FRAGMENT, nativeResourceBindingMaps, BindingType::Sampler); - if (nativeBindingTexture >= 0 && nativeBindingSampler >= 0) { - res[FRAGMENT].textures.append({ nativeBindingTexture + elem, texD->d->tex }); - res[FRAGMENT].samplers.append({ nativeBindingSampler + elem, samplerD->d->samplerState }); - } - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBindingTexture = mapBinding(b->binding, COMPUTE, nativeResourceBindingMaps, BindingType::Texture); - const int nativeBindingSampler = mapBinding(b->binding, COMPUTE, nativeResourceBindingMaps, BindingType::Sampler); - if (nativeBindingTexture >= 0 && nativeBindingSampler >= 0) { - res[COMPUTE].textures.append({ nativeBindingTexture + elem, texD->d->tex }); - res[COMPUTE].samplers.append({ nativeBindingSampler + elem, samplerD->d->samplerState }); + + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + // Must handle all three cases (combined, separate, separate): + // first = texture binding, second = sampler binding + // first = texture binding + // first = sampler binding (i.e. BindingType::Texture...) + const int textureBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture); + const int samplerBinding = texD && samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Sampler) + : (samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture) : -1); + if (textureBinding >= 0 && texD) + bindingData.res[stage].textures.append({ textureBinding + elem, texD->d->tex }); + if (samplerBinding >= 0) + bindingData.res[stage].samplers.append({ samplerBinding + elem, samplerD->d->samplerState }); } } } @@ -871,20 +1334,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex); id<MTLTexture> t = texD->d->viewForLevel(b->u.simage.level); - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBinding = mapBinding(b->binding, VERTEX, nativeResourceBindingMaps, BindingType::Texture); - if (nativeBinding >= 0) - res[VERTEX].textures.append({ nativeBinding, t }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBinding = mapBinding(b->binding, FRAGMENT, nativeResourceBindingMaps, BindingType::Texture); - if (nativeBinding >= 0) - res[FRAGMENT].textures.append({ nativeBinding, t }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBinding = mapBinding(b->binding, COMPUTE, nativeResourceBindingMaps, BindingType::Texture); - if (nativeBinding >= 0) - res[COMPUTE].textures.append({ nativeBinding, t }); + + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture); + if (nativeBinding >= 0) + bindingData.res[stage].textures.append({ nativeBinding, t }); + } } } break; @@ -894,21 +1350,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); id<MTLBuffer> mtlbuf = bufD->d->buf[0]; - uint offset = uint(b->u.sbuf.offset); - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBinding = mapBinding(b->binding, VERTEX, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - res[VERTEX].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBinding = mapBinding(b->binding, FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - res[FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBinding = mapBinding(b->binding, COMPUTE, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - res[COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset }); + quint32 offset = b->u.sbuf.offset; + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer); + if (nativeBinding >= 0) + bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset }); + } } } break; @@ -919,9 +1367,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD } for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { - if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == VERTEX || stage == FRAGMENT)) + if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT + || stage == QMetalShaderResourceBindingsData::TESSCTRL || stage == QMetalShaderResourceBindingsData::TESSEVAL)) continue; - if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && stage == COMPUTE) + if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && (stage == QMetalShaderResourceBindingsData::COMPUTE)) continue; // QRhiBatchedBindings works with the native bindings and expects @@ -929,104 +1378,107 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD // on the QRhi (SPIR-V) binding) is not helpful in this regard, so we // have to sort here every time. - std::sort(res[stage].buffers.begin(), res[stage].buffers.end(), [](const Stage::Buffer &a, const Stage::Buffer &b) { + std::sort(bindingData.res[stage].buffers.begin(), bindingData.res[stage].buffers.end(), [](const QMetalShaderResourceBindingsData::Stage::Buffer &a, const QMetalShaderResourceBindingsData::Stage::Buffer &b) { return a.nativeBinding < b.nativeBinding; }); - for (const Stage::Buffer &buf : qAsConst(res[stage].buffers)) { - res[stage].bufferBatches.feed(buf.nativeBinding, buf.mtlbuf); - res[stage].bufferOffsetBatches.feed(buf.nativeBinding, buf.offset); + for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : std::as_const(bindingData.res[stage].buffers)) { + bindingData.res[stage].bufferBatches.feed(buf.nativeBinding, buf.mtlbuf); + bindingData.res[stage].bufferOffsetBatches.feed(buf.nativeBinding, buf.offset); } - res[stage].bufferBatches.finish(); - res[stage].bufferOffsetBatches.finish(); - - for (int i = 0, ie = res[stage].bufferBatches.batches.count(); i != ie; ++i) { - const auto &bufferBatch(res[stage].bufferBatches.batches[i]); - const auto &offsetBatch(res[stage].bufferOffsetBatches.batches[i]); - switch (stage) { - case VERTEX: - [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData() - offsets: offsetBatch.resources.constData() - withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; - break; - case FRAGMENT: - [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData() - offsets: offsetBatch.resources.constData() - withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; - break; - case COMPUTE: - [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData() - offsets: offsetBatch.resources.constData() - withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; - break; - default: - Q_UNREACHABLE(); - break; + bindingData.res[stage].bufferBatches.finish(); + bindingData.res[stage].bufferOffsetBatches.finish(); + + for (int i = 0, ie = bindingData.res[stage].bufferBatches.batches.count(); i != ie; ++i) { + const auto &bufferBatch(bindingData.res[stage].bufferBatches.batches[i]); + const auto &offsetBatch(bindingData.res[stage].bufferOffsetBatches.batches[i]); + // skip setting Buffer binding if the current state is already correct + if (cbD->d->currentShaderResourceBindingState.res[stage].bufferBatches.batches.count() > i + && cbD->d->currentShaderResourceBindingState.res[stage].bufferOffsetBatches.batches.count() > i + && bufferBatch == cbD->d->currentShaderResourceBindingState.res[stage].bufferBatches.batches[i] + && offsetBatch == cbD->d->currentShaderResourceBindingState.res[stage].bufferOffsetBatches.batches[i]) + { + continue; } + bindStageBuffers(cbD, stage, bufferBatch, offsetBatch); } if (offsetOnlyChange) continue; - std::sort(res[stage].textures.begin(), res[stage].textures.end(), [](const Stage::Texture &a, const Stage::Texture &b) { + std::sort(bindingData.res[stage].textures.begin(), bindingData.res[stage].textures.end(), [](const QMetalShaderResourceBindingsData::Stage::Texture &a, const QMetalShaderResourceBindingsData::Stage::Texture &b) { return a.nativeBinding < b.nativeBinding; }); - std::sort(res[stage].samplers.begin(), res[stage].samplers.end(), [](const Stage::Sampler &a, const Stage::Sampler &b) { + std::sort(bindingData.res[stage].samplers.begin(), bindingData.res[stage].samplers.end(), [](const QMetalShaderResourceBindingsData::Stage::Sampler &a, const QMetalShaderResourceBindingsData::Stage::Sampler &b) { return a.nativeBinding < b.nativeBinding; }); - for (const Stage::Texture &t : qAsConst(res[stage].textures)) - res[stage].textureBatches.feed(t.nativeBinding, t.mtltex); + for (const QMetalShaderResourceBindingsData::Stage::Texture &t : std::as_const(bindingData.res[stage].textures)) + bindingData.res[stage].textureBatches.feed(t.nativeBinding, t.mtltex); - for (const Stage::Sampler &s : qAsConst(res[stage].samplers)) - res[stage].samplerBatches.feed(s.nativeBinding, s.mtlsampler); + for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : std::as_const(bindingData.res[stage].samplers)) + bindingData.res[stage].samplerBatches.feed(s.nativeBinding, s.mtlsampler); - res[stage].textureBatches.finish(); - res[stage].samplerBatches.finish(); + bindingData.res[stage].textureBatches.finish(); + bindingData.res[stage].samplerBatches.finish(); - for (int i = 0, ie = res[stage].textureBatches.batches.count(); i != ie; ++i) { - const auto &batch(res[stage].textureBatches.batches[i]); - switch (stage) { - case VERTEX: - [cbD->d->currentRenderPassEncoder setVertexTextures: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case FRAGMENT: - [cbD->d->currentRenderPassEncoder setFragmentTextures: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case COMPUTE: - [cbD->d->currentComputePassEncoder setTextures: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - default: - Q_UNREACHABLE(); - break; + for (int i = 0, ie = bindingData.res[stage].textureBatches.batches.count(); i != ie; ++i) { + const auto &batch(bindingData.res[stage].textureBatches.batches[i]); + // skip setting Texture binding if the current state is already correct + if (cbD->d->currentShaderResourceBindingState.res[stage].textureBatches.batches.count() > i + && batch == cbD->d->currentShaderResourceBindingState.res[stage].textureBatches.batches[i]) + { + continue; } + bindStageTextures(cbD, stage, batch); } - for (int i = 0, ie = res[stage].samplerBatches.batches.count(); i != ie; ++i) { - const auto &batch(res[stage].samplerBatches.batches[i]); - switch (stage) { - case VERTEX: - [cbD->d->currentRenderPassEncoder setVertexSamplerStates: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case FRAGMENT: - [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case COMPUTE: - [cbD->d->currentComputePassEncoder setSamplerStates: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - default: - Q_UNREACHABLE(); - break; + + for (int i = 0, ie = bindingData.res[stage].samplerBatches.batches.count(); i != ie; ++i) { + const auto &batch(bindingData.res[stage].samplerBatches.batches[i]); + // skip setting Sampler State if the current state is already correct + if (cbD->d->currentShaderResourceBindingState.res[stage].samplerBatches.batches.count() > i + && batch == cbD->d->currentShaderResourceBindingState.res[stage].samplerBatches.batches[i]) + { + continue; } + bindStageSamplers(cbD, stage, batch); } } + + cbD->d->currentShaderResourceBindingState = bindingData; +} + +void QMetalGraphicsPipeline::makeActiveForCurrentRenderPassEncoder(QMetalCommandBuffer *cbD) +{ + [cbD->d->currentRenderPassEncoder setRenderPipelineState: d->ps]; + + if (cbD->d->currentDepthStencilState != d->ds) { + [cbD->d->currentRenderPassEncoder setDepthStencilState: d->ds]; + cbD->d->currentDepthStencilState = d->ds; + } + + if (cbD->currentCullMode == -1 || d->cullMode != uint(cbD->currentCullMode)) { + [cbD->d->currentRenderPassEncoder setCullMode: d->cullMode]; + cbD->currentCullMode = int(d->cullMode); + } + if (cbD->currentTriangleFillMode == -1 || d->triangleFillMode != uint(cbD->currentTriangleFillMode)) { + [cbD->d->currentRenderPassEncoder setTriangleFillMode: d->triangleFillMode]; + cbD->currentTriangleFillMode = int(d->triangleFillMode); + } + if (cbD->currentFrontFaceWinding == -1 || d->winding != uint(cbD->currentFrontFaceWinding)) { + [cbD->d->currentRenderPassEncoder setFrontFacingWinding: d->winding]; + cbD->currentFrontFaceWinding = int(d->winding); + } + if (!qFuzzyCompare(d->depthBias, cbD->currentDepthBiasValues.first) + || !qFuzzyCompare(d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second)) + { + [cbD->d->currentRenderPassEncoder setDepthBias: d->depthBias + slopeScale: d->slopeScaledDepthBias + clamp: 0.0f]; + cbD->currentDepthBiasValues = { d->depthBias, d->slopeScaledDepthBias }; + } } void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) @@ -1035,29 +1487,24 @@ void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); QMetalGraphicsPipeline *psD = QRHI_RES(QMetalGraphicsPipeline, ps); - if (cbD->currentGraphicsPipeline != ps || cbD->currentPipelineGeneration != psD->generation) { - cbD->currentGraphicsPipeline = ps; - cbD->currentComputePipeline = nullptr; - cbD->currentPipelineGeneration = psD->generation; + if (cbD->currentGraphicsPipeline == psD && cbD->currentPipelineGeneration == psD->generation) + return; - [cbD->d->currentRenderPassEncoder setRenderPipelineState: psD->d->ps]; - [cbD->d->currentRenderPassEncoder setDepthStencilState: psD->d->ds]; + cbD->currentGraphicsPipeline = psD; + cbD->currentComputePipeline = nullptr; + cbD->currentPipelineGeneration = psD->generation; - if (cbD->currentCullMode == -1 || psD->d->cullMode != uint(cbD->currentCullMode)) { - [cbD->d->currentRenderPassEncoder setCullMode: psD->d->cullMode]; - cbD->currentCullMode = int(psD->d->cullMode); + if (!psD->d->tess.enabled && !psD->d->tess.failed) { + psD->makeActiveForCurrentRenderPassEncoder(cbD); + } else { + // mark work buffers that can now be safely reused as reusable + for (QMetalBuffer *workBuf : psD->d->extraBufMgr.deviceLocalWorkBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot) + workBuf->lastActiveFrameSlot = -1; } - if (cbD->currentFrontFaceWinding == -1 || psD->d->winding != uint(cbD->currentFrontFaceWinding)) { - [cbD->d->currentRenderPassEncoder setFrontFacingWinding: psD->d->winding]; - cbD->currentFrontFaceWinding = int(psD->d->winding); - } - if (!qFuzzyCompare(psD->d->depthBias, cbD->currentDepthBiasValues.first) - || !qFuzzyCompare(psD->d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second)) - { - [cbD->d->currentRenderPassEncoder setDepthBias: psD->d->depthBias - slopeScale: psD->d->slopeScaledDepthBias - clamp: 0.0f]; - cbD->currentDepthBiasValues = { psD->d->depthBias, psD->d->slopeScaledDepthBias }; + for (QMetalBuffer *workBuf : psD->d->extraBufMgr.hostVisibleWorkBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot) + workBuf->lastActiveFrameSlot = -1; } } @@ -1070,8 +1517,8 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass != QMetalCommandBuffer::NoPass); - QMetalGraphicsPipeline *gfxPsD = QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline); - QMetalComputePipeline *compPsD = QRHI_RES(QMetalComputePipeline, cbD->currentComputePipeline); + QMetalGraphicsPipeline *gfxPsD = cbD->currentGraphicsPipeline; + QMetalComputePipeline *compPsD = cbD->currentComputePipeline; if (!srb) { if (gfxPsD) @@ -1085,9 +1532,15 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind bool hasDynamicOffsetInSrb = false; bool resNeedsRebind = false; + // SPIRV-Cross buffer size buffers + // Need to determine storage buffer sizes here as this is the last opportunity for storage + // buffer bindings (offset, size) to be specified before draw / dispatch call + const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer); + QMap<QRhiShaderResourceBinding::StageFlag, QMap<int, quint32>> storageBufferSizes; + // do buffer writes, figure out if we need to rebind, and mark as in-use for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) { - const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data(); + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->sortedBindings.at(i)); QMetalShaderResourceBindings::BoundResourceData &bd(srbD->boundResourceData[i]); switch (b->type) { case QRhiShaderResourceBinding::UniformBuffer: @@ -1108,8 +1561,10 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind } break; case QRhiShaderResourceBinding::SampledTexture: + case QRhiShaderResourceBinding::Texture: + case QRhiShaderResourceBinding::Sampler: { - const QRhiShaderResourceBinding::Data::SampledTextureData *data = &b->u.stex; + const QRhiShaderResourceBinding::Data::TextureAndOrSamplerData *data = &b->u.stex; if (bd.stex.count != data->count) { bd.stex.count = data->count; resNeedsRebind = true; @@ -1117,19 +1572,26 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind for (int elem = 0; elem < data->count; ++elem) { QMetalTexture *texD = QRHI_RES(QMetalTexture, data->texSamplers[elem].tex); QMetalSampler *samplerD = QRHI_RES(QMetalSampler, data->texSamplers[elem].sampler); - if (texD->generation != bd.stex.d[elem].texGeneration - || texD->m_id != bd.stex.d[elem].texId - || samplerD->generation != bd.stex.d[elem].samplerGeneration - || samplerD->m_id != bd.stex.d[elem].samplerId) + Q_ASSERT(texD || samplerD); + const quint64 texId = texD ? texD->m_id : 0; + const uint texGen = texD ? texD->generation : 0; + const quint64 samplerId = samplerD ? samplerD->m_id : 0; + const uint samplerGen = samplerD ? samplerD->generation : 0; + if (texGen != bd.stex.d[elem].texGeneration + || texId != bd.stex.d[elem].texId + || samplerGen != bd.stex.d[elem].samplerGeneration + || samplerId != bd.stex.d[elem].samplerId) { resNeedsRebind = true; - bd.stex.d[elem].texId = texD->m_id; - bd.stex.d[elem].texGeneration = texD->generation; - bd.stex.d[elem].samplerId = samplerD->m_id; - bd.stex.d[elem].samplerGeneration = samplerD->generation; + bd.stex.d[elem].texId = texId; + bd.stex.d[elem].texGeneration = texGen; + bd.stex.d[elem].samplerId = samplerId; + bd.stex.d[elem].samplerGeneration = samplerGen; } - texD->lastActiveFrameSlot = currentFrameSlot; - samplerD->lastActiveFrameSlot = currentFrameSlot; + if (texD) + texD->lastActiveFrameSlot = currentFrameSlot; + if (samplerD) + samplerD->lastActiveFrameSlot = currentFrameSlot; } } break; @@ -1152,6 +1614,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); + + if (needsBufferSizeBuffer) { + for (int i = 0; i < 6; ++i) { + const QRhiShaderResourceBinding::StageFlag stage = + QRhiShaderResourceBinding::StageFlag(1 << i); + if (b->stage.testFlag(stage)) { + storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size(); + } + } + } + executeBufferHostWritesForCurrentFrame(bufD); if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) { resNeedsRebind = true; @@ -1167,26 +1640,141 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind } } + if (needsBufferSizeBuffer) { + QMetalBuffer *bufD = nullptr; + QVarLengthArray<QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag>, 4> shaders; + + if (compPsD) { + bufD = compPsD->d->bufferSizeBuffer; + Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage)); + } else { + bufD = gfxPsD->d->bufferSizeBuffer; + if (gfxPsD->d->tess.enabled) { + + // Assumptions + // * We only use one of the compute vertex shader variants in a pipeline at any one time + // * The vertex shader variants all have the same storage block bindings + // * The vertex shader variants all have the same native resource binding map + // * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement + // * The vertex shader variants all have the same MslBufferSizeBufferBinding binding + // => We only need to use one vertex shader variant to generate the identical shader + // resource bindings + Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks()); + Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks()); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding) + == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding) + == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding] + == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding] + == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]); + + if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage)); + + if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage)); + + if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage)); + + } else { + if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage)); + } + if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage)); + } + + quint32 offset = 0; + for (const QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag> &shader : shaders) { + + const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + + // if we don't have a srb entry for the buffer size buffer + if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) { + + int maxNativeBinding = 0; + for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) + maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first); + + const int size = (maxNativeBinding + 1) * sizeof(int); + + Q_ASSERT(offset + size <= bufD->size()); + srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size)); + + QMetalShaderResourceBindings::BoundResourceData bd; + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + srbD->boundResourceData.append(bd); + } + + // create the buffer size buffer data + QVarLengthArray<int, 8> bufferSizeBufferData; + Q_ASSERT(storageBufferSizes.contains(shader.second)); + const QMap<int, quint32> &sizes(storageBufferSizes[shader.second]); + for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) { + const int index = shader.first->nativeResourceBindingMap[block.binding].first; + + // if the native binding is -1, the buffer is present but not accessed in the shader + if (index < 0) + continue; + + if (bufferSizeBufferData.size() <= index) + bufferSizeBufferData.resize(index + 1); + + Q_ASSERT(sizes.contains(block.binding)); + bufferSizeBufferData[index] = sizes[block.binding]; + } + + QRhiBufferData data; + const quint32 size = bufferSizeBufferData.size() * sizeof(int); + data.assign(reinterpret_cast<const char *>(bufferSizeBufferData.constData()), size); + Q_ASSERT(offset + size <= bufD->size()); + bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data }); + + // buffer offsets must be 32byte aligned + offset += ((size + 31) / 32) * 32; + } + + executeBufferHostWritesForCurrentFrame(bufD); + bufD->lastActiveFrameSlot = currentFrameSlot; + } + // make sure the resources for the correct slot get bound const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0; if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot) resNeedsRebind = true; - const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb); + const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srbD) : (cbD->currentComputeSrb != srbD); const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation; // dynamic uniform buffer offsets always trigger a rebind if (hasDynamicOffsetInSrb || resNeedsRebind || srbChanged || srbRebuilt) { - const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr }; + const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr, nullptr, nullptr }; if (gfxPsD) { - cbD->currentGraphicsSrb = srb; + cbD->currentGraphicsSrb = srbD; cbD->currentComputeSrb = nullptr; - resBindMaps[0] = &gfxPsD->d->vs.nativeResourceBindingMap; - resBindMaps[1] = &gfxPsD->d->fs.nativeResourceBindingMap; + if (gfxPsD->d->tess.enabled) { + // If tessellating, we don't know which compVs shader to use until the draw call is + // made. They should all have the same native resource binding map, so pick one. + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap); + resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->tess.compVs[0].nativeResourceBindingMap; + resBindMaps[QMetalShaderResourceBindingsData::TESSCTRL] = &gfxPsD->d->tess.compTesc.nativeResourceBindingMap; + resBindMaps[QMetalShaderResourceBindingsData::TESSEVAL] = &gfxPsD->d->tess.vertTese.nativeResourceBindingMap; + } else { + resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->vs.nativeResourceBindingMap; + } + resBindMaps[QMetalShaderResourceBindingsData::FRAGMENT] = &gfxPsD->d->fs.nativeResourceBindingMap; } else { cbD->currentGraphicsSrb = nullptr; - cbD->currentComputeSrb = srb; - resBindMaps[2] = &compPsD->d->cs.nativeResourceBindingMap; + cbD->currentComputeSrb = srbD; + resBindMaps[QMetalShaderResourceBindingsData::COMPUTE] = &compPsD->d->cs.nativeResourceBindingMap; } cbD->currentSrbGeneration = srbD->generation; cbD->currentResSlot = resSlot; @@ -1217,13 +1805,13 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, offsets.finish(); // same binding space for vertex and constant buffers - work it around - QRhiShaderResourceBindings *srb = cbD->currentGraphicsSrb; + QMetalShaderResourceBindings *srbD = cbD->currentGraphicsSrb; // There's nothing guaranteeing setShaderResources() was called before // setVertexInput()... but whatever srb will get bound will have to be // layout-compatible anyways so maxBinding is the same. - if (!srb) - srb = cbD->currentGraphicsPipeline->shaderResourceBindings(); - const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, srb)->maxBinding + 1; + if (!srbD) + srbD = QRHI_RES(QMetalShaderResourceBindings, cbD->currentGraphicsPipeline->shaderResourceBindings()); + const int firstVertexBinding = srbD->maxBinding + 1; if (firstVertexBinding != cbD->d->currentFirstVertexBinding || buffers != cbD->d->currentVertexInputsBuffers @@ -1247,7 +1835,7 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, indexBuf); executeBufferHostWritesForCurrentFrame(ibufD); ibufD->lastActiveFrameSlot = currentFrameSlot; - cbD->currentIndexBuffer = indexBuf; + cbD->currentIndexBuffer = ibufD; cbD->currentIndexOffset = indexOffset; cbD->currentIndexFormat = indexFormat; } else { @@ -1263,7 +1851,7 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) // x,y is top-left in MTLViewportRect but bottom-left in QRhiViewport float x, y, w, h; - if (!qrhi_toTopLeftRenderTargetRect(outputSize, viewport.viewport(), &x, &y, &w, &h)) + if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h)) return; MTLViewport vp; @@ -1276,8 +1864,10 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) [cbD->d->currentRenderPassEncoder setViewport: vp]; - if (!QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { + if (cbD->currentGraphicsPipeline + && !cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { MTLScissorRect s; + qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, viewport.viewport(), &x, &y, &w, &h); s.x = NSUInteger(x); s.y = NSUInteger(y); s.width = NSUInteger(w); @@ -1290,12 +1880,12 @@ void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); - Q_ASSERT(QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); + Q_ASSERT(cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); const QSize outputSize = cbD->currentTarget->pixelSize(); // x,y is top-left in MTLScissorRect but bottom-left in QRhiScissor int x, y, w, h; - if (!qrhi_toTopLeftRenderTargetRect(outputSize, scissor.scissor(), &x, &y, &w, &h)) + if (!qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, scissor.scissor(), &x, &y, &w, &h)) return; MTLScissorRect s; @@ -1324,20 +1914,320 @@ void QRhiMetal::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) [cbD->d->currentRenderPassEncoder setStencilReferenceValue: refValue]; } +static id<MTLComputeCommandEncoder> tessellationComputeEncoder(QMetalCommandBuffer *cbD) +{ + if (cbD->d->currentRenderPassEncoder) { + [cbD->d->currentRenderPassEncoder endEncoding]; + cbD->d->currentRenderPassEncoder = nil; + } + + if (!cbD->d->tessellationComputeEncoder) + cbD->d->tessellationComputeEncoder = [cbD->d->cb computeCommandEncoder]; + + return cbD->d->tessellationComputeEncoder; +} + +static void endTessellationComputeEncoding(QMetalCommandBuffer *cbD) +{ + if (cbD->d->tessellationComputeEncoder) { + [cbD->d->tessellationComputeEncoder endEncoding]; + cbD->d->tessellationComputeEncoder = nil; + } + + QMetalRenderTargetData * rtD = nullptr; + + switch (cbD->currentTarget->resourceType()) { + case QRhiResource::SwapChainRenderTarget: + rtD = QRHI_RES(QMetalSwapChainRenderTarget, cbD->currentTarget)->d; + break; + case QRhiResource::TextureRenderTarget: + rtD = QRHI_RES(QMetalTextureRenderTarget, cbD->currentTarget)->d; + break; + default: + break; + } + + Q_ASSERT(rtD); + + QVarLengthArray<MTLLoadAction, 4> oldColorLoad; + for (uint i = 0; i < uint(rtD->colorAttCount); ++i) { + oldColorLoad.append(cbD->d->currentPassRpDesc.colorAttachments[i].loadAction); + if (cbD->d->currentPassRpDesc.colorAttachments[i].storeAction != MTLStoreActionDontCare) + cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad; + } + + MTLLoadAction oldDepthLoad; + MTLLoadAction oldStencilLoad; + if (rtD->dsAttCount) { + oldDepthLoad = cbD->d->currentPassRpDesc.depthAttachment.loadAction; + if (cbD->d->currentPassRpDesc.depthAttachment.storeAction != MTLStoreActionDontCare) + cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad; + + oldStencilLoad = cbD->d->currentPassRpDesc.stencilAttachment.loadAction; + if (cbD->d->currentPassRpDesc.stencilAttachment.storeAction != MTLStoreActionDontCare) + cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad; + } + + cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc]; + cbD->resetPerPassCachedState(); + + for (uint i = 0; i < uint(rtD->colorAttCount); ++i) { + cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = oldColorLoad[i]; + } + + if (rtD->dsAttCount) { + cbD->d->currentPassRpDesc.depthAttachment.loadAction = oldDepthLoad; + cbD->d->currentPassRpDesc.stencilAttachment.loadAction = oldStencilLoad; + } + +} + +void QRhiMetal::tessellatedDraw(const TessDrawArgs &args) +{ + QMetalCommandBuffer *cbD = args.cbD; + QMetalGraphicsPipeline *graphicsPipeline = cbD->currentGraphicsPipeline; + if (graphicsPipeline->d->tess.failed) + return; + + const bool indexed = args.type != TessDrawArgs::NonIndexed; + const quint32 instanceCount = indexed ? args.drawIndexed.instanceCount : args.draw.instanceCount; + const quint32 vertexOrIndexCount = indexed ? args.drawIndexed.indexCount : args.draw.vertexCount; + + QMetalGraphicsPipelineData::Tessellation &tess(graphicsPipeline->d->tess); + QMetalGraphicsPipelineData::ExtraBufferManager &extraBufMgr(graphicsPipeline->d->extraBufMgr); + const quint32 patchCount = tess.patchCountForDrawCall(vertexOrIndexCount, instanceCount); + QMetalBuffer *vertOutBuf = nullptr; + QMetalBuffer *tescOutBuf = nullptr; + QMetalBuffer *tescPatchOutBuf = nullptr; + QMetalBuffer *tescFactorBuf = nullptr; + QMetalBuffer *tescParamsBuf = nullptr; + id<MTLComputeCommandEncoder> vertTescComputeEncoder = tessellationComputeEncoder(cbD); + + // Step 1: vertex shader (as compute) + { + id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder; + QShader::Variant shaderVariant = QShader::NonIndexedVertexAsComputeShader; + if (args.type == TessDrawArgs::U16Indexed) + shaderVariant = QShader::UInt16IndexedVertexAsComputeShader; + else if (args.type == TessDrawArgs::U32Indexed) + shaderVariant = QShader::UInt32IndexedVertexAsComputeShader; + const int varIndex = QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(shaderVariant); + id<MTLComputePipelineState> computePipelineState = tess.vsCompPipeline(this, shaderVariant); + [computeEncoder setComputePipelineState: computePipelineState]; + + // Make uniform buffers, textures, and samplers (meant for the + // vertex stage from the client's point of view) visible in the + // "vertex as compute" shader + cbD->d->currentComputePassEncoder = computeEncoder; + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::VERTEX, QMetalShaderResourceBindingsData::COMPUTE); + cbD->d->currentComputePassEncoder = nil; + + const QMap<int, int> &ebb(tess.compVs[varIndex].nativeShaderInfo.extraBufferBindings); + const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1); + + if (outputBufferBinding >= 0) { + const quint32 workBufSize = tess.vsCompOutputBufferSize(vertexOrIndexCount, instanceCount); + vertOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize); + if (!vertOutBuf) + return; + [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding]; + } + + if (indexBufferBinding >= 0) + [computeEncoder setBuffer: (id<MTLBuffer>) args.drawIndexed.indexBuffer offset: 0 atIndex: indexBufferBinding]; + + for (int i = 0, ie = cbD->d->currentVertexInputsBuffers.batches.count(); i != ie; ++i) { + const auto &bufferBatch(cbD->d->currentVertexInputsBuffers.batches[i]); + const auto &offsetBatch(cbD->d->currentVertexInputOffsets.batches[i]); + [computeEncoder setBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(uint(cbD->d->currentFirstVertexBinding) + bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + } + + if (indexed) { + [computeEncoder setStageInRegion: MTLRegionMake2D(args.drawIndexed.vertexOffset, args.drawIndexed.firstInstance, + args.drawIndexed.indexCount, args.drawIndexed.instanceCount)]; + } else { + [computeEncoder setStageInRegion: MTLRegionMake2D(args.draw.firstVertex, args.draw.firstInstance, + args.draw.vertexCount, args.draw.instanceCount)]; + } + + [computeEncoder dispatchThreads: MTLSizeMake(vertexOrIndexCount, instanceCount, 1) + threadsPerThreadgroup: MTLSizeMake(computePipelineState.threadExecutionWidth, 1, 1)]; + } + + // Step 2: tessellation control shader (as compute) + { + id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder; + id<MTLComputePipelineState> computePipelineState = tess.tescCompPipeline(this); + [computeEncoder setComputePipelineState: computePipelineState]; + + cbD->d->currentComputePassEncoder = computeEncoder; + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSCTRL, QMetalShaderResourceBindingsData::COMPUTE); + cbD->d->currentComputePassEncoder = nil; + + const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings); + const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1); + const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1); + const int paramsBufferBinding = ebb.value(QShaderPrivate::MslTessTescParamsBufferBinding, -1); + const int inputBufferBinding = ebb.value(QShaderPrivate::MslTessTescInputBufferBinding, -1); + + if (outputBufferBinding >= 0) { + const quint32 workBufSize = tess.tescCompOutputBufferSize(patchCount); + tescOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize); + if (!tescOutBuf) + return; + [computeEncoder setBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding]; + } + + if (patchOutputBufferBinding >= 0) { + const quint32 workBufSize = tess.tescCompPatchOutputBufferSize(patchCount); + tescPatchOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize); + if (!tescPatchOutBuf) + return; + [computeEncoder setBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding]; + } + + if (tessFactorBufferBinding >= 0) { + tescFactorBuf = extraBufMgr.acquireWorkBuffer(this, patchCount * sizeof(MTLQuadTessellationFactorsHalf)); + [computeEncoder setBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding]; + } + + if (paramsBufferBinding >= 0) { + struct { + quint32 inControlPointCount; + quint32 patchCount; + } params; + tescParamsBuf = extraBufMgr.acquireWorkBuffer(this, sizeof(params), QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible); + if (!tescParamsBuf) + return; + params.inControlPointCount = tess.inControlPointCount; + params.patchCount = patchCount; + id<MTLBuffer> paramsBuf = tescParamsBuf->d->buf[0]; + char *p = reinterpret_cast<char *>([paramsBuf contents]); + memcpy(p, ¶ms, sizeof(params)); + [computeEncoder setBuffer: paramsBuf offset: 0 atIndex: paramsBufferBinding]; + } + + if (vertOutBuf && inputBufferBinding >= 0) + [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: inputBufferBinding]; + + int sgSize = int(computePipelineState.threadExecutionWidth); + int wgSize = std::lcm(tess.outControlPointCount, sgSize); + while (wgSize > caps.maxThreadGroupSize) { + sgSize /= 2; + wgSize = std::lcm(tess.outControlPointCount, sgSize); + } + [computeEncoder dispatchThreads: MTLSizeMake(patchCount * tess.outControlPointCount, 1, 1) + threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)]; + } + + // Much of the state in the QMetalCommandBuffer is going to be reset + // when we get a new render encoder. Save what we need. (cheaper than + // starting to walk over the srb again) + const QMetalShaderResourceBindingsData resourceBindings = cbD->d->currentShaderResourceBindingState; + + endTessellationComputeEncoding(cbD); + + // Step 3: tessellation evaluation (as vertex) + fragment shader + { + // No need to call tess.teseFragRenderPipeline because it was done + // once and we know the result is stored in the standard place + // (graphicsPipeline->d->ps). + + graphicsPipeline->makeActiveForCurrentRenderPassEncoder(cbD); + id<MTLRenderCommandEncoder> renderEncoder = cbD->d->currentRenderPassEncoder; + + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSEVAL, QMetalShaderResourceBindingsData::VERTEX, &resourceBindings); + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::FRAGMENT, QMetalShaderResourceBindingsData::FRAGMENT, &resourceBindings); + + const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings); + const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1); + const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1); + + if (outputBufferBinding >= 0 && tescOutBuf) + [renderEncoder setVertexBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding]; + + if (patchOutputBufferBinding >= 0 && tescPatchOutBuf) + [renderEncoder setVertexBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding]; + + if (tessFactorBufferBinding >= 0 && tescFactorBuf) { + [renderEncoder setTessellationFactorBuffer: tescFactorBuf->d->buf[0] offset: 0 instanceStride: 0]; + [renderEncoder setVertexBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding]; + } + + [cbD->d->currentRenderPassEncoder drawPatches: tess.outControlPointCount + patchStart: 0 + patchCount: patchCount + patchIndexBuffer: nil + patchIndexBufferOffset: 0 + instanceCount: 1 + baseInstance: 0]; + } +} + +void QRhiMetal::adjustForMultiViewDraw(quint32 *instanceCount, QRhiCommandBuffer *cb) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + const int multiViewCount = cbD->currentGraphicsPipeline->m_multiViewCount; + if (multiViewCount <= 1) + return; + + const QMap<int, int> &ebb(cbD->currentGraphicsPipeline->d->vs.nativeShaderInfo.extraBufferBindings); + const int viewMaskBufBinding = ebb.value(QShaderPrivate::MslMultiViewMaskBufferBinding, -1); + if (viewMaskBufBinding == -1) { + qWarning("No extra buffer for multiview in the vertex shader; was it built with --view-count specified?"); + return; + } + struct { + quint32 viewOffset; + quint32 viewCount; + } multiViewInfo; + multiViewInfo.viewOffset = 0; + multiViewInfo.viewCount = quint32(multiViewCount); + QMetalBuffer *buf = cbD->currentGraphicsPipeline->d->extraBufMgr.acquireWorkBuffer(this, sizeof(multiViewInfo), + QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible); + if (buf) { + id<MTLBuffer> mtlbuf = buf->d->buf[0]; + char *p = reinterpret_cast<char *>([mtlbuf contents]); + memcpy(p, &multiViewInfo, sizeof(multiViewInfo)); + [cbD->d->currentRenderPassEncoder setVertexBuffer: mtlbuf offset: 0 atIndex: viewMaskBufBinding]; + // The instance count is adjusted for layered rendering. The vertex shader is expected to contain something like: + // uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1]; + // where spvViewMask is the buffer with multiViewInfo passed in above. + *instanceCount *= multiViewCount; + } +} + void QRhiMetal::draw(QRhiCommandBuffer *cb, quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + if (cbD->currentGraphicsPipeline->d->tess.enabled) { + TessDrawArgs a; + a.cbD = cbD; + a.type = TessDrawArgs::NonIndexed; + a.draw.vertexCount = vertexCount; + a.draw.instanceCount = instanceCount; + a.draw.firstVertex = firstVertex; + a.draw.firstInstance = firstInstance; + tessellatedDraw(a); + return; + } + + adjustForMultiViewDraw(&instanceCount, cb); + if (caps.baseVertexAndInstance) { - [cbD->d->currentRenderPassEncoder drawPrimitives: - QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType - vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance]; + [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType + vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance]; } else { - [cbD->d->currentRenderPassEncoder drawPrimitives: - QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType - vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount]; + [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType + vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount]; } } @@ -1351,25 +2241,41 @@ void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, return; const quint32 indexOffset = cbD->currentIndexOffset + firstIndex * (cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? 2 : 4); - Q_ASSERT(indexOffset == aligned<quint32>(indexOffset, 4)); + Q_ASSERT(indexOffset == aligned(indexOffset, 4u)); + + QMetalBuffer *ibufD = cbD->currentIndexBuffer; + id<MTLBuffer> mtlibuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0]; + + if (cbD->currentGraphicsPipeline->d->tess.enabled) { + TessDrawArgs a; + a.cbD = cbD; + a.type = cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? TessDrawArgs::U16Indexed : TessDrawArgs::U32Indexed; + a.drawIndexed.indexCount = indexCount; + a.drawIndexed.instanceCount = instanceCount; + a.drawIndexed.firstIndex = firstIndex; + a.drawIndexed.vertexOffset = vertexOffset; + a.drawIndexed.firstInstance = firstInstance; + a.drawIndexed.indexBuffer = mtlibuf; + tessellatedDraw(a); + return; + } - QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, cbD->currentIndexBuffer); - id<MTLBuffer> mtlbuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0]; + adjustForMultiViewDraw(&instanceCount, cb); if (caps.baseVertexAndInstance) { - [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType + [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType indexCount: indexCount indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32 - indexBuffer: mtlbuf + indexBuffer: mtlibuf indexBufferOffset: indexOffset instanceCount: instanceCount baseVertex: vertexOffset baseInstance: firstInstance]; } else { - [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType + [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType indexCount: indexCount indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32 - indexBuffer: mtlbuf + indexBuffer: mtlibuf indexBufferOffset: indexOffset instanceCount: instanceCount]; } @@ -1426,34 +2332,39 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb) cbD->resetPerPassCachedState(); } +double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + return cbD->d->lastGpuTime; +} + QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain); + currentSwapChain = swapChainD; + currentFrameSlot = swapChainD->currentFrameSlot; - // This is a bit messed up since for this swapchain we want to wait for the - // commands+present to complete, while for others just for the commands - // (for this same frame slot) but not sure how to do that in a sane way so - // wait for full cb completion for now. - for (QMetalSwapChain *sc : qAsConst(swapchains)) { - dispatch_semaphore_t sem = sc->d->sem[swapChainD->currentFrameSlot]; - dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + // If we are too far ahead, block. This is also what ensures that any + // resource used in the previous frame for this slot is now not in use + // anymore by the GPU. + dispatch_semaphore_wait(swapChainD->d->sem[currentFrameSlot], DISPATCH_TIME_FOREVER); + + // Do this also for any other swapchain's commands with the same frame slot + // While this reduces concurrency, it keeps resource usage safe: swapchain + // A starting its frame 0, followed by swapchain B starting its own frame 0 + // will make B wait for A's frame 0 commands, so if a resource is written + // in B's frame or when B checks for pending resource releases, that won't + // mess up A's in-flight commands (as they are not in flight anymore). + for (QMetalSwapChain *sc : std::as_const(swapchains)) { if (sc != swapChainD) - dispatch_semaphore_signal(sem); + sc->waitUntilCompleted(currentFrameSlot); // wait+signal } - currentSwapChain = swapChainD; - currentFrameSlot = swapChainD->currentFrameSlot; - if (swapChainD->ds) - swapChainD->ds->lastActiveFrameSlot = currentFrameSlot; - [d->captureScope beginScope]; - // Do not let the command buffer mess with the refcount of objects. We do - // have a proper render loop and will manage lifetimes similarly to other - // backends (Vulkan). - swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + swapChainD->cbWrapper.d->cb = d->newCommandBuffer(); QMetalRenderTargetData::ColorAtt colorAtt; if (swapChainD->samples > 1) { @@ -1465,14 +2376,16 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF swapChainD->rtWrapper.d->fb.colorAtt[0] = colorAtt; swapChainD->rtWrapper.d->fb.dsTex = swapChainD->ds ? swapChainD->ds->d->tex : nil; + swapChainD->rtWrapper.d->fb.dsResolveTex = nil; swapChainD->rtWrapper.d->fb.hasStencil = swapChainD->ds ? true : false; swapChainD->rtWrapper.d->fb.depthNeedsStore = false; - QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); - QRHI_PROF_F(beginSwapChainFrame(swapChain)); + if (swapChainD->ds) + swapChainD->ds->lastActiveFrameSlot = currentFrameSlot; executeDeferredReleases(); - swapChainD->cbWrapper.resetState(); + swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]); + swapChainD->d->lastGpuTime[currentFrameSlot] = 0; finishActiveReadbacks(); return QRhi::FrameOpSuccess; @@ -1483,28 +2396,47 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain); Q_ASSERT(currentSwapChain == swapChainD); + __block int thisFrameSlot = currentFrameSlot; + [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer> cb) { + swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime; + dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]); + }]; + +#ifdef QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES + // When Metal API validation diagnostics is enabled in Xcode the texture is + // released before the command buffer is done with it. Manually keep it alive + // to work around this. + id<MTLTexture> drawableTexture = [swapChainD->d->curDrawable.texture retain]; + [swapChainD->cbWrapper.d->cb addCompletedHandler:^(id<MTLCommandBuffer>) { + [drawableTexture release]; + }]; +#endif + const bool needsPresent = !flags.testFlag(QRhi::SkipPresent); - if (needsPresent) { - auto drawable = swapChainD->d->curDrawable; - [swapChainD->cbWrapper.d->cb addScheduledHandler:^(id<MTLCommandBuffer>) { + const bool presentsWithTransaction = swapChainD->d->layer.presentsWithTransaction; + if (!presentsWithTransaction && needsPresent) { + // beginFrame-endFrame without a render pass inbetween means there is no drawable. + if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) + [swapChainD->cbWrapper.d->cb presentDrawable: drawable]; + } + + [swapChainD->cbWrapper.d->cb commit]; + + if (presentsWithTransaction && needsPresent) { + // beginFrame-endFrame without a render pass inbetween means there is no drawable. + if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) { + // The layer has presentsWithTransaction set to true to avoid flicker on resizing, + // so here it is important to follow what the Metal docs say when it comes to the + // issuing the present. + [swapChainD->cbWrapper.d->cb waitUntilScheduled]; [drawable present]; - }]; + } } // Must not hold on to the drawable, regardless of needsPresent [swapChainD->d->curDrawable release]; swapChainD->d->curDrawable = nil; - __block int thisFrameSlot = currentFrameSlot; - [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer>) { - dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]); - }]; - - [swapChainD->cbWrapper.d->cb commit]; - - QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); - QRHI_PROF_F(endSwapChainFrame(swapChain, swapChainD->frameCount + 1)); - [d->captureScope endScope]; if (needsPresent) @@ -1520,23 +2452,17 @@ QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi: Q_UNUSED(flags); currentFrameSlot = (currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT; - if (swapchains.count() > 1) { - for (QMetalSwapChain *sc : qAsConst(swapchains)) { - // wait+signal is the general pattern to ensure the commands for a - // given frame slot have completed (if sem is 1, we go 0 then 1; if - // sem is 0 we go -1, block, completion increments to 0, then us to 1) - dispatch_semaphore_t sem = sc->d->sem[currentFrameSlot]; - dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); - dispatch_semaphore_signal(sem); - } - } + + for (QMetalSwapChain *sc : std::as_const(swapchains)) + sc->waitUntilCompleted(currentFrameSlot); d->ofr.active = true; *cb = &d->ofr.cbWrapper; - d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + d->ofr.cbWrapper.d->cb = d->newCommandBuffer(); executeDeferredReleases(); - d->ofr.cbWrapper.resetState(); + d->ofr.cbWrapper.resetState(d->ofr.lastGpuTime); + d->ofr.lastGpuTime = 0; finishActiveReadbacks(); return QRhi::FrameOpSuccess; @@ -1548,10 +2474,13 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags) Q_ASSERT(d->ofr.active); d->ofr.active = false; - [d->ofr.cbWrapper.d->cb commit]; + id<MTLCommandBuffer> cb = d->ofr.cbWrapper.d->cb; + [cb commit]; // offscreen frames wait for completion, unlike swapchain ones - [d->ofr.cbWrapper.d->cb waitUntilCompleted]; + [cb waitUntilCompleted]; + + d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime; finishActiveReadbacks(true); @@ -1575,16 +2504,14 @@ QRhi::FrameOpResult QRhiMetal::finish() } } - for (QMetalSwapChain *sc : qAsConst(swapchains)) { + for (QMetalSwapChain *sc : std::as_const(swapchains)) { for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { if (currentSwapChain && sc == currentSwapChain && i == currentFrameSlot) { // no wait as this is the thing we're going to be commit below and // beginFrame decremented sem already and going to be signaled by endFrame continue; } - dispatch_semaphore_t sem = sc->d->sem[i]; - dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); - dispatch_semaphore_signal(sem); + sc->waitUntilCompleted(i); } } @@ -1594,10 +2521,13 @@ QRhi::FrameOpResult QRhiMetal::finish() } if (inFrame) { - if (d->ofr.active) - d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; - else - swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + if (d->ofr.active) { + d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime; + d->ofr.cbWrapper.d->cb = d->newCommandBuffer(); + } else { + swapChainD->d->lastGpuTime[currentFrameSlot] += cb.GPUEndTime - cb.GPUStartTime; + swapChainD->cbWrapper.d->cb = d->newCommandBuffer(); + } } executeDeferredReleases(true); @@ -1659,7 +2589,6 @@ void QRhiMetal::enqueueSubresUpload(QMetalTexture *texD, void *mp, void *blitEnc int w = img.width(); int h = img.height(); int bpl = img.bytesPerLine(); - int srcOffset = 0; if (!subresDesc.sourceSize().isEmpty() || !subresDesc.sourceTopLeft().isNull()) { const int sx = subresDesc.sourceTopLeft().x(); @@ -1668,10 +2597,12 @@ void QRhiMetal::enqueueSubresUpload(QMetalTexture *texD, void *mp, void *blitEnc w = subresDesc.sourceSize().width(); h = subresDesc.sourceSize().height(); } - if (img.depth() == 32) { - memcpy(reinterpret_cast<char *>(mp) + *curOfs, img.constBits(), size_t(fullImageSizeBytes)); - srcOffset = sy * bpl + sx * 4; - // bpl remains set to the original image's row stride + if (w == img.width()) { + const int bpc = qMax(1, img.depth() / 8); + Q_ASSERT(h * img.bytesPerLine() <= fullImageSizeBytes); + memcpy(reinterpret_cast<char *>(mp) + *curOfs, + img.constBits() + sy * img.bytesPerLine() + sx * bpc, + h * img.bytesPerLine()); } else { img = img.copy(sx, sy, w, h); bpl = img.bytesPerLine(); @@ -1683,7 +2614,7 @@ void QRhiMetal::enqueueSubresUpload(QMetalTexture *texD, void *mp, void *blitEnc } [blitEnc copyFromBuffer: texD->d->stagingBuf[currentFrameSlot] - sourceOffset: NSUInteger(*curOfs + srcOffset) + sourceOffset: NSUInteger(*curOfs) sourceBytesPerRow: NSUInteger(bpl) sourceBytesPerImage: 0 sourceSize: MTLSizeMake(NSUInteger(w), NSUInteger(h), 1) @@ -1774,7 +2705,15 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates); - QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); + + id<MTLBlitCommandEncoder> blitEnc = nil; + auto ensureBlit = [&blitEnc, cbD, this]() { + if (!blitEnc) { + blitEnc = [cbD->d->cb blitCommandEncoder]; + if (debugMarkers) + [blitEnc pushDebugGroup: @"Texture upload/copy"]; + } + }; for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) { const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]); @@ -1798,25 +2737,33 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf); executeBufferHostWritesForCurrentFrame(bufD); const int idx = bufD->d->slotted ? currentFrameSlot : 0; - char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]); - if (p) { - u.result->data.resize(u.readSize); - memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize)); + if (bufD->m_type == QRhiBuffer::Dynamic) { + char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]); + if (p) { + u.result->data.resize(u.readSize); + memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize)); + } + if (u.result->completed) + u.result->completed(); + } else { + QRhiMetalData::BufferReadback readback; + readback.activeFrameSlot = idx; + readback.buf = bufD->d->buf[idx]; + readback.offset = u.offset; + readback.readSize = u.readSize; + readback.result = u.result; + d->activeBufferReadbacks.append(readback); +#ifdef Q_OS_MACOS + if (bufD->d->managed) { + // On non-Apple Silicon, manually synchronize memory from GPU to CPU + ensureBlit(); + [blitEnc synchronizeResource:readback.buf]; + } +#endif } - if (u.result->completed) - u.result->completed(); } } - id<MTLBlitCommandEncoder> blitEnc = nil; - auto ensureBlit = [&blitEnc, cbD, this] { - if (!blitEnc) { - blitEnc = [cbD->d->cb blitCommandEncoder]; - if (debugMarkers) - [blitEnc pushDebugGroup: @"Texture upload/copy"]; - } - }; - for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) { const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]); if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) { @@ -1824,7 +2771,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate qsizetype stagingSize = 0; for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) { for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { - for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level])) + for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) stagingSize += subresUploadByteSize(subresDesc); } } @@ -1833,13 +2780,12 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate Q_ASSERT(!utexD->d->stagingBuf[currentFrameSlot]); utexD->d->stagingBuf[currentFrameSlot] = [d->dev newBufferWithLength: NSUInteger(stagingSize) options: MTLResourceStorageModeShared]; - QRHI_PROF_F(newTextureStagingArea(utexD, currentFrameSlot, quint32(stagingSize))); void *mp = [utexD->d->stagingBuf[currentFrameSlot] contents]; qsizetype curOfs = 0; for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) { for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { - for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level])) + for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) enqueueSubresUpload(utexD, mp, blitEnc, layer, level, subresDesc, &curOfs); } } @@ -1852,7 +2798,6 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate e.stagingBuffer.buffer = utexD->d->stagingBuf[currentFrameSlot]; utexD->d->stagingBuf[currentFrameSlot] = nil; d->releaseQueue.append(e); - QRHI_PROF_F(releaseTextureStagingArea(utexD, currentFrameSlot)); } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Copy) { Q_ASSERT(u.src && u.dst); QMetalTexture *srcD = QRHI_RES(QMetalTexture, u.src); @@ -1914,10 +2859,6 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate textureFormatInfo(readback.format, readback.pixelSize, &bpl, &readback.bufSize, nullptr); readback.buf = [d->dev newBufferWithLength: readback.bufSize options: MTLResourceStorageModeShared]; - QRHI_PROF_F(newReadbackBuffer(qint64(qintptr(readback.buf)), - texD ? static_cast<QRhiResource *>(texD) : static_cast<QRhiResource *>(swapChainD), - readback.bufSize)); - ensureBlit(); [blitEnc copyFromTexture: src sourceSlice: NSUInteger(is3D ? 0 : u.rb.layer()) @@ -1955,17 +2896,17 @@ void QRhiMetal::executeBufferHostWritesForSlot(QMetalBuffer *bufD, int slot) return; void *p = [bufD->d->buf[slot] contents]; - int changeBegin = -1; - int changeEnd = -1; - for (const QMetalBufferData::BufferUpdate &u : qAsConst(bufD->d->pendingUpdates[slot])) { + quint32 changeBegin = UINT32_MAX; + quint32 changeEnd = 0; + for (const QMetalBufferData::BufferUpdate &u : std::as_const(bufD->d->pendingUpdates[slot])) { memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.data.size())); - if (changeBegin == -1 || u.offset < changeBegin) + if (u.offset < changeBegin) changeBegin = u.offset; - if (changeEnd == -1 || u.offset + u.data.size() > changeEnd) + if (u.offset + u.data.size() > changeEnd) changeEnd = u.offset + u.data.size(); } #ifdef Q_OS_MACOS - if (changeBegin >= 0 && bufD->d->managed) + if (changeBegin < UINT32_MAX && changeBegin < changeEnd && bufD->d->managed) [bufD->d->buf[slot] didModifyRange: NSMakeRange(NSUInteger(changeBegin), NSUInteger(changeEnd - changeBegin))]; #endif @@ -1999,8 +2940,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, QMetalRenderTargetData *rtD = nullptr; switch (rt->resourceType()) { - case QRhiResource::RenderTarget: - rtD = QRHI_RES(QMetalReferenceRenderTarget, rt)->d; + case QRhiResource::SwapChainRenderTarget: + rtD = QRHI_RES(QMetalSwapChainRenderTarget, rt)->d; cbD->d->currentPassRpDesc = d->createDefaultRenderPass(rtD->dsAttCount, colorClearValue, depthStencilClearValue, rtD->colorAttCount); if (rtD->colorAttCount) { QMetalRenderTargetData::ColorAtt &color0(rtD->fb.colorAtt[0]); @@ -2030,29 +2971,42 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, { QMetalTextureRenderTarget *rtTex = QRHI_RES(QMetalTextureRenderTarget, rt); rtD = rtTex->d; + if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(rtTex->description(), rtD->currentResIdList)) + rtTex->create(); cbD->d->currentPassRpDesc = d->createDefaultRenderPass(rtD->dsAttCount, colorClearValue, depthStencilClearValue, rtD->colorAttCount); - if (rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents)) { + if (rtD->fb.preserveColor) { for (uint i = 0; i < uint(rtD->colorAttCount); ++i) cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad; } - if (rtD->dsAttCount && rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents)) { + if (rtD->dsAttCount && rtD->fb.preserveDs) { cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad; cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad; } + int colorAttCount = 0; for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments(); it != itEnd; ++it) { - if (it->texture()) + colorAttCount += 1; + if (it->texture()) { QRHI_RES(QMetalTexture, it->texture())->lastActiveFrameSlot = currentFrameSlot; - else if (it->renderBuffer()) + if (it->multiViewCount() >= 2) + cbD->d->currentPassRpDesc.renderTargetArrayLength = NSUInteger(it->multiViewCount()); + } else if (it->renderBuffer()) { QRHI_RES(QMetalRenderBuffer, it->renderBuffer())->lastActiveFrameSlot = currentFrameSlot; + } if (it->resolveTexture()) QRHI_RES(QMetalTexture, it->resolveTexture())->lastActiveFrameSlot = currentFrameSlot; } if (rtTex->m_desc.depthStencilBuffer()) QRHI_RES(QMetalRenderBuffer, rtTex->m_desc.depthStencilBuffer())->lastActiveFrameSlot = currentFrameSlot; - if (rtTex->m_desc.depthTexture()) - QRHI_RES(QMetalTexture, rtTex->m_desc.depthTexture())->lastActiveFrameSlot = currentFrameSlot; + if (rtTex->m_desc.depthTexture()) { + QMetalTexture *depthTexture = QRHI_RES(QMetalTexture, rtTex->m_desc.depthTexture()); + depthTexture->lastActiveFrameSlot = currentFrameSlot; + if (colorAttCount == 0 && depthTexture->arraySize() >= 2) + cbD->d->currentPassRpDesc.renderTargetArrayLength = NSUInteger(depthTexture->arraySize()); + } + if (rtTex->m_desc.depthResolveTexture()) + QRHI_RES(QMetalTexture, rtTex->m_desc.depthResolveTexture())->lastActiveFrameSlot = currentFrameSlot; } break; default: @@ -2066,7 +3020,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, cbD->d->currentPassRpDesc.colorAttachments[i].depthPlane = NSUInteger(rtD->fb.colorAtt[i].slice); cbD->d->currentPassRpDesc.colorAttachments[i].level = NSUInteger(rtD->fb.colorAtt[i].level); if (rtD->fb.colorAtt[i].resolveTex) { - cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = MTLStoreActionMultisampleResolve; + cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = rtD->fb.preserveColor ? MTLStoreActionStoreAndMultisampleResolve + : MTLStoreActionMultisampleResolve; cbD->d->currentPassRpDesc.colorAttachments[i].resolveTexture = rtD->fb.colorAtt[i].resolveTex; cbD->d->currentPassRpDesc.colorAttachments[i].resolveSlice = NSUInteger(rtD->fb.colorAtt[i].resolveLayer); cbD->d->currentPassRpDesc.colorAttachments[i].resolveLevel = NSUInteger(rtD->fb.colorAtt[i].resolveLevel); @@ -2079,6 +3034,15 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, cbD->d->currentPassRpDesc.stencilAttachment.texture = rtD->fb.hasStencil ? rtD->fb.dsTex : nil; if (rtD->fb.depthNeedsStore) // Depth/Stencil is set to DontCare by default, override if needed cbD->d->currentPassRpDesc.depthAttachment.storeAction = MTLStoreActionStore; + if (rtD->fb.dsResolveTex) { + cbD->d->currentPassRpDesc.depthAttachment.storeAction = rtD->fb.depthNeedsStore ? MTLStoreActionStoreAndMultisampleResolve + : MTLStoreActionMultisampleResolve; + cbD->d->currentPassRpDesc.depthAttachment.resolveTexture = rtD->fb.dsResolveTex; + if (rtD->fb.hasStencil) { + cbD->d->currentPassRpDesc.stencilAttachment.resolveTexture = rtD->fb.dsResolveTex; + cbD->d->currentPassRpDesc.stencilAttachment.storeAction = cbD->d->currentPassRpDesc.depthAttachment.storeAction; + } + } } cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc]; @@ -2136,9 +3100,9 @@ void QRhiMetal::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *p Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass); QMetalComputePipeline *psD = QRHI_RES(QMetalComputePipeline, ps); - if (cbD->currentComputePipeline != ps || cbD->currentPipelineGeneration != psD->generation) { + if (cbD->currentComputePipeline != psD || cbD->currentPipelineGeneration != psD->generation) { cbD->currentGraphicsPipeline = nullptr; - cbD->currentComputePipeline = ps; + cbD->currentComputePipeline = psD; cbD->currentPipelineGeneration = psD->generation; [cbD->d->currentComputePassEncoder setComputePipelineState: psD->d->ps]; @@ -2203,6 +3167,17 @@ void QRhiMetal::executeDeferredReleases(bool forced) case QRhiMetalData::DeferredReleaseEntry::StagingBuffer: [e.stagingBuffer.buffer release]; break; + case QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline: + [e.graphicsPipeline.pipelineState release]; + [e.graphicsPipeline.depthStencilState release]; + [e.graphicsPipeline.tessVertexComputeState[0] release]; + [e.graphicsPipeline.tessVertexComputeState[1] release]; + [e.graphicsPipeline.tessVertexComputeState[2] release]; + [e.graphicsPipeline.tessTessControlComputeState release]; + break; + case QRhiMetalData::DeferredReleaseEntry::ComputePipeline: + [e.computePipeline.pipelineState release]; + break; default: break; } @@ -2214,7 +3189,6 @@ void QRhiMetal::executeDeferredReleases(bool forced) void QRhiMetal::finishActiveReadbacks(bool forced) { QVarLengthArray<std::function<void()>, 4> completedCallbacks; - QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); for (int i = d->activeTextureReadbacks.count() - 1; i >= 0; --i) { const QRhiMetalData::TextureReadback &readback(d->activeTextureReadbacks[i]); @@ -2226,12 +3200,26 @@ void QRhiMetal::finishActiveReadbacks(bool forced) memcpy(readback.result->data.data(), p, readback.bufSize); [readback.buf release]; - QRHI_PROF_F(releaseReadbackBuffer(qint64(qintptr(readback.buf)))); + if (readback.result->completed) + completedCallbacks.append(readback.result->completed); + + d->activeTextureReadbacks.remove(i); + } + } + + for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) { + const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]); + if (forced || currentFrameSlot == readback.activeFrameSlot + || readback.activeFrameSlot < 0) { + readback.result->data.resize(readback.readSize); + char *p = reinterpret_cast<char *>([readback.buf contents]); + Q_ASSERT(p); + memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize)); if (readback.result->completed) completedCallbacks.append(readback.result->completed); - d->activeTextureReadbacks.removeLast(); + d->activeBufferReadbacks.remove(i); } } @@ -2239,7 +3227,7 @@ void QRhiMetal::finishActiveReadbacks(bool forced) f(); } -QMetalBuffer::QMetalBuffer(QRhiImplementation *rhi, Type type, UsageFlags usage, int size) +QMetalBuffer::QMetalBuffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size) : QRhiBuffer(rhi, type, usage, size), d(new QMetalBufferData) { @@ -2271,8 +3259,6 @@ void QMetalBuffer::destroy() QRHI_RES_RHI(QRhiMetal); if (rhiD) { rhiD->d->releaseQueue.append(e); - QRHI_PROF; - QRHI_PROF_F(releaseBuffer(this)); rhiD->unregisterResource(this); } } @@ -2287,13 +3273,15 @@ bool QMetalBuffer::create() return false; } - const uint nonZeroSize = m_size <= 0 ? 256 : uint(m_size); - const uint roundedSize = m_usage.testFlag(QRhiBuffer::UniformBuffer) ? aligned<uint>(nonZeroSize, 256) : nonZeroSize; + const quint32 nonZeroSize = m_size <= 0 ? 256 : m_size; + const quint32 roundedSize = m_usage.testFlag(QRhiBuffer::UniformBuffer) ? aligned(nonZeroSize, 256u) : nonZeroSize; d->managed = false; MTLResourceOptions opts = MTLResourceStorageModeShared; + + QRHI_RES_RHI(QRhiMetal); #ifdef Q_OS_MACOS - if (m_type != Dynamic) { + if (!rhiD->caps.isAppleGPU && m_type != Dynamic) { opts = MTLResourceStorageModeManaged; d->managed = true; } @@ -2304,8 +3292,10 @@ bool QMetalBuffer::create() // Static maps to on macOS) is not safe when another frame reading from the // same buffer is still in flight. d->slotted = !m_usage.testFlag(QRhiBuffer::StorageBuffer); // except for SSBOs written in the shader + // and a special case for internal work buffers + if (int(m_usage) == WorkBufPoolUsage) + d->slotted = false; - QRHI_RES_RHI(QRhiMetal); for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { if (i == 0 || d->slotted) { d->buf[i] = [rhiD->d->dev newBufferWithLength: roundedSize options: opts]; @@ -2320,9 +3310,6 @@ bool QMetalBuffer::create() } } - QRHI_PROF; - QRHI_PROF_F(newBuffer(this, roundedSize, d->slotted ? QMTL_FRAMES_IN_FLIGHT : 1, 0)); - lastActiveFrameSlot = -1; generation += 1; rhiD->registerResource(this); @@ -2371,11 +3358,12 @@ void QMetalBuffer::endFullDynamicBufferUpdateForCurrentFrame() #endif } -static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags, const QRhiMetalData *d) +static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags, const QRhiMetal *d) { #ifndef Q_OS_MACOS Q_UNUSED(d); #endif + const bool srgb = flags.testFlag(QRhiTexture::sRGB); switch (format) { case QRhiTexture::RGBA8: @@ -2410,13 +3398,16 @@ static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QR case QRhiTexture::R32F: return MTLPixelFormatR32Float; + case QRhiTexture::RGB10A2: + return MTLPixelFormatRGB10A2Unorm; + #ifdef Q_OS_MACOS case QRhiTexture::D16: return MTLPixelFormatDepth16Unorm; case QRhiTexture::D24: - return [d->dev isDepth24Stencil8PixelFormatSupported] ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float; + return [d->d->dev isDepth24Stencil8PixelFormatSupported] ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float; case QRhiTexture::D24S8: - return [d->dev isDepth24Stencil8PixelFormatSupported] ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8; + return [d->d->dev isDepth24Stencil8PixelFormatSupported] ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8; #else case QRhiTexture::D16: return MTLPixelFormatDepth32Float; @@ -2439,7 +3430,7 @@ static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QR return MTLPixelFormatBC4_RUnorm; case QRhiTexture::BC5: qWarning("QRhiMetal does not support BC5"); - return MTLPixelFormatRGBA8Unorm; + return MTLPixelFormatInvalid; case QRhiTexture::BC6H: return MTLPixelFormatBC6H_RGBUfloat; case QRhiTexture::BC7: @@ -2453,7 +3444,7 @@ static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QR case QRhiTexture::BC6H: case QRhiTexture::BC7: qWarning("QRhiMetal: BCx compression not supported on this platform"); - return MTLPixelFormatRGBA8Unorm; + return MTLPixelFormatInvalid; #endif #ifndef Q_OS_MACOS @@ -2494,32 +3485,129 @@ static inline MTLPixelFormat toMetalTextureFormat(QRhiTexture::Format format, QR return srgb ? MTLPixelFormatASTC_12x12_sRGB : MTLPixelFormatASTC_12x12_LDR; #else case QRhiTexture::ETC2_RGB8: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatETC2_RGB8_sRGB : MTLPixelFormatETC2_RGB8; + } + qWarning("QRhiMetal: ETC2 compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ETC2_RGB8A1: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatETC2_RGB8A1_sRGB : MTLPixelFormatETC2_RGB8A1; + } + qWarning("QRhiMetal: ETC2 compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ETC2_RGBA8: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatEAC_RGBA8_sRGB : MTLPixelFormatEAC_RGBA8; + } qWarning("QRhiMetal: ETC2 compression not supported on this platform"); - return MTLPixelFormatRGBA8Unorm; - + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_4x4: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_4x4_sRGB : MTLPixelFormatASTC_4x4_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_5x4: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_5x4_sRGB : MTLPixelFormatASTC_5x4_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_5x5: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_5x5_sRGB : MTLPixelFormatASTC_5x5_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_6x5: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_6x5_sRGB : MTLPixelFormatASTC_6x5_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_6x6: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_6x6_sRGB : MTLPixelFormatASTC_6x6_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_8x5: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_8x5_sRGB : MTLPixelFormatASTC_8x5_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_8x6: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_8x6_sRGB : MTLPixelFormatASTC_8x6_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_8x8: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_8x8_sRGB : MTLPixelFormatASTC_8x8_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_10x5: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_10x5_sRGB : MTLPixelFormatASTC_10x5_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_10x6: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_10x6_sRGB : MTLPixelFormatASTC_10x6_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_10x8: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_10x8_sRGB : MTLPixelFormatASTC_10x8_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_10x10: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_10x10_sRGB : MTLPixelFormatASTC_10x10_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_12x10: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_12x10_sRGB : MTLPixelFormatASTC_12x10_LDR; + } + qWarning("QRhiMetal: ASTC compression not supported on this platform"); + return MTLPixelFormatInvalid; case QRhiTexture::ASTC_12x12: + if (d->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) + return srgb ? MTLPixelFormatASTC_12x12_sRGB : MTLPixelFormatASTC_12x12_LDR; + } qWarning("QRhiMetal: ASTC compression not supported on this platform"); - return MTLPixelFormatRGBA8Unorm; + return MTLPixelFormatInvalid; #endif default: Q_UNREACHABLE(); - return MTLPixelFormatRGBA8Unorm; + return MTLPixelFormatInvalid; } } @@ -2552,8 +3640,6 @@ void QMetalRenderBuffer::destroy() QRHI_RES_RHI(QRhiMetal); if (rhiD) { rhiD->d->releaseQueue.append(e); - QRHI_PROF; - QRHI_PROF_F(releaseRenderBuffer(this)); rhiD->unregisterResource(this); } } @@ -2578,16 +3664,23 @@ bool QMetalRenderBuffer::create() desc.resourceOptions = MTLResourceStorageModePrivate; desc.usage = MTLTextureUsageRenderTarget; - bool transientBacking = false; switch (m_type) { case DepthStencil: #ifdef Q_OS_MACOS - desc.storageMode = MTLStorageModePrivate; - d->format = rhiD->d->dev.depth24Stencil8PixelFormatSupported - ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8; + if (rhiD->caps.isAppleGPU) { + if (@available(macOS 11.0, *)) { + desc.storageMode = MTLStorageModeMemoryless; + d->format = MTLPixelFormatDepth32Float_Stencil8; + } else { + Q_UNREACHABLE(); + } + } else { + desc.storageMode = MTLStorageModePrivate; + d->format = rhiD->d->dev.depth24Stencil8PixelFormatSupported + ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8; + } #else desc.storageMode = MTLStorageModeMemoryless; - transientBacking = true; d->format = MTLPixelFormatDepth32Float_Stencil8; #endif desc.pixelFormat = d->format; @@ -2595,7 +3688,7 @@ bool QMetalRenderBuffer::create() case Color: desc.storageMode = MTLStorageModePrivate; if (m_backingFormatHint != QRhiTexture::UnknownFormat) - d->format = toMetalTextureFormat(m_backingFormatHint, {}, rhiD->d); + d->format = toMetalTextureFormat(m_backingFormatHint, {}, rhiD); else d->format = MTLPixelFormatRGBA8Unorm; desc.pixelFormat = d->format; @@ -2611,9 +3704,6 @@ bool QMetalRenderBuffer::create() if (!m_objectName.isEmpty()) d->tex.label = [NSString stringWithUTF8String: m_objectName.constData()]; - QRHI_PROF; - QRHI_PROF_F(newRenderBuffer(this, transientBacking, false, samples)); - lastActiveFrameSlot = -1; generation += 1; rhiD->registerResource(this); @@ -2671,8 +3761,6 @@ void QMetalTexture::destroy() QRHI_RES_RHI(QRhiMetal); if (rhiD) { rhiD->d->releaseQueue.append(e); - QRHI_PROF; - QRHI_PROF_F(releaseTexture(this)); rhiD->unregisterResource(this); } } @@ -2682,14 +3770,17 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize) if (d->tex) destroy(); - const QSize size = m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize; const bool isCube = m_flags.testFlag(CubeMap); const bool is3D = m_flags.testFlag(ThreeDimensional); const bool isArray = m_flags.testFlag(TextureArray); const bool hasMipMaps = m_flags.testFlag(MipMapped); + const bool is1D = m_flags.testFlag(OneDimensional); + + const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1) + : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize); QRHI_RES_RHI(QRhiMetal); - d->format = toMetalTextureFormat(m_format, m_flags, rhiD->d); + d->format = toMetalTextureFormat(m_format, m_flags, rhiD); mipLevelCount = hasMipMaps ? rhiD->q->mipLevelsForSize(size) : 1; samples = rhiD->effectiveSampleCount(m_sampleCount); if (samples > 1) { @@ -2714,12 +3805,18 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize) qWarning("Texture cannot be both array and 3D"); return false; } - m_depth = qMax(1, m_depth); + if (is1D && is3D) { + qWarning("Texture cannot be both 1D and 3D"); + return false; + } + if (is1D && isCube) { + qWarning("Texture cannot be both 1D and cube"); + return false; + } if (m_depth > 1 && !is3D) { qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth); return false; } - m_arraySize = qMax(0, m_arraySize); if (m_arraySize > 0 && !isArray) { qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize); return false; @@ -2746,17 +3843,20 @@ bool QMetalTexture::create() const bool isCube = m_flags.testFlag(CubeMap); const bool is3D = m_flags.testFlag(ThreeDimensional); const bool isArray = m_flags.testFlag(TextureArray); + const bool is1D = m_flags.testFlag(OneDimensional); if (isCube) { desc.textureType = MTLTextureTypeCube; } else if (is3D) { desc.textureType = MTLTextureType3D; + } else if (is1D) { + desc.textureType = isArray ? MTLTextureType1DArray : MTLTextureType1D; } else if (isArray) { #ifdef Q_OS_IOS - if (samples > 1) { - // would be available on iOS 14.0+ but cannot test for that with a 13 SDK - qWarning("Multisample 2D texture array is not supported on iOS"); + if (@available(iOS 14, *)) { + desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray; + } else { + desc.textureType = MTLTextureType2DArray; } - desc.textureType = MTLTextureType2DArray; #else desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray; #endif @@ -2766,12 +3866,12 @@ bool QMetalTexture::create() desc.pixelFormat = d->format; desc.width = NSUInteger(size.width()); desc.height = NSUInteger(size.height()); - desc.depth = is3D ? m_depth : 1; + desc.depth = is3D ? qMax(1, m_depth) : 1; desc.mipmapLevelCount = NSUInteger(mipLevelCount); if (samples > 1) desc.sampleCount = NSUInteger(samples); if (isArray) - desc.arrayLength = NSUInteger(m_arraySize); + desc.arrayLength = NSUInteger(qMax(0, m_arraySize)); desc.resourceOptions = MTLResourceStorageModePrivate; desc.storageMode = MTLStorageModePrivate; desc.usage = MTLTextureUsageShaderRead; @@ -2789,9 +3889,6 @@ bool QMetalTexture::create() d->owns = true; - QRHI_PROF; - QRHI_PROF_F(newTexture(this, true, mipLevelCount, isCube ? 6 : (isArray ? m_arraySize : 1), samples)); - lastActiveFrameSlot = -1; generation += 1; rhiD->registerResource(this); @@ -2811,9 +3908,6 @@ bool QMetalTexture::createFrom(QRhiTexture::NativeTexture src) d->owns = false; - QRHI_PROF; - QRHI_PROF_F(newTexture(this, false, mipLevelCount, m_flags.testFlag(CubeMap) ? 6 : 1, samples)); - lastActiveFrameSlot = -1; generation += 1; QRHI_RES_RHI(QRhiMetal); @@ -2836,7 +3930,8 @@ id<MTLTexture> QMetalTextureData::viewForLevel(int level) const bool isCube = q->m_flags.testFlag(QRhiTexture::CubeMap); const bool isArray = q->m_flags.testFlag(QRhiTexture::TextureArray); id<MTLTexture> view = [tex newTextureViewWithPixelFormat: format textureType: type - levels: NSMakeRange(NSUInteger(level), 1) slices: NSMakeRange(0, isCube ? 6 : (isArray ? q->m_arraySize : 1))]; + levels: NSMakeRange(NSUInteger(level), 1) + slices: NSMakeRange(0, isCube ? 6 : (isArray ? qMax(0, q->m_arraySize) : 1))]; perLevelViews[level] = view; return view; @@ -2981,7 +4076,9 @@ QMetalRenderPassDescriptor::~QMetalRenderPassDescriptor() void QMetalRenderPassDescriptor::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiMetal); + if (rhiD) + rhiD->unregisterResource(this); } bool QMetalRenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const @@ -3024,13 +4121,17 @@ void QMetalRenderPassDescriptor::updateSerializedFormat() QRhiRenderPassDescriptor *QMetalRenderPassDescriptor::newCompatibleRenderPassDescriptor() const { - QMetalRenderPassDescriptor *rp = new QMetalRenderPassDescriptor(m_rhi); - rp->colorAttachmentCount = colorAttachmentCount; - rp->hasDepthStencil = hasDepthStencil; - memcpy(rp->colorFormat, colorFormat, sizeof(colorFormat)); - rp->dsFormat = dsFormat; - rp->updateSerializedFormat(); - return rp; + QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi); + rpD->colorAttachmentCount = colorAttachmentCount; + rpD->hasDepthStencil = hasDepthStencil; + memcpy(rpD->colorFormat, colorFormat, sizeof(colorFormat)); + rpD->dsFormat = dsFormat; + + rpD->updateSerializedFormat(); + + QRHI_RES_RHI(QRhiMetal); + rhiD->registerResource(rpD, false); + return rpD; } QVector<quint32> QMetalRenderPassDescriptor::serializedFormat() const @@ -3038,34 +4139,34 @@ QVector<quint32> QMetalRenderPassDescriptor::serializedFormat() const return serializedFormatData; } -QMetalReferenceRenderTarget::QMetalReferenceRenderTarget(QRhiImplementation *rhi) - : QRhiRenderTarget(rhi), +QMetalSwapChainRenderTarget::QMetalSwapChainRenderTarget(QRhiImplementation *rhi, QRhiSwapChain *swapchain) + : QRhiSwapChainRenderTarget(rhi, swapchain), d(new QMetalRenderTargetData) { } -QMetalReferenceRenderTarget::~QMetalReferenceRenderTarget() +QMetalSwapChainRenderTarget::~QMetalSwapChainRenderTarget() { destroy(); delete d; } -void QMetalReferenceRenderTarget::destroy() +void QMetalSwapChainRenderTarget::destroy() { // nothing to do here } -QSize QMetalReferenceRenderTarget::pixelSize() const +QSize QMetalSwapChainRenderTarget::pixelSize() const { return d->pixelSize; } -float QMetalReferenceRenderTarget::devicePixelRatio() const +float QMetalSwapChainRenderTarget::devicePixelRatio() const { return d->dpr; } -int QMetalReferenceRenderTarget::sampleCount() const +int QMetalSwapChainRenderTarget::sampleCount() const { return d->sampleCount; } @@ -3086,12 +4187,14 @@ QMetalTextureRenderTarget::~QMetalTextureRenderTarget() void QMetalTextureRenderTarget::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiMetal); + if (rhiD) + rhiD->unregisterResource(this); } QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDescriptor() { - const int colorAttachmentCount = m_desc.cendColorAttachments() - m_desc.cbeginColorAttachments(); + const int colorAttachmentCount = int(m_desc.colorAttachmentCount()); QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi); rpD->colorAttachmentCount = colorAttachmentCount; rpD->hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture(); @@ -3109,14 +4212,16 @@ QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDesc rpD->dsFormat = int(QRHI_RES(QMetalRenderBuffer, m_desc.depthStencilBuffer())->d->format); rpD->updateSerializedFormat(); + + QRHI_RES_RHI(QRhiMetal); + rhiD->registerResource(rpD, false); return rpD; } bool QMetalTextureRenderTarget::create() { QRHI_RES_RHI(QRhiMetal); - const bool hasColorAttachments = m_desc.cbeginColorAttachments() != m_desc.cendColorAttachments(); - Q_ASSERT(hasColorAttachments || m_desc.depthTexture()); + Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture()); Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture()); const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture(); @@ -3160,8 +4265,9 @@ bool QMetalTextureRenderTarget::create() if (m_desc.depthTexture()) { QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, m_desc.depthTexture()); d->fb.dsTex = depthTexD->d->tex; - d->fb.hasStencil = false; - d->fb.depthNeedsStore = true; + d->fb.hasStencil = rhiD->isStencilSupportingFormat(depthTexD->format()); + d->fb.depthNeedsStore = !m_flags.testFlag(DoNotStoreDepthStencilContents) && !m_desc.depthResolveTexture(); + d->fb.preserveDs = m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents); if (d->colorAttCount == 0) { d->pixelSize = depthTexD->pixelSize(); d->sampleCount = depthTexD->samples; @@ -3171,21 +4277,35 @@ bool QMetalTextureRenderTarget::create() d->fb.dsTex = depthRbD->d->tex; d->fb.hasStencil = true; d->fb.depthNeedsStore = false; + d->fb.preserveDs = false; if (d->colorAttCount == 0) { d->pixelSize = depthRbD->pixelSize(); d->sampleCount = depthRbD->samples; } } + if (m_desc.depthResolveTexture()) { + QMetalTexture *depthResolveTexD = QRHI_RES(QMetalTexture, m_desc.depthResolveTexture()); + d->fb.dsResolveTex = depthResolveTexD->d->tex; + } d->dsAttCount = 1; } else { d->dsAttCount = 0; } + if (d->colorAttCount > 0) + d->fb.preserveColor = m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents); + + QRhiRenderTargetAttachmentTracker::updateResIdList<QMetalTexture, QMetalRenderBuffer>(m_desc, &d->currentResIdList); + + rhiD->registerResource(this, false); return true; } QSize QMetalTextureRenderTarget::pixelSize() const { + if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(m_desc, d->currentResIdList)) + const_cast<QMetalTextureRenderTarget *>(this)->create(); + return d->pixelSize; } @@ -3213,6 +4333,10 @@ void QMetalShaderResourceBindings::destroy() { sortedBindings.clear(); maxBinding = -1; + + QRHI_RES_RHI(QRhiMetal); + if (rhiD) + rhiD->unregisterResource(this); } bool QMetalShaderResourceBindings::create() @@ -3227,13 +4351,9 @@ bool QMetalShaderResourceBindings::create() rhiD->updateLayoutDesc(this); std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings)); - std::sort(sortedBindings.begin(), sortedBindings.end(), - [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b) - { - return a.data()->binding < b.data()->binding; - }); + std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan); if (!sortedBindings.isEmpty()) - maxBinding = sortedBindings.last().data()->binding; + maxBinding = QRhiImplementation::shaderResourceBindingData(sortedBindings.last())->binding; else maxBinding = -1; @@ -3243,6 +4363,7 @@ bool QMetalShaderResourceBindings::create() memset(&bd, 0, sizeof(BoundResourceData)); generation += 1; + rhiD->registerResource(this, false); return true; } @@ -3250,13 +4371,8 @@ void QMetalShaderResourceBindings::updateResources(UpdateFlags flags) { sortedBindings.clear(); std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings)); - if (!flags.testFlag(BindingsAreSorted)) { - std::sort(sortedBindings.begin(), sortedBindings.end(), - [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b) - { - return a.data()->binding < b.data()->binding; - }); - } + if (!flags.testFlag(BindingsAreSorted)) + std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan); for (BoundResourceData &bd : boundResourceData) memset(&bd, 0, sizeof(BoundResourceData)); @@ -3268,6 +4384,8 @@ QMetalGraphicsPipeline::QMetalGraphicsPipeline(QRhiImplementation *rhi) : QRhiGraphicsPipeline(rhi), d(new QMetalGraphicsPipelineData) { + d->q = this; + d->tess.q = d; } QMetalGraphicsPipeline::~QMetalGraphicsPipeline() @@ -3281,18 +4399,45 @@ void QMetalGraphicsPipeline::destroy() d->vs.destroy(); d->fs.destroy(); - [d->ds release]; - d->ds = nil; + d->tess.compVs[0].destroy(); + d->tess.compVs[1].destroy(); + d->tess.compVs[2].destroy(); - if (!d->ps) + d->tess.compTesc.destroy(); + d->tess.vertTese.destroy(); + + qDeleteAll(d->extraBufMgr.deviceLocalWorkBuffers); + d->extraBufMgr.deviceLocalWorkBuffers.clear(); + qDeleteAll(d->extraBufMgr.hostVisibleWorkBuffers); + d->extraBufMgr.hostVisibleWorkBuffers.clear(); + + delete d->bufferSizeBuffer; + d->bufferSizeBuffer = nullptr; + + if (!d->ps && !d->ds + && !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2] + && !d->tess.tessControlComputeState) + { return; + } - [d->ps release]; + QRhiMetalData::DeferredReleaseEntry e; + e.type = QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline; + e.lastActiveFrameSlot = lastActiveFrameSlot; + e.graphicsPipeline.pipelineState = d->ps; + e.graphicsPipeline.depthStencilState = d->ds; + e.graphicsPipeline.tessVertexComputeState = d->tess.vertexComputeState; + e.graphicsPipeline.tessTessControlComputeState = d->tess.tessControlComputeState; d->ps = nil; + d->ds = nil; + d->tess.vertexComputeState = {}; + d->tess.tessControlComputeState = nil; QRHI_RES_RHI(QRhiMetal); - if (rhiD) + if (rhiD) { + rhiD->d->releaseQueue.append(e); rhiD->unregisterResource(this); + } } static inline MTLVertexFormat toMetalAttributeFormat(QRhiVertexInputAttribute::Format format) @@ -3328,6 +4473,30 @@ static inline MTLVertexFormat toMetalAttributeFormat(QRhiVertexInputAttribute::F return MTLVertexFormatInt2; case QRhiVertexInputAttribute::SInt: return MTLVertexFormatInt; + case QRhiVertexInputAttribute::Half4: + return MTLVertexFormatHalf4; + case QRhiVertexInputAttribute::Half3: + return MTLVertexFormatHalf3; + case QRhiVertexInputAttribute::Half2: + return MTLVertexFormatHalf2; + case QRhiVertexInputAttribute::Half: + return MTLVertexFormatHalf; + case QRhiVertexInputAttribute::UShort4: + return MTLVertexFormatUShort4; + case QRhiVertexInputAttribute::UShort3: + return MTLVertexFormatUShort3; + case QRhiVertexInputAttribute::UShort2: + return MTLVertexFormatUShort2; + case QRhiVertexInputAttribute::UShort: + return MTLVertexFormatUShort; + case QRhiVertexInputAttribute::SShort4: + return MTLVertexFormatShort4; + case QRhiVertexInputAttribute::SShort3: + return MTLVertexFormatShort3; + case QRhiVertexInputAttribute::SShort2: + return MTLVertexFormatShort2; + case QRhiVertexInputAttribute::SShort: + return MTLVertexFormatShort; default: Q_UNREACHABLE(); return MTLVertexFormatFloat4; @@ -3483,6 +4652,24 @@ static inline MTLPrimitiveType toMetalPrimitiveType(QRhiGraphicsPipeline::Topolo } } +static inline MTLPrimitiveTopologyClass toMetalPrimitiveTopologyClass(QRhiGraphicsPipeline::Topology t) +{ + switch (t) { + case QRhiGraphicsPipeline::Triangles: + case QRhiGraphicsPipeline::TriangleStrip: + case QRhiGraphicsPipeline::TriangleFan: + return MTLPrimitiveTopologyClassTriangle; + case QRhiGraphicsPipeline::Lines: + case QRhiGraphicsPipeline::LineStrip: + return MTLPrimitiveTopologyClassLine; + case QRhiGraphicsPipeline::Points: + return MTLPrimitiveTopologyClassPoint; + default: + Q_UNREACHABLE(); + return MTLPrimitiveTopologyClassTriangle; + } +} + static inline MTLCullMode toMetalCullMode(QRhiGraphicsPipeline::CullMode c) { switch (c) { @@ -3498,15 +4685,80 @@ static inline MTLCullMode toMetalCullMode(QRhiGraphicsPipeline::CullMode c) } } +static inline MTLTriangleFillMode toMetalTriangleFillMode(QRhiGraphicsPipeline::PolygonMode mode) +{ + switch (mode) { + case QRhiGraphicsPipeline::Fill: + return MTLTriangleFillModeFill; + case QRhiGraphicsPipeline::Line: + return MTLTriangleFillModeLines; + default: + Q_UNREACHABLE(); + return MTLTriangleFillModeFill; + } +} + +static inline MTLWinding toMetalTessellationWindingOrder(QShaderDescription::TessellationWindingOrder w) +{ + switch (w) { + case QShaderDescription::CwTessellationWindingOrder: + return MTLWindingClockwise; + case QShaderDescription::CcwTessellationWindingOrder: + return MTLWindingCounterClockwise; + default: + // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown + return MTLWindingCounterClockwise; + } +} + +static inline MTLTessellationPartitionMode toMetalTessellationPartitionMode(QShaderDescription::TessellationPartitioning p) +{ + switch (p) { + case QShaderDescription::EqualTessellationPartitioning: + return MTLTessellationPartitionModePow2; + case QShaderDescription::FractionalEvenTessellationPartitioning: + return MTLTessellationPartitionModeFractionalEven; + case QShaderDescription::FractionalOddTessellationPartitioning: + return MTLTessellationPartitionModeFractionalOdd; + default: + // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown + return MTLTessellationPartitionModePow2; + } +} + +static inline MTLLanguageVersion toMetalLanguageVersion(const QShaderVersion &version) +{ + int v = version.version(); + return MTLLanguageVersion(((v / 10) << 16) + (v % 10)); +} + id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Variant shaderVariant, QString *error, QByteArray *entryPoint, QShaderKey *activeKey) { - QShaderKey key = { QShader::MetalLibShader, 20, shaderVariant }; - QShaderCode mtllib = shader.shader(key); - if (mtllib.shader().isEmpty()) { - key.setSourceVersion(12); - mtllib = shader.shader(key); + QVarLengthArray<int, 8> versions; + if (@available(macOS 13, iOS 16, *)) + versions << 30; + if (@available(macOS 12, iOS 15, *)) + versions << 24; + if (@available(macOS 11, iOS 14, *)) + versions << 23; + if (@available(macOS 10.15, iOS 13, *)) + versions << 22; + if (@available(macOS 10.14, iOS 12, *)) + versions << 21; + versions << 20 << 12; + + const QList<QShaderKey> shaders = shader.availableShaders(); + + QShaderKey key; + + for (const int &version : versions) { + key = { QShader::Source::MetalLibShader, version, shaderVariant }; + if (shaders.contains(key)) + break; } + + QShaderCode mtllib = shader.shader(key); if (!mtllib.shader().isEmpty()) { dispatch_data_t data = dispatch_data_create(mtllib.shader().constData(), size_t(mtllib.shader().size()), @@ -3525,12 +4777,13 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var } } - key = { QShader::MslShader, 20, shaderVariant }; - QShaderCode mslSource = shader.shader(key); - if (mslSource.shader().isEmpty()) { - key.setSourceVersion(12); - mslSource = shader.shader(key); + for (const int &version : versions) { + key = { QShader::Source::MslShader, version, shaderVariant }; + if (shaders.contains(key)) + break; } + + QShaderCode mslSource = shader.shader(key); if (mslSource.shader().isEmpty()) { qWarning() << "No MSL 2.0 or 1.2 code found in baked shader" << shader; return nil; @@ -3538,7 +4791,7 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var NSString *src = [NSString stringWithUTF8String: mslSource.shader().constData()]; MTLCompileOptions *opts = [[MTLCompileOptions alloc] init]; - opts.languageVersion = key.sourceVersion() == 20 ? MTLLanguageVersion2_0 : MTLLanguageVersion1_2; + opts.languageVersion = toMetalLanguageVersion(key.sourceVersion()); NSError *err = nil; id<MTLLibrary> lib = [dev newLibraryWithSource: src options: opts error: &err]; [opts release]; @@ -3560,55 +4813,192 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var id<MTLFunction> QRhiMetalData::createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint) { - NSString *name = [NSString stringWithUTF8String: entryPoint.constData()]; - id<MTLFunction> f = [lib newFunctionWithName: name]; - [name release]; - return f; + return [lib newFunctionWithName:[NSString stringWithUTF8String:entryPoint.constData()]]; } -bool QMetalGraphicsPipeline::create() +void QMetalGraphicsPipeline::setupAttachmentsInMetalRenderPassDescriptor(void *metalRpDesc, QMetalRenderPassDescriptor *rpD) { - if (d->ps) - destroy(); + MTLRenderPipelineDescriptor *rpDesc = reinterpret_cast<MTLRenderPipelineDescriptor *>(metalRpDesc); + + if (rpD->colorAttachmentCount) { + // defaults when no targetBlends are provided + rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]); + rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll; + rpDesc.colorAttachments[0].blendingEnabled = false; + + Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount + || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1)); + + for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) { + const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]); + rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]); + rpDesc.colorAttachments[i].blendingEnabled = b.enable; + rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor); + rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor); + rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor); + rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha); + rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha); + rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha); + rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite); + } + } + + if (rpD->hasDepthStencil) { + // Must only be set when a depth-stencil buffer will actually be bound, + // validation blows up otherwise. + MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat); + rpDesc.depthAttachmentPixelFormat = fmt; +#if defined(Q_OS_MACOS) + if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float) +#else + if (fmt != MTLPixelFormatDepth32Float) +#endif + rpDesc.stencilAttachmentPixelFormat = fmt; + } QRHI_RES_RHI(QRhiMetal); - if (!rhiD->sanityCheckGraphicsPipeline(this)) - return false; + rpDesc.rasterSampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount)); +} + +void QMetalGraphicsPipeline::setupMetalDepthStencilDescriptor(void *metalDsDesc) +{ + MTLDepthStencilDescriptor *dsDesc = reinterpret_cast<MTLDepthStencilDescriptor *>(metalDsDesc); + + dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways; + dsDesc.depthWriteEnabled = m_depthWrite; + if (m_stencilTest) { + dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init]; + dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp); + dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp); + dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp); + dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp); + dsDesc.frontFaceStencil.readMask = m_stencilReadMask; + dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask; + + dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init]; + dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp); + dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp); + dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp); + dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp); + dsDesc.backFaceStencil.readMask = m_stencilReadMask; + dsDesc.backFaceStencil.writeMask = m_stencilWriteMask; + } +} +void QMetalGraphicsPipeline::mapStates() +{ + d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise; + d->cullMode = toMetalCullMode(m_cullMode); + d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode); + d->depthBias = float(m_depthBias); + d->slopeScaledDepthBias = m_slopeScaledDepthBias; +} + +void QMetalGraphicsPipelineData::setupVertexInputDescriptor(MTLVertexDescriptor *desc) +{ // same binding space for vertex and constant buffers - work it around - const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, m_shaderResourceBindings)->maxBinding + 1; + // should be in native resource binding not SPIR-V, but this will work anyway + const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1; - MTLVertexDescriptor *inputLayout = [MTLVertexDescriptor vertexDescriptor]; - for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes(); + QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout(); + for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes(); it != itEnd; ++it) { const uint loc = uint(it->location()); - inputLayout.attributes[loc].format = toMetalAttributeFormat(it->format()); - inputLayout.attributes[loc].offset = NSUInteger(it->offset()); - inputLayout.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding()); + desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format())); + desc.attributes[loc].offset = NSUInteger(it->offset()); + desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding()); } int bindingIndex = 0; - for (auto it = m_vertexInputLayout.cbeginBindings(), itEnd = m_vertexInputLayout.cendBindings(); + const NSUInteger viewCount = qMax<NSUInteger>(1, q->multiViewCount()); + for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings(); it != itEnd; ++it, ++bindingIndex) { const uint layoutIdx = uint(firstVertexBinding + bindingIndex); - inputLayout.layouts[layoutIdx].stepFunction = - it->classification() == QRhiVertexInputBinding::PerInstance - ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; - inputLayout.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate()); - inputLayout.layouts[layoutIdx].stride = it->stride(); + desc.layouts[layoutIdx].stepFunction = + it->classification() == QRhiVertexInputBinding::PerInstance + ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; + desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate()); + if (desc.layouts[layoutIdx].stepFunction == MTLVertexStepFunctionPerInstance) + desc.layouts[layoutIdx].stepRate *= viewCount; + desc.layouts[layoutIdx].stride = it->stride(); } +} - MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init]; +void QMetalGraphicsPipelineData::setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc) +{ + // same binding space for vertex and constant buffers - work it around + // should be in native resource binding not SPIR-V, but this will work anyway + const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1; - rpDesc.vertexDescriptor = inputLayout; + QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout(); + for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes(); + it != itEnd; ++it) + { + const uint loc = uint(it->location()); + desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format())); + desc.attributes[loc].offset = NSUInteger(it->offset()); + desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding()); + } + int bindingIndex = 0; + for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings(); + it != itEnd; ++it, ++bindingIndex) + { + const uint layoutIdx = uint(firstVertexBinding + bindingIndex); + if (desc.indexBufferIndex) { + desc.layouts[layoutIdx].stepFunction = + it->classification() == QRhiVertexInputBinding::PerInstance + ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridXIndexed; + } else { + desc.layouts[layoutIdx].stepFunction = + it->classification() == QRhiVertexInputBinding::PerInstance + ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridX; + } + desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate()); + desc.layouts[layoutIdx].stride = it->stride(); + } +} - // mutability cannot be determined (slotted buffers could be set as +void QRhiMetalData::trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil]; + rpDesc.binaryArchives = binArchArray; + } + } +} + +void QRhiMetalData::addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSError *err = nil; + if (![binArch addRenderPipelineFunctionsWithDescriptor: rpDesc error: &err]) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to collect render pipeline functions to binary archive: %s", qPrintable(msg)); + } + } + } +} + +bool QMetalGraphicsPipeline::createVertexFragmentPipeline() +{ + QRHI_RES_RHI(QRhiMetal); + + MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor]; + d->setupVertexInputDescriptor(vertexDesc); + + MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init]; + rpDesc.vertexDescriptor = vertexDesc; + + // Mutability cannot be determined (slotted buffers could be set as // MTLMutabilityImmutable, but then we potentially need a different // descriptor for each buffer combination as this depends on the actual - // buffers not just the resource binding layout) so leave it at the default + // buffers not just the resource binding layout), so leave + // rpDesc.vertex/fragmentBuffers at the defaults. - for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { auto cacheIt = rhiD->d->shaderCache.constFind(shaderStage); if (cacheIt != rhiD->d->shaderCache.constEnd()) { switch (shaderStage.type()) { @@ -3654,8 +5044,9 @@ bool QMetalGraphicsPipeline::create() case QRhiShaderStage::Vertex: d->vs.lib = lib; d->vs.func = func; - if (const QShader::NativeResourceBindingMap *map = shader.nativeResourceBindingMap(activeKey)) - d->vs.nativeResourceBindingMap = *map; + d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->vs.desc = shader.description(); + d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); rhiD->d->shaderCache.insert(shaderStage, d->vs); [d->vs.lib retain]; [d->vs.func retain]; @@ -3664,8 +5055,9 @@ bool QMetalGraphicsPipeline::create() case QRhiShaderStage::Fragment: d->fs.lib = lib; d->fs.func = func; - if (const QShader::NativeResourceBindingMap *map = shader.nativeResourceBindingMap(activeKey)) - d->fs.nativeResourceBindingMap = *map; + d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->fs.desc = shader.description(); + d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); rhiD->d->shaderCache.insert(shaderStage, d->fs); [d->fs.lib retain]; [d->fs.func retain]; @@ -3680,85 +5072,838 @@ bool QMetalGraphicsPipeline::create() } QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, m_renderPassDesc); + setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD); - if (rpD->colorAttachmentCount) { - // defaults when no targetBlends are provided - rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]); - rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll; - rpDesc.colorAttachments[0].blendingEnabled = false; + if (m_multiViewCount >= 2) + rpDesc.inputPrimitiveTopology = toMetalPrimitiveTopologyClass(m_topology); - Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount - || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1)); + rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc); - for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) { - const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]); - rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]); - rpDesc.colorAttachments[i].blendingEnabled = b.enable; - rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor); - rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor); - rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor); - rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha); - rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha); - rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha); - rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite); + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addRenderPipelineToBinaryArchive(rpDesc); + + NSError *err = nil; + d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err]; + [rpDesc release]; + if (!d->ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); + return false; + } + + MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init]; + setupMetalDepthStencilDescriptor(dsDesc); + d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc]; + [dsDesc release]; + + d->primitiveType = toMetalPrimitiveType(m_topology); + mapStates(); + + return true; +} + +int QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(QShader::Variant vertexCompVariant) +{ + switch (vertexCompVariant) { + case QShader::NonIndexedVertexAsComputeShader: + return 0; + case QShader::UInt32IndexedVertexAsComputeShader: + return 1; + case QShader::UInt16IndexedVertexAsComputeShader: + return 2; + default: + break; + } + return -1; +} + +id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant) +{ + const int varIndex = vsCompVariantToIndex(vertexCompVariant); + if (varIndex >= 0 && vertexComputeState[varIndex]) + return vertexComputeState[varIndex]; + + id<MTLFunction> func = nil; + if (varIndex >= 0) + func = compVs[varIndex].func; + + if (!func) { + qWarning("No compute function found for vertex shader translated for tessellation, this should not happen"); + return nil; + } + + const QMap<int, int> &ebb(compVs[varIndex].nativeShaderInfo.extraBufferBindings); + const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1); + + MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new]; + cpDesc.computeFunction = func; + cpDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES; + cpDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor]; + if (indexBufferBinding >= 0) { + if (vertexCompVariant == QShader::UInt32IndexedVertexAsComputeShader) { + cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32; + cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding; + } else if (vertexCompVariant == QShader::UInt16IndexedVertexAsComputeShader) { + cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16; + cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding; } } + q->setupStageInputDescriptor(cpDesc.stageInputDescriptor); - if (rpD->hasDepthStencil) { - // Must only be set when a depth-stencil buffer will actually be bound, - // validation blows up otherwise. - MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat); - rpDesc.depthAttachmentPixelFormat = fmt; -#ifdef Q_OS_MACOS - if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float) -#else - if (fmt != MTLPixelFormatDepth32Float) -#endif - rpDesc.stencilAttachmentPixelFormat = fmt; + rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addComputePipelineToBinaryArchive(cpDesc); + + NSError *err = nil; + id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc + options: MTLPipelineOptionNone + reflection: nil + error: &err]; + [cpDesc release]; + if (!ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create compute pipeline state: %s", qPrintable(msg)); + } else { + vertexComputeState[varIndex] = ps; } + // not retained, the only owner is vertexComputeState and so the QRhiGraphicsPipeline + return ps; +} + +id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::tescCompPipeline(QRhiMetal *rhiD) +{ + if (tessControlComputeState) + return tessControlComputeState; + + MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new]; + cpDesc.computeFunction = compTesc.func; - rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount)); + rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addComputePipelineToBinaryArchive(cpDesc); NSError *err = nil; - d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err]; - if (!d->ps) { + id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc + options: MTLPipelineOptionNone + reflection: nil + error: &err]; + [cpDesc release]; + if (!ps) { const QString msg = QString::fromNSString(err.localizedDescription); - qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); - [rpDesc release]; - return false; + qWarning("Failed to create compute pipeline state: %s", qPrintable(msg)); + } else { + tessControlComputeState = ps; + } + // not retained, the only owner is tessControlComputeState and so the QRhiGraphicsPipeline + return ps; +} + +static inline bool indexTaken(quint32 index, quint64 indices) +{ + return (indices >> index) & 0x1; +} + +static inline void takeIndex(quint32 index, quint64 &indices) +{ + indices |= 1 << index; +} + +static inline int nextAttributeIndex(quint64 indices) +{ + // Maximum number of vertex attributes per vertex descriptor. There does + // not appear to be a way to query this from the implementation. + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf indicates + // that all GPU families have a value of 31. + static const int maxVertexAttributes = 31; + + for (int index = 0; index < maxVertexAttributes; ++index) { + if (!indexTaken(index, indices)) + return index; + } + + Q_UNREACHABLE_RETURN(-1); +} + +static inline int aligned(quint32 offset, quint32 alignment) +{ + return ((offset + alignment - 1) / alignment) * alignment; +} + +template<typename T> +static void addUnusedVertexAttribute(const T &variable, QRhiMetal *rhiD, quint32 &offset, quint32 &vertexAlignment) +{ + + int elements = 1; + for (const int dim : variable.arrayDims) + elements *= dim; + + if (variable.type == QShaderDescription::VariableType::Struct) { + for (int element = 0; element < elements; ++element) { + for (const auto &member : variable.structMembers) { + addUnusedVertexAttribute(member, rhiD, offset, vertexAlignment); + } + } + } else { + const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type); + const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format); + + // MSL specification 3.0 says alignment = size for non packed scalars and vectors + const quint32 alignment = size; + vertexAlignment = std::max(vertexAlignment, alignment); + + for (int element = 0; element < elements; ++element) { + // adjust alignment + offset = aligned(offset, alignment); + offset += size; + } + } +} + +template<typename T> +static void addVertexAttribute(const T &variable, int binding, QRhiMetal *rhiD, int &index, quint32 &offset, MTLVertexAttributeDescriptorArray *attributes, quint64 &indices, quint32 &vertexAlignment) +{ + + int elements = 1; + for (const int dim : variable.arrayDims) + elements *= dim; + + if (variable.type == QShaderDescription::VariableType::Struct) { + for (int element = 0; element < elements; ++element) { + for (const auto &member : variable.structMembers) { + addVertexAttribute(member, binding, rhiD, index, offset, attributes, indices, vertexAlignment); + } + } + } else { + const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type); + const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format); + + // MSL specification 3.0 says alignment = size for non packed scalars and vectors + const quint32 alignment = size; + vertexAlignment = std::max(vertexAlignment, alignment); + + for (int element = 0; element < elements; ++element) { + Q_ASSERT(!indexTaken(index, indices)); + + // adjust alignment + offset = aligned(offset, alignment); + + attributes[index].bufferIndex = binding; + attributes[index].format = toMetalAttributeFormat(format); + attributes[index].offset = offset; + + takeIndex(index, indices); + index++; + if (indexTaken(index, indices)) + index = nextAttributeIndex(indices); + + offset += size; + } + } +} + +static inline bool matches(const QList<QShaderDescription::BlockVariable> &a, const QList<QShaderDescription::BlockVariable> &b) +{ + if (a.size() == b.size()) { + bool match = true; + for (int i = 0; i < a.size() && match; ++i) { + match &= a[i].type == b[i].type + && a[i].arrayDims == b[i].arrayDims + && matches(a[i].structMembers, b[i].structMembers); + } + return match; + } + + return false; +} + +static inline bool matches(const QShaderDescription::InOutVariable &a, const QShaderDescription::InOutVariable &b) +{ + return a.location == b.location + && a.type == b.type + && a.perPatch == b.perPatch + && matches(a.structMembers, b.structMembers); +} + +// +// Create the tessellation evaluation render pipeline state +// +// The tesc runs as a compute shader in a compute pipeline and writes per patch and per patch +// control point data into separate storage buffers. The tese runs as a vertex shader in a render +// pipeline. Our task is to generate a render pipeline descriptor for the tese that pulls vertices +// from these buffers. +// +// As the buffers we are pulling vertices from are written by a compute pipeline, they follow the +// MSL alignment conventions which we must take into account when generating our +// MTLVertexDescriptor. We must include the user defined tese input attributes, and any builtins +// that were used. +// +// SPIRV-Cross generates the MSL tese shader code with input attribute indices that reflect the +// specified GLSL locations. Interface blocks are flattened with each member having an incremented +// attribute index. SPIRV-Cross reports an error on compilation if there are clashes in the index +// address space. +// +// After the user specified attributes are processed, SPIRV-Cross places the in-use builtins at the +// next available (lowest value) attribute index. Tese builtins are processed in the following +// order: +// +// in gl_PerVertex +// { +// vec4 gl_Position; +// float gl_PointSize; +// float gl_ClipDistance[]; +// }; +// +// patch in float gl_TessLevelOuter[4]; +// patch in float gl_TessLevelInner[2]; +// +// Enumerations in QShaderDescription::BuiltinType are defined in this order. +// +// For quads, SPIRV-Cross places MTLQuadTessellationFactorsHalf per patch in the tessellation +// factor buffer. For triangles it uses MTLTriangleTessellationFactorsHalf. +// +// It should be noted that SPIRV-Cross handles the following builtin inputs internally, with no +// host side support required. +// +// in vec3 gl_TessCoord; +// in int gl_PatchVerticesIn; +// in int gl_PrimitiveID; +// +id<MTLRenderPipelineState> QMetalGraphicsPipelineData::Tessellation::teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline) +{ + if (pipeline->d->ps) + return pipeline->d->ps; + + MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init]; + MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor]; + + // tesc output buffers + const QMap<int, int> &ebb(compTesc.nativeShaderInfo.extraBufferBindings); + const int tescOutputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int tescPatchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1); + const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1); + quint32 offsetInTescOutput = 0; + quint32 offsetInTescPatchOutput = 0; + quint32 offsetInTessFactorBuffer = 0; + quint32 tescOutputAlignment = 0; + quint32 tescPatchOutputAlignment = 0; + quint32 tessFactorAlignment = 0; + QSet<int> usedBuffers; + + // tesc output variables in ascending location order + QMap<int, QShaderDescription::InOutVariable> tescOutVars; + for (const auto &tescOutVar : compTesc.desc.outputVariables()) + tescOutVars[tescOutVar.location] = tescOutVar; + + // tese input variables in ascending location order + QMap<int, QShaderDescription::InOutVariable> teseInVars; + for (const auto &teseInVar : vertTese.desc.inputVariables()) + teseInVars[teseInVar.location] = teseInVar; + + // bit mask tracking usage of vertex attribute indices + quint64 indices = 0; + + for (QShaderDescription::InOutVariable &tescOutVar : tescOutVars) { + + int index = tescOutVar.location; + int binding = -1; + quint32 *offset = nullptr; + quint32 *alignment = nullptr; + + if (tescOutVar.perPatch) { + binding = tescPatchOutputBufferBinding; + offset = &offsetInTescPatchOutput; + alignment = &tescPatchOutputAlignment; + } else { + tescOutVar.arrayDims.removeLast(); + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + } + + if (teseInVars.contains(index)) { + + if (!matches(teseInVars[index], tescOutVar)) { + qWarning() << "mismatched tessellation control output -> tesssellation evaluation input at location" << index; + qWarning() << " tesc out:" << tescOutVar; + qWarning() << " tese in:" << teseInVars[index]; + } + + if (binding != -1) { + addVertexAttribute(tescOutVar, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment); + usedBuffers << binding; + } else { + qWarning() << "baked tessellation control shader missing output buffer binding information"; + addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment); + } + + } else { + qWarning() << "missing tessellation evaluation input for tessellation control output:" << tescOutVar; + addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment); + } + + teseInVars.remove(tescOutVar.location); + } + + for (const QShaderDescription::InOutVariable &teseInVar : teseInVars) + qWarning() << "missing tessellation control output for tessellation evaluation input:" << teseInVar; + + // tesc output builtins in ascending location order + QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> tescOutBuiltins; + for (const auto &tescOutBuiltin : compTesc.desc.outputBuiltinVariables()) + tescOutBuiltins[tescOutBuiltin.type] = tescOutBuiltin; + + // tese input builtins in ascending location order + QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> teseInBuiltins; + for (const auto &teseInBuiltin : vertTese.desc.inputBuiltinVariables()) + teseInBuiltins[teseInBuiltin.type] = teseInBuiltin; + + const bool trianglesMode = vertTese.desc.tessellationMode() == QShaderDescription::TrianglesTessellationMode; + bool tessLevelAdded = false; + + for (const QShaderDescription::BuiltinVariable &builtin : tescOutBuiltins) { + + QShaderDescription::InOutVariable variable; + int binding = -1; + quint32 *offset = nullptr; + quint32 *alignment = nullptr; + + switch (builtin.type) { + case QShaderDescription::BuiltinType::PositionBuiltin: + variable.type = QShaderDescription::VariableType::Vec4; + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + break; + case QShaderDescription::BuiltinType::PointSizeBuiltin: + variable.type = QShaderDescription::VariableType::Float; + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + break; + case QShaderDescription::BuiltinType::ClipDistanceBuiltin: + variable.type = QShaderDescription::VariableType::Float; + variable.arrayDims = builtin.arrayDims; + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + break; + case QShaderDescription::BuiltinType::TessLevelOuterBuiltin: + variable.type = QShaderDescription::VariableType::Half4; + binding = tessFactorBufferBinding; + offset = &offsetInTessFactorBuffer; + tessLevelAdded = trianglesMode; + alignment = &tessFactorAlignment; + break; + case QShaderDescription::BuiltinType::TessLevelInnerBuiltin: + if (trianglesMode) { + if (!tessLevelAdded) { + variable.type = QShaderDescription::VariableType::Half4; + binding = tessFactorBufferBinding; + offsetInTessFactorBuffer = 0; + offset = &offsetInTessFactorBuffer; + alignment = &tessFactorAlignment; + tessLevelAdded = true; + } else { + teseInBuiltins.remove(builtin.type); + continue; + } + } else { + variable.type = QShaderDescription::VariableType::Half2; + binding = tessFactorBufferBinding; + offsetInTessFactorBuffer = 8; + offset = &offsetInTessFactorBuffer; + alignment = &tessFactorAlignment; + } + break; + default: + Q_UNREACHABLE(); + break; + } + + if (teseInBuiltins.contains(builtin.type)) { + if (binding != -1) { + int index = nextAttributeIndex(indices); + addVertexAttribute(variable, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment); + usedBuffers << binding; + } else { + qWarning() << "baked tessellation control shader missing output buffer binding information"; + addUnusedVertexAttribute(variable, rhiD, *offset, *alignment); + } + } else { + addUnusedVertexAttribute(variable, rhiD, *offset, *alignment); + } + + teseInBuiltins.remove(builtin.type); } + + for (const QShaderDescription::BuiltinVariable &builtin : teseInBuiltins) { + switch (builtin.type) { + case QShaderDescription::BuiltinType::PositionBuiltin: + case QShaderDescription::BuiltinType::PointSizeBuiltin: + case QShaderDescription::BuiltinType::ClipDistanceBuiltin: + qWarning() << "missing tessellation control output for tessellation evaluation builtin input:" << builtin; + break; + default: + break; + } + } + + if (usedBuffers.contains(tescOutputBufferBinding)) { + vertexDesc.layouts[tescOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatchControlPoint; + vertexDesc.layouts[tescOutputBufferBinding].stride = aligned(offsetInTescOutput, tescOutputAlignment); + } + + if (usedBuffers.contains(tescPatchOutputBufferBinding)) { + vertexDesc.layouts[tescPatchOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch; + vertexDesc.layouts[tescPatchOutputBufferBinding].stride = aligned(offsetInTescPatchOutput, tescPatchOutputAlignment); + } + + if (usedBuffers.contains(tessFactorBufferBinding)) { + vertexDesc.layouts[tessFactorBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch; + vertexDesc.layouts[tessFactorBufferBinding].stride = trianglesMode ? sizeof(MTLTriangleTessellationFactorsHalf) : sizeof(MTLQuadTessellationFactorsHalf); + } + + rpDesc.vertexDescriptor = vertexDesc; + rpDesc.vertexFunction = vertTese.func; + rpDesc.fragmentFunction = pipeline->d->fs.func; + + // The portable, cross-API approach is to use CCW, the results are then + // identical (assuming the applied clipSpaceCorrMatrix) for all the 3D + // APIs. The tess.eval. GLSL shader is thus expected to specify ccw. If it + // doesn't, things may not work as expected. + rpDesc.tessellationOutputWindingOrder = toMetalTessellationWindingOrder(vertTese.desc.tessellationWindingOrder()); + + rpDesc.tessellationPartitionMode = toMetalTessellationPartitionMode(vertTese.desc.tessellationPartitioning()); + + QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, pipeline->renderPassDescriptor()); + pipeline->setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD); + + rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addRenderPipelineToBinaryArchive(rpDesc); + + NSError *err = nil; + id<MTLRenderPipelineState> ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err]; [rpDesc release]; + if (!ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create render pipeline state for tessellation: %s", qPrintable(msg)); + } else { + // ps is stored in the QMetalGraphicsPipelineData so the end result in this + // regard is no different from what createVertexFragmentPipeline does + pipeline->d->ps = ps; + } + return ps; +} - MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init]; - dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways; - dsDesc.depthWriteEnabled = m_depthWrite; - if (m_stencilTest) { - dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init]; - dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp); - dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp); - dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp); - dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp); - dsDesc.frontFaceStencil.readMask = m_stencilReadMask; - dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask; +QMetalBuffer *QMetalGraphicsPipelineData::ExtraBufferManager::acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type) +{ + QVector<QMetalBuffer *> *workBuffers = type == WorkBufType::DeviceLocal ? &deviceLocalWorkBuffers : &hostVisibleWorkBuffers; - dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init]; - dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp); - dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp); - dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp); - dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp); - dsDesc.backFaceStencil.readMask = m_stencilReadMask; - dsDesc.backFaceStencil.writeMask = m_stencilWriteMask; + // Check if something is reusable as-is. + for (QMetalBuffer *workBuf : *workBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == -1 && workBuf->size() >= size) { + workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot; + return workBuf; + } + } + + // Once the pool is above a certain threshold, see if there is something + // unused (but too small) and recreate that our size. + if (workBuffers->count() > QMTL_FRAMES_IN_FLIGHT * 8) { + for (QMetalBuffer *workBuf : *workBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == -1) { + workBuf->setSize(size); + if (workBuf->create()) { + workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot; + return workBuf; + } + } + } + } + + // Add a new buffer to the pool. + QMetalBuffer *buf; + if (type == WorkBufType::DeviceLocal) { + // for GPU->GPU data (non-slotted, not necessarily host writable) + buf = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size); + } else { + // for CPU->GPU (non-slotted, host writable/coherent) + buf = new QMetalBuffer(rhiD, QRhiBuffer::Dynamic, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size); + } + if (buf->create()) { + buf->lastActiveFrameSlot = rhiD->currentFrameSlot; + workBuffers->append(buf); + return buf; + } + + qWarning("Failed to acquire work buffer of size %u", size); + return nullptr; +} + +bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert, const QShader &tesc, const QShader &tese, const QShader &tessFrag) +{ + QRHI_RES_RHI(QRhiMetal); + QString error; + QByteArray entryPoint; + QShaderKey activeKey; + + const QShaderDescription tescDesc = tesc.description(); + const QShaderDescription teseDesc = tese.description(); + d->tess.inControlPointCount = uint(m_patchControlPointCount); + d->tess.outControlPointCount = tescDesc.tessellationOutputVertexCount(); + if (!d->tess.outControlPointCount) + d->tess.outControlPointCount = teseDesc.tessellationOutputVertexCount(); + + if (!d->tess.outControlPointCount) { + qWarning("Failed to determine output vertex count from the tessellation control or evaluation shader, cannot tessellate"); + d->tess.enabled = false; + d->tess.failed = true; + return false; } + if (m_multiViewCount >= 2) + qWarning("Multiview is not supported with tessellation"); + + // Now the vertex shader is a compute shader. + // It should have three dedicated *VertexAsComputeShader variants. + // What the requested variant was (Standard or Batchable) plays no role here. + // (the Qt Quick scenegraph does not use tessellation with its materials) + // Create all three versions. + + bool variantsPresent[3] = {}; + const QVector<QShaderKey> tessVertKeys = tessVert.availableShaders(); + for (const QShaderKey &k : tessVertKeys) { + switch (k.sourceVariant()) { + case QShader::NonIndexedVertexAsComputeShader: + variantsPresent[0] = true; + break; + case QShader::UInt32IndexedVertexAsComputeShader: + variantsPresent[1] = true; + break; + case QShader::UInt16IndexedVertexAsComputeShader: + variantsPresent[2] = true; + break; + default: + break; + } + } + if (!(variantsPresent[0] && variantsPresent[1] && variantsPresent[2])) { + qWarning("Vertex shader is not prepared for Metal tessellation. Cannot tessellate. " + "Perhaps the relevant variants (UInt32IndexedVertexAsComputeShader et al) were not generated? " + "Try passing --msltess to qsb."); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + int varIndex = 0; // Will map NonIndexed as 0, UInt32 as 1, UInt16 as 2. Do not change this ordering. + for (QShader::Variant variant : { + QShader::NonIndexedVertexAsComputeShader, + QShader::UInt32IndexedVertexAsComputeShader, + QShader::UInt16IndexedVertexAsComputeShader }) + { + id<MTLLibrary> lib = rhiD->d->createMetalLib(tessVert, variant, &error, &entryPoint, &activeKey); + if (!lib) { + qWarning("MSL shader compilation failed for vertex-as-compute shader %d: %s", int(variant), qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint); + if (!func) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [lib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + QMetalShader &compVs(d->tess.compVs[varIndex]); + compVs.lib = lib; + compVs.func = func; + compVs.desc = tessVert.description(); + compVs.nativeResourceBindingMap = tessVert.nativeResourceBindingMap(activeKey); + compVs.nativeShaderInfo = tessVert.nativeShaderInfo(activeKey); + + // pre-create all three MTLComputePipelineStates + if (!d->tess.vsCompPipeline(rhiD, variant)) { + qWarning("Failed to pre-generate compute pipeline for vertex compute shader (tessellation variant %d)", int(variant)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + ++varIndex; + } + + // Pipeline #2 is a compute that runs the tessellation control (compute) shader + id<MTLLibrary> tessControlLib = rhiD->d->createMetalLib(tesc, QShader::StandardShader, &error, &entryPoint, &activeKey); + if (!tessControlLib) { + qWarning("MSL shader compilation failed for tessellation control compute shader: %s", qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> tessControlFunc = rhiD->d->createMSLShaderFunction(tessControlLib, entryPoint); + if (!tessControlFunc) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [tessControlLib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + d->tess.compTesc.lib = tessControlLib; + d->tess.compTesc.func = tessControlFunc; + d->tess.compTesc.desc = tesc.description(); + d->tess.compTesc.nativeResourceBindingMap = tesc.nativeResourceBindingMap(activeKey); + d->tess.compTesc.nativeShaderInfo = tesc.nativeShaderInfo(activeKey); + if (!d->tess.tescCompPipeline(rhiD)) { + qWarning("Failed to pre-generate compute pipeline for tessellation control shader"); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + // Pipeline #3 is a render pipeline with the tessellation evaluation (vertex) + the fragment shader + id<MTLLibrary> tessEvalLib = rhiD->d->createMetalLib(tese, QShader::StandardShader, &error, &entryPoint, &activeKey); + if (!tessEvalLib) { + qWarning("MSL shader compilation failed for tessellation evaluation vertex shader: %s", qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> tessEvalFunc = rhiD->d->createMSLShaderFunction(tessEvalLib, entryPoint); + if (!tessEvalFunc) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [tessEvalLib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + d->tess.vertTese.lib = tessEvalLib; + d->tess.vertTese.func = tessEvalFunc; + d->tess.vertTese.desc = tese.description(); + d->tess.vertTese.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey); + d->tess.vertTese.nativeShaderInfo = tese.nativeShaderInfo(activeKey); + + id<MTLLibrary> fragLib = rhiD->d->createMetalLib(tessFrag, QShader::StandardShader, &error, &entryPoint, &activeKey); + if (!fragLib) { + qWarning("MSL shader compilation failed for fragment shader: %s", qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> fragFunc = rhiD->d->createMSLShaderFunction(fragLib, entryPoint); + if (!fragFunc) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [fragLib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + d->fs.lib = fragLib; + d->fs.func = fragFunc; + d->fs.desc = tessFrag.description(); + d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey); + d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey); + + if (!d->tess.teseFragRenderPipeline(rhiD, this)) { + qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader"); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init]; + setupMetalDepthStencilDescriptor(dsDesc); d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc]; [dsDesc release]; - d->primitiveType = toMetalPrimitiveType(m_topology); - d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise; - d->cullMode = toMetalCullMode(m_cullMode); - d->depthBias = float(m_depthBias); - d->slopeScaledDepthBias = m_slopeScaledDepthBias; + // no primitiveType + mapStates(); + + return true; +} + +bool QMetalGraphicsPipeline::create() +{ + destroy(); // no early test, always invoke and leave it to destroy to decide what to clean up + + QRHI_RES_RHI(QRhiMetal); + rhiD->pipelineCreationStart(); + if (!rhiD->sanityCheckGraphicsPipeline(this)) + return false; + + // See if tessellation is involved. Things will be very different, if so. + QShader tessVert; + QShader tesc; + QShader tese; + QShader tessFrag; + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { + switch (shaderStage.type()) { + case QRhiShaderStage::Vertex: + tessVert = shaderStage.shader(); + break; + case QRhiShaderStage::TessellationControl: + tesc = shaderStage.shader(); + break; + case QRhiShaderStage::TessellationEvaluation: + tese = shaderStage.shader(); + break; + case QRhiShaderStage::Fragment: + tessFrag = shaderStage.shader(); + break; + default: + break; + } + } + d->tess.enabled = tesc.isValid() && tese.isValid() && m_topology == Patches && m_patchControlPointCount > 0; + d->tess.failed = false; + + bool ok = d->tess.enabled ? createTessellationPipelines(tessVert, tesc, tese, tessFrag) : createVertexFragmentPipeline(); + if (!ok) + return false; + + // SPIRV-Cross buffer size buffers + int buffers = 0; + QVarLengthArray<QMetalShader *, 6> shaders; + if (d->tess.enabled) { + shaders.append(&d->tess.compVs[0]); + shaders.append(&d->tess.compVs[1]); + shaders.append(&d->tess.compVs[2]); + shaders.append(&d->tess.compTesc); + shaders.append(&d->tess.vertTese); + } else { + shaders.append(&d->vs); + } + shaders.append(&d->fs); + + for (QMetalShader *shader : shaders) { + if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1); + int maxNativeBinding = 0; + for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks()) + maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first); + + // we use one buffer to hold data for all graphics shader stages, each with a different offset. + // buffer offsets must be 32byte aligned - adjust buffer count accordingly + buffers += ((maxNativeBinding + 1 + 7) / 8) * 8; + } + } + + if (buffers) { + if (!d->bufferSizeBuffer) + d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int)); + + d->bufferSizeBuffer->setSize(buffers * sizeof(int)); + d->bufferSizeBuffer->create(); + } + rhiD->pipelineCreationEnd(); lastActiveFrameSlot = -1; generation += 1; rhiD->registerResource(this); @@ -3784,12 +5929,43 @@ void QMetalComputePipeline::destroy() if (!d->ps) return; - [d->ps release]; + delete d->bufferSizeBuffer; + d->bufferSizeBuffer = nullptr; + + QRhiMetalData::DeferredReleaseEntry e; + e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline; + e.lastActiveFrameSlot = lastActiveFrameSlot; + e.computePipeline.pipelineState = d->ps; d->ps = nil; QRHI_RES_RHI(QRhiMetal); - if (rhiD) + if (rhiD) { + rhiD->d->releaseQueue.append(e); rhiD->unregisterResource(this); + } +} + +void QRhiMetalData::trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil]; + cpDesc.binaryArchives = binArchArray; + } + } +} + +void QRhiMetalData::addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSError *err = nil; + if (![binArch addComputePipelineFunctionsWithDescriptor: cpDesc error: &err]) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to collect compute pipeline functions to binary archive: %s", qPrintable(msg)); + } + } + } } bool QMetalComputePipeline::create() @@ -3798,6 +5974,7 @@ bool QMetalComputePipeline::create() destroy(); QRHI_RES_RHI(QRhiMetal); + rhiD->pipelineCreationStart(); auto cacheIt = rhiD->d->shaderCache.constFind(m_shaderStage); if (cacheIt != rhiD->d->shaderCache.constEnd()) { @@ -3822,8 +5999,15 @@ bool QMetalComputePipeline::create() d->cs.lib = lib; d->cs.func = func; d->cs.localSize = shader.description().computeShaderLocalSize(); - if (const QShader::NativeResourceBindingMap *map = shader.nativeResourceBindingMap(activeKey)) - d->cs.nativeResourceBindingMap = *map; + d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->cs.desc = shader.description(); + d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); + + // SPIRV-Cross buffer size buffers + if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1); + } if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) { for (QMetalShader &s : rhiD->d->shaderCache) @@ -3838,14 +6022,42 @@ bool QMetalComputePipeline::create() d->localSize = MTLSizeMake(d->cs.localSize[0], d->cs.localSize[1], d->cs.localSize[2]); + MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new]; + cpDesc.computeFunction = d->cs.func; + + rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addComputePipelineToBinaryArchive(cpDesc); + NSError *err = nil; - d->ps = [rhiD->d->dev newComputePipelineStateWithFunction: d->cs.func error: &err]; + d->ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc + options: MTLPipelineOptionNone + reflection: nil + error: &err]; + [cpDesc release]; if (!d->ps) { const QString msg = QString::fromNSString(err.localizedDescription); - qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); + qWarning("Failed to create compute pipeline state: %s", qPrintable(msg)); return false; } + // SPIRV-Cross buffer size buffers + if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + int buffers = 0; + for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks()) + buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first); + + buffers += 1; + + if (!d->bufferSizeBuffer) + d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int)); + + d->bufferSizeBuffer->setSize(buffers * sizeof(int)); + d->bufferSizeBuffer->create(); + } + + rhiD->pipelineCreationEnd(); lastActiveFrameSlot = -1; generation += 1; rhiD->registerResource(this); @@ -3877,10 +6089,12 @@ const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles() return &nativeHandlesStruct; } -void QMetalCommandBuffer::resetState() +void QMetalCommandBuffer::resetState(double lastGpuTime) { + d->lastGpuTime = lastGpuTime; d->currentRenderPassEncoder = nil; d->currentComputePassEncoder = nil; + d->tessellationComputeEncoder = nil; d->currentPassRpDesc = nil; resetPerPassState(); } @@ -3905,9 +6119,12 @@ void QMetalCommandBuffer::resetPerPassCachedState() currentIndexOffset = 0; currentIndexFormat = QRhiCommandBuffer::IndexUInt16; currentCullMode = -1; + currentTriangleFillMode = -1; currentFrontFaceWinding = -1; currentDepthBiasValues = { 0.0f, 0.0f }; + d->currentShaderResourceBindingState = {}; + d->currentDepthStencilState = nil; d->currentFirstVertexBinding = -1; d->currentVertexInputsBuffers.clear(); d->currentVertexInputOffsets.clear(); @@ -3915,7 +6132,7 @@ void QMetalCommandBuffer::resetPerPassCachedState() QMetalSwapChain::QMetalSwapChain(QRhiImplementation *rhi) : QRhiSwapChain(rhi), - rtWrapper(rhi), + rtWrapper(rhi, this), cbWrapper(rhi), d(new QMetalSwapChainData) { @@ -3939,8 +6156,7 @@ void QMetalSwapChain::destroy() for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { if (d->sem[i]) { // the semaphores cannot be released if they do not have the initial value - dispatch_semaphore_wait(d->sem[i], DISPATCH_TIME_FOREVER); - dispatch_semaphore_signal(d->sem[i]); + waitUntilCompleted(i); dispatch_release(d->sem[i]); d->sem[i] = nullptr; @@ -3952,7 +6168,14 @@ void QMetalSwapChain::destroy() d->msaaTex[i] = nil; } +#ifdef Q_OS_MACOS + d->liveResizeStartObserver.remove(); + d->liveResizeEndObserver.remove(); + d->liveResizeObserverSet = false; +#endif + d->layer = nullptr; + m_proxyData = {}; [d->curDrawable release]; d->curDrawable = nil; @@ -3960,8 +6183,6 @@ void QMetalSwapChain::destroy() QRHI_RES_RHI(QRhiMetal); if (rhiD) { rhiD->swapchains.remove(this); - QRHI_PROF; - QRHI_PROF_F(releaseSwapChain(this)); rhiD->unregisterResource(this); } } @@ -3976,6 +6197,9 @@ QRhiRenderTarget *QMetalSwapChain::currentFrameRenderTarget() return &rtWrapper; } +// view.layer should ideally be called on the main thread, otherwise the UI +// Thread Checker in Xcode drops a warning. Hence trying to proxy it through +// QRhiSwapChainProxyData instead of just calling this function directly. static inline CAMetalLayer *layerForWindow(QWindow *window) { Q_ASSERT(window); @@ -3988,21 +6212,51 @@ static inline CAMetalLayer *layerForWindow(QWindow *window) return static_cast<CAMetalLayer *>(view.layer); } +// If someone calls this, it is hopefully from the main thread, and they will +// then set the returned data on the QRhiSwapChain, so it won't need to query +// the layer on its own later on. +QRhiSwapChainProxyData QRhiMetal::updateSwapChainProxyData(QWindow *window) +{ + QRhiSwapChainProxyData d; + d.reserved[0] = layerForWindow(window); + return d; +} + QSize QMetalSwapChain::surfacePixelSize() { Q_ASSERT(m_window); CAMetalLayer *layer = d->layer; if (!layer) - layer = layerForWindow(m_window); + layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, m_window, QRhi::Metal, 0); + + Q_ASSERT(layer); + int height = (int)layer.bounds.size.height; + int width = (int)layer.bounds.size.width; + width *= layer.contentsScale; + height *= layer.contentsScale; + return QSize(width, height); +} - CGSize layerSize = layer.bounds.size; - layerSize.width *= layer.contentsScale; - layerSize.height *= layer.contentsScale; - return QSizeF::fromCGSize(layerSize).toSize(); +bool QMetalSwapChain::isFormatSupported(Format f) +{ + if (f == HDRExtendedSrgbLinear) { + if (@available(macOS 10.11, iOS 16.0, *)) + return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f; + else + return false; + } else if (f == HDRExtendedDisplayP3Linear) { + if (@available(macOS 11.0, iOS 14.0, *)) + return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f; + else + return false; + } + return f == SDR; } QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor() { + QRHI_RES_RHI(QRhiMetal); + chooseFormats(); // ensure colorFormat and similar are filled out QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi); @@ -4013,7 +6267,6 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor() #ifdef Q_OS_MACOS // m_depthStencil may not be built yet so cannot rely on computed fields in it - QRHI_RES_RHI(QRhiMetal); rpD->dsFormat = rhiD->d->dev.depth24Stencil8PixelFormatSupported ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8; #else @@ -4021,6 +6274,8 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor() #endif rpD->updateSerializedFormat(); + + rhiD->registerResource(rpD, false); return rpD; } @@ -4029,10 +6284,26 @@ void QMetalSwapChain::chooseFormats() QRHI_RES_RHI(QRhiMetal); samples = rhiD->effectiveSampleCount(m_sampleCount); // pick a format that is allowed for CAMetalLayer.pixelFormat + if (m_format == HDRExtendedSrgbLinear || m_format == HDRExtendedDisplayP3Linear) { + d->colorFormat = MTLPixelFormatRGBA16Float; + d->rhiColorFormat = QRhiTexture::RGBA16F; + return; + } d->colorFormat = m_flags.testFlag(sRGB) ? MTLPixelFormatBGRA8Unorm_sRGB : MTLPixelFormatBGRA8Unorm; d->rhiColorFormat = QRhiTexture::BGRA8; } +void QMetalSwapChain::waitUntilCompleted(int slot) +{ + // wait+signal is the general pattern to ensure the commands for a + // given frame slot have completed (if sem is 1, we go 0 then 1; if + // sem is 0 we go -1, block, completion increments to 0, then us to 1) + + dispatch_semaphore_t sem = d->sem[slot]; + dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + dispatch_semaphore_signal(sem); +} + bool QMetalSwapChain::createOrResize() { Q_ASSERT(m_window); @@ -4054,13 +6325,25 @@ bool QMetalSwapChain::createOrResize() return false; } - d->layer = layerForWindow(window); + d->layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, window, QRhi::Metal, 0); Q_ASSERT(d->layer); chooseFormats(); if (d->colorFormat != d->layer.pixelFormat) d->layer.pixelFormat = d->colorFormat; + if (m_format == HDRExtendedSrgbLinear) { + if (@available(macOS 10.11, iOS 16.0, *)) { + d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB); + d->layer.wantsExtendedDynamicRangeContent = YES; + } + } else if (m_format == HDRExtendedDisplayP3Linear) { + if (@available(macOS 11.0, iOS 16.0, *)) { + d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3); + d->layer.wantsExtendedDynamicRangeContent = YES; + } + } + if (m_flags.testFlag(UsedAsTransferSource)) d->layer.framebufferOnly = NO; @@ -4084,9 +6367,12 @@ bool QMetalSwapChain::createOrResize() // Now set the layer's drawableSize which will stay set to the same value // until the next createOrResize(), thus ensuring atomicity with regards to // the drawable size in frames. - CGSize layerSize = d->layer.bounds.size; - layerSize.width *= d->layer.contentsScale; - layerSize.height *= d->layer.contentsScale; + int width = (int)d->layer.bounds.size.width; + int height = (int)d->layer.bounds.size.height; + CGSize layerSize = CGSizeMake(width, height); + const float scaleFactor = d->layer.contentsScale; + layerSize.width *= scaleFactor; + layerSize.height *= scaleFactor; d->layer.drawableSize = layerSize; m_currentPixelSize = QSizeF::fromCGSize(layerSize).toSize(); @@ -4094,10 +6380,39 @@ bool QMetalSwapChain::createOrResize() [d->layer setDevice: rhiD->d->dev]; +#ifdef Q_OS_MACOS + // Can only use presentsWithTransaction (to get smooth resizing) when + // presenting from the main (gui) thread. We predict that based on the + // thread this function is called on since if the QRhiSwapChain is + // initialied on a given thread then that's almost certainly the thread on + // which the QRhi renders and presents. + const bool canUsePresentsWithTransaction = NSThread.isMainThread; + + // Have an env.var. just in case it turns out presentsWithTransaction is + // not desired in some specific case. + static bool allowPresentsWithTransaction = !qEnvironmentVariableIntValue("QT_MTL_NO_TRANSACTION"); + + if (allowPresentsWithTransaction && canUsePresentsWithTransaction && !d->liveResizeObserverSet) { + d->liveResizeObserverSet = true; + NSView *view = reinterpret_cast<NSView *>(window->winId()); + NSWindow *window = view.window; + if (window) { + qCDebug(QRHI_LOG_INFO, "will set presentsWithTransaction during live resize"); + d->liveResizeStartObserver = QMacNotificationObserver(window, NSWindowWillStartLiveResizeNotification, [this] { + d->layer.presentsWithTransaction = true; + }); + d->liveResizeEndObserver = QMacNotificationObserver(window, NSWindowDidEndLiveResizeNotification, [this] { + d->layer.presentsWithTransaction = false; + }); + } + } +#endif + [d->curDrawable release]; d->curDrawable = nil; for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { + d->lastGpuTime[i] = 0; if (!d->sem[i]) d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1); } @@ -4123,13 +6438,15 @@ bool QMetalSwapChain::createOrResize() } } + rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget rtWrapper.d->pixelSize = pixelSize; - rtWrapper.d->dpr = float(window->devicePixelRatio()); + rtWrapper.d->dpr = scaleFactor; rtWrapper.d->sampleCount = samples; rtWrapper.d->colorAttCount = 1; rtWrapper.d->dsAttCount = ds ? 1 : 0; - qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, size %dx%d", pixelSize.width(), pixelSize.height()); + qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, pixel size %dx%d (scale %.2f)", + pixelSize.width(), pixelSize.height(), scaleFactor); if (samples > 1) { MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; @@ -4148,13 +6465,39 @@ bool QMetalSwapChain::createOrResize() [desc release]; } - QRHI_PROF; - QRHI_PROF_F(resizeSwapChain(this, QMTL_FRAMES_IN_FLIGHT, samples > 1 ? QMTL_FRAMES_IN_FLIGHT : 0, samples)); - if (needsRegistration) rhiD->registerResource(this); return true; } +QRhiSwapChainHdrInfo QMetalSwapChain::hdrInfo() +{ + QRhiSwapChainHdrInfo info; + info.limitsType = QRhiSwapChainHdrInfo::ColorComponentValue; + info.limits.colorComponentValue.maxColorComponentValue = 1; + info.limits.colorComponentValue.maxPotentialColorComponentValue = 1; + info.luminanceBehavior = QRhiSwapChainHdrInfo::DisplayReferred; // 1.0 = SDR white + info.sdrWhiteLevel = 200; // typical value, but dummy (don't know the real one); won't matter due to being display-referred + + if (m_window) { + // Must use m_window, not window, given this may be called before createOrResize(). +#if defined(Q_OS_MACOS) + NSView *view = reinterpret_cast<NSView *>(m_window->winId()); + NSScreen *screen = view.window.screen; + info.limits.colorComponentValue.maxColorComponentValue = screen.maximumExtendedDynamicRangeColorComponentValue; + info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.maximumPotentialExtendedDynamicRangeColorComponentValue; +#elif defined(Q_OS_IOS) + if (@available(iOS 16.0, *)) { + UIView *view = reinterpret_cast<UIView *>(m_window->winId()); + UIScreen *screen = view.window.windowScene.screen; + info.limits.colorComponentValue.maxColorComponentValue = view.window.windowScene.screen.currentEDRHeadroom; + info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.potentialEDRHeadroom; + } +#endif + } + + return info; +} + QT_END_NAMESPACE |