diff options
Diffstat (limited to 'src/gui/rhi/qrhimetal.mm')
-rw-r--r-- | src/gui/rhi/qrhimetal.mm | 3083 |
1 files changed, 2582 insertions, 501 deletions
diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm index dc21feb74c..1c7b397193 100644 --- a/src/gui/rhi/qrhimetal.mm +++ b/src/gui/rhi/qrhimetal.mm @@ -1,10 +1,18 @@ -// Copyright (C) 2019 The Qt Company Ltd. +// Copyright (C) 2023 The Qt Company Ltd. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only -#include "qrhimetal_p_p.h" +#include "qrhimetal_p.h" +#include "qshader_p.h" #include <QGuiApplication> #include <QWindow> +#include <QUrl> +#include <QFile> +#include <QTemporaryFile> +#include <QFileInfo> #include <qmath.h> +#include <QOperatingSystemVersion> + +#include <QtCore/private/qcore_mac_p.h> #ifdef Q_OS_MACOS #include <AppKit/AppKit.h> @@ -32,19 +40,28 @@ QT_BEGIN_NAMESPACE #error ARC not supported #endif -// Note: we expect everything here pass the Metal API validation when running -// in Debug mode in XCode. Some of the issues that break validation are not -// obvious and not visible when running outside XCode. -// -// An exception is the nextDrawable Called Early blah blah warning, which is -// plain and simply false. +// Even though the macOS 13 MTLBinaryArchive problem (QTBUG-106703) seems +// to be solved in later 13.x releases, we have reports from old Intel hardware +// and older macOS versions where this causes problems (QTBUG-114338). +// Thus we no longer do OS version based differentiation, but rather have a +// single toggle that is currently on, and so QRhi::(set)pipelineCache() +// does nothing with Metal. +#define QRHI_METAL_DISABLE_BINARY_ARCHIVE + +// We should be able to operate with command buffers that do not automatically +// retain/release the resources used by them. (since we have logic that mirrors +// other backends such as the Vulkan one anyway) +#define QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES /*! \class QRhiMetalInitParams \inmodule QtRhi - \internal + \since 6.6 \brief Metal specific initialization parameters. + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. + A Metal-based QRhi needs no special parameters for initialization. \badcode @@ -52,10 +69,13 @@ QT_BEGIN_NAMESPACE rhi = QRhi::create(QRhi::Metal, ¶ms); \endcode - \note Metal API validation cannot be enabled by the application. Instead, - run the debug build of the application in XCode. Generating a - \c{.xcodeproj} file via \c{qmake -spec macx-xcode} provides a convenient - way to enable this. + \note Metal API validation cannot be enabled programmatically by the QRhi. + Instead, either run the debug build of the application in XCode, by + generating a \c{.xcodeproj} file via \c{cmake -G Xcode}, or set the + environment variable \c{METAL_DEVICE_WRAPPER_TYPE=1}. The variable needs to + be set early on in the environment, perferably before starting the process; + attempting to set it at QRhi creation time is not functional in practice. + (too late probably) \note QRhiSwapChain can only target QWindow instances that have their surface type set to QSurface::MetalSurface. @@ -74,14 +94,30 @@ QT_BEGIN_NAMESPACE /*! \class QRhiMetalNativeHandles \inmodule QtRhi - \internal + \since 6.6 \brief Holds the Metal device used by the QRhi. + + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. */ /*! + \variable QRhiMetalNativeHandles::dev + + Set to a valid MTLDevice to import an existing device. +*/ + +/*! + \variable QRhiMetalNativeHandles::cmdQueue + + Set to a valid MTLCommandQueue when importing an existing command queue. + When \nullptr, QRhi will create a new command queue. +*/ + +/*! \class QRhiMetalCommandBufferNativeHandles \inmodule QtRhi - \internal + \since 6.6 \brief Holds the MTLCommandBuffer and MTLRenderCommandEncoder objects that are backing a QRhiCommandBuffer. \note The command buffer object is only guaranteed to be valid while @@ -93,14 +129,28 @@ QT_BEGIN_NAMESPACE \note The command encoder is only valid while recording a pass, that is, between \l{QRhiCommandBuffer::beginPass()} - \l{QRhiCommandBuffer::endPass()}. + + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. */ +/*! + \variable QRhiMetalCommandBufferNativeHandles::commandBuffer +*/ + +/*! + \variable QRhiMetalCommandBufferNativeHandles::encoder +*/ + struct QMetalShader { id<MTLLibrary> lib = nil; id<MTLFunction> func = nil; - std::array<uint, 3> localSize; + std::array<uint, 3> localSize = {}; + uint outputVertexCount = 0; + QShaderDescription desc; QShader::NativeResourceBindingMap nativeResourceBindingMap; + QShader::NativeShaderInfo nativeShaderInfo; void destroy() { nativeResourceBindingMap.clear(); @@ -113,11 +163,14 @@ struct QMetalShader struct QRhiMetalData { - QRhiMetalData(QRhiImplementation *rhi) : ofr(rhi) { } + QRhiMetalData(QRhiMetal *rhi) : q(rhi), ofr(rhi) { } + QRhiMetal *q; id<MTLDevice> dev = nil; id<MTLCommandQueue> cmdQueue = nil; + API_AVAILABLE(macosx(11.0), ios(14.0)) id<MTLBinaryArchive> binArch = nil; + id<MTLCommandBuffer> newCommandBuffer(); MTLRenderPassDescriptor *createDefaultRenderPass(bool hasDepthStencil, const QColor &colorClearValue, const QRhiDepthStencilClearValue &depthStencilClearValue, @@ -125,6 +178,11 @@ struct QRhiMetalData id<MTLLibrary> createMetalLib(const QShader &shader, QShader::Variant shaderVariant, QString *error, QByteArray *entryPoint, QShaderKey *activeKey); id<MTLFunction> createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint); + bool setupBinaryArchive(NSURL *sourceFileUrl = nil); + void addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc); + void trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc); + void addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc); + void trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc); struct DeferredReleaseEntry { enum Type { @@ -159,6 +217,8 @@ struct QRhiMetalData struct { id<MTLRenderPipelineState> pipelineState; id<MTLDepthStencilState> depthStencilState; + std::array<id<MTLComputePipelineState>, 3> tessVertexComputeState; + id<MTLComputePipelineState> tessTessControlComputeState; } graphicsPipeline; struct { id<MTLComputePipelineState> pipelineState; @@ -170,6 +230,7 @@ struct QRhiMetalData struct OffscreenFrame { OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { } bool active = false; + double lastGpuTime = 0; QMetalCommandBuffer cbWrapper; } ofr; @@ -184,6 +245,17 @@ struct QRhiMetalData }; QVarLengthArray<TextureReadback, 2> activeTextureReadbacks; + struct BufferReadback + { + int activeFrameSlot = -1; + QRhiReadbackResult *result; + quint32 offset; + quint32 readSize; + id<MTLBuffer> buf; + }; + + QVarLengthArray<BufferReadback, 2> activeBufferReadbacks; + MTLCaptureManager *captureMgr; id<MTLCaptureScope> captureScope = nil; @@ -255,14 +327,16 @@ struct QMetalShaderResourceBindingsData { QRhiBatchedBindings<id<MTLTexture> > textureBatches; QRhiBatchedBindings<id<MTLSamplerState> > samplerBatches; } res[QRhiMetal::SUPPORTED_STAGES]; - enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2 }; + enum { VERTEX = 0, FRAGMENT = 1, COMPUTE = 2, TESSCTRL = 3, TESSEVAL = 4 }; }; struct QMetalCommandBufferData { id<MTLCommandBuffer> cb; + double lastGpuTime = 0; id<MTLRenderCommandEncoder> currentRenderPassEncoder; id<MTLComputeCommandEncoder> currentComputePassEncoder; + id<MTLComputeCommandEncoder> tessellationComputeEncoder; MTLRenderPassDescriptor *currentPassRpDesc; int currentFirstVertexBinding; QRhiBatchedBindings<id<MTLBuffer> > currentVertexInputsBuffers; @@ -294,8 +368,11 @@ struct QMetalRenderTargetData struct { ColorAtt colorAtt[QMetalRenderPassDescriptor::MAX_COLOR_ATTACHMENTS]; id<MTLTexture> dsTex = nil; + id<MTLTexture> dsResolveTex = nil; bool hasStencil = false; bool depthNeedsStore = false; + bool preserveColor = false; + bool preserveDs = false; } fb; QRhiRenderTargetAttachmentTracker::ResIdList currentResIdList; @@ -303,6 +380,7 @@ struct QMetalRenderTargetData struct QMetalGraphicsPipelineData { + QMetalGraphicsPipeline *q = nullptr; id<MTLRenderPipelineState> ps = nil; id<MTLDepthStencilState> ds = nil; MTLPrimitiveType primitiveType; @@ -313,6 +391,54 @@ struct QMetalGraphicsPipelineData float slopeScaledDepthBias; QMetalShader vs; QMetalShader fs; + struct ExtraBufferManager { + enum class WorkBufType { + DeviceLocal, + HostVisible + }; + QMetalBuffer *acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type = WorkBufType::DeviceLocal); + QVector<QMetalBuffer *> deviceLocalWorkBuffers; + QVector<QMetalBuffer *> hostVisibleWorkBuffers; + } extraBufMgr; + struct Tessellation { + QMetalGraphicsPipelineData *q = nullptr; + bool enabled = false; + bool failed = false; + uint inControlPointCount; + uint outControlPointCount; + QMetalShader compVs[3]; + std::array<id<MTLComputePipelineState>, 3> vertexComputeState = {}; + id<MTLComputePipelineState> tessControlComputeState = nil; + QMetalShader compTesc; + QMetalShader vertTese; + quint32 vsCompOutputBufferSize(quint32 vertexOrIndexCount, quint32 instanceCount) const + { + // max vertex output components = resourceLimit(MaxVertexOutputs) * 4 = 60 + return vertexOrIndexCount * instanceCount * sizeof(float) * 60; + } + quint32 tescCompOutputBufferSize(quint32 patchCount) const + { + return outControlPointCount * patchCount * sizeof(float) * 60; + } + quint32 tescCompPatchOutputBufferSize(quint32 patchCount) const + { + // assume maxTessellationControlPerPatchOutputComponents is 128 + return patchCount * sizeof(float) * 128; + } + quint32 patchCountForDrawCall(quint32 vertexOrIndexCount, quint32 instanceCount) const + { + return ((vertexOrIndexCount + inControlPointCount - 1) / inControlPointCount) * instanceCount; + } + static int vsCompVariantToIndex(QShader::Variant vertexCompVariant); + id<MTLComputePipelineState> vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant); + id<MTLComputePipelineState> tescCompPipeline(QRhiMetal *rhiD); + id<MTLRenderPipelineState> teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline); + } tess; + void setupVertexInputDescriptor(MTLVertexDescriptor *desc); + void setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc); + + // SPIRV-Cross buffer size buffers + QMetalBuffer *bufferSizeBuffer = nullptr; }; struct QMetalComputePipelineData @@ -320,6 +446,9 @@ struct QMetalComputePipelineData id<MTLComputePipelineState> ps = nil; QMetalShader cs; MTLSize localSize; + + // SPIRV-Cross buffer size buffers + QMetalBuffer *bufferSizeBuffer = nullptr; }; struct QMetalSwapChainData @@ -327,10 +456,16 @@ struct QMetalSwapChainData CAMetalLayer *layer = nullptr; id<CAMetalDrawable> curDrawable = nil; dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT]; + double lastGpuTime[QMTL_FRAMES_IN_FLIGHT]; MTLRenderPassDescriptor *rp = nullptr; id<MTLTexture> msaaTex[QMTL_FRAMES_IN_FLIGHT]; QRhiTexture::Format rhiColorFormat; MTLPixelFormat colorFormat; +#ifdef Q_OS_MACOS + bool liveResizeObserverSet = false; + QMacNotificationObserver liveResizeStartObserver; + QMacNotificationObserver liveResizeEndObserver; +#endif }; QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *importDevice) @@ -341,7 +476,7 @@ QRhiMetal::QRhiMetal(QRhiMetalInitParams *params, QRhiMetalNativeHandles *import importedDevice = importDevice != nullptr; if (importedDevice) { - if (d->dev) { + if (importDevice->dev) { d->dev = (id<MTLDevice>) importDevice->dev; importedCmdQueue = importDevice->cmdQueue != nullptr; if (importedCmdQueue) @@ -375,9 +510,44 @@ bool QRhiMetal::probe(QRhiMetalInitParams *params) return false; } +id<MTLCommandBuffer> QRhiMetalData::newCommandBuffer() +{ +#ifdef QRHI_METAL_COMMAND_BUFFERS_WITH_UNRETAINED_REFERENCES + // Do not let the command buffer mess with the refcount of objects. We do + // have a proper render loop and will manage lifetimes similarly to other + // backends (Vulkan). + return [cmdQueue commandBufferWithUnretainedReferences]; +#else + return [cmdQueue commandBuffer]; +#endif +} + +bool QRhiMetalData::setupBinaryArchive(NSURL *sourceFileUrl) +{ +#ifdef QRHI_METAL_DISABLE_BINARY_ARCHIVE + return false; +#endif + + if (@available(macOS 11.0, iOS 14.0, *)) { + [binArch release]; + MTLBinaryArchiveDescriptor *binArchDesc = [MTLBinaryArchiveDescriptor new]; + binArchDesc.url = sourceFileUrl; + NSError *err = nil; + binArch = [dev newBinaryArchiveWithDescriptor: binArchDesc error: &err]; + [binArchDesc release]; + if (!binArch) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("newBinaryArchiveWithDescriptor failed: %s", qPrintable(msg)); + return false; + } + return true; + } + return false; +} + bool QRhiMetal::create(QRhi::Flags flags) { - Q_UNUSED(flags); + rhiFlags = flags; if (importedDevice) [d->dev retain]; @@ -392,10 +562,12 @@ bool QRhiMetal::create(QRhi::Flags flags) const QString deviceName = QString::fromNSString([d->dev name]); qCDebug(QRHI_LOG_INFO, "Metal device: %s", qPrintable(deviceName)); driverInfoStruct.deviceName = deviceName.toUtf8(); - driverInfoStruct.deviceId = [d->dev registryID]; -#ifdef Q_OS_IOS - driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice; -#else + + // deviceId and vendorId stay unset for now. Note that registryID is not + // suitable as deviceId because it does not seem stable on macOS and can + // apparently change when the system is rebooted. + +#ifdef Q_OS_MACOS if (@available(macOS 10.15, *)) { const MTLDeviceLocation deviceLocation = [d->dev location]; switch (deviceLocation) { @@ -412,8 +584,14 @@ bool QRhiMetal::create(QRhi::Flags flags) break; } } +#else + driverInfoStruct.deviceType = QRhiDriverInfo::IntegratedDevice; #endif + const QOperatingSystemVersion ver = QOperatingSystemVersion::current(); + osMajor = ver.majorVersion(); + osMinor = ver.minorVersion(); + if (importedCmdQueue) [d->cmdQueue retain]; else @@ -433,6 +611,7 @@ bool QRhiMetal::create(QRhi::Flags flags) if (@available(macOS 10.15, *)) caps.isAppleGPU = [d->dev supportsFamily:MTLGPUFamilyApple7]; caps.maxThreadGroupSize = 1024; + caps.multiView = true; #elif defined(Q_OS_TVOS) if ([d->dev supportsFeatureSet: MTLFeatureSet(30003)]) // MTLFeatureSet_tvOS_GPUFamily2_v1 caps.maxTextureSize = 16384; @@ -459,8 +638,10 @@ bool QRhiMetal::create(QRhi::Flags flags) } caps.isAppleGPU = true; if (@available(iOS 13, *)) { - if ([d->dev supportsFamily:MTLGPUFamilyApple4]) + if ([d->dev supportsFamily: MTLGPUFamilyApple4]) caps.maxThreadGroupSize = 1024; + if ([d->dev supportsFamily: MTLGPUFamilyApple5]) + caps.multiView = true; } #endif @@ -470,6 +651,9 @@ bool QRhiMetal::create(QRhi::Flags flags) caps.supportedSampleCounts.append(sampleCount); } + if (rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + d->setupBinaryArchive(); + nativeHandlesStruct.dev = (MTLDevice *) d->dev; nativeHandlesStruct.cmdQueue = (MTLCommandQueue *) d->cmdQueue; @@ -488,6 +672,11 @@ void QRhiMetal::destroy() [d->captureScope release]; d->captureScope = nil; + if (@available(macOS 11.0, iOS 14.0, *)) { + [d->binArch release]; + d->binArch = nil; + } + [d->cmdQueue release]; if (!importedCmdQueue) d->cmdQueue = nil; @@ -502,17 +691,6 @@ QVector<int> QRhiMetal::supportedSampleCounts() const return caps.supportedSampleCounts; } -int QRhiMetal::effectiveSampleCount(int sampleCount) const -{ - // Stay compatible with QSurfaceFormat and friends where samples == 0 means the same as 1. - const int s = qBound(1, sampleCount, 64); - if (!supportedSampleCounts().contains(s)) { - qWarning("Attempted to set unsupported sample count %d", sampleCount); - return 1; - } - return s; -} - QRhiSwapChain *QRhiMetal::createSwapChain() { return new QMetalSwapChain(this); @@ -600,7 +778,7 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const case QRhi::DebugMarkers: return true; case QRhi::Timestamps: - return false; + return true; case QRhi::Instancing: return true; case QRhi::CustomInstanceStepRate: @@ -644,7 +822,12 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const case QRhi::ReadBackAnyTextureFormat: return true; case QRhi::PipelineCacheDataLoadSave: - return false; + { + if (@available(macOS 11.0, iOS 14.0, *)) + return true; + else + return false; + } case QRhi::ImageDataStride: return true; case QRhi::RenderBufferImport: @@ -656,13 +839,29 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const case QRhi::TextureArrays: return true; case QRhi::Tessellation: - return false; + return true; case QRhi::GeometryShader: return false; case QRhi::TextureArrayRange: return false; case QRhi::NonFillPolygonMode: return true; + case QRhi::OneDimensionalTextures: + return true; + case QRhi::OneDimensionalTextureMipmaps: + return false; + case QRhi::HalfAttributes: + return true; + case QRhi::RenderToOneDimensionalTexture: + return false; + case QRhi::ThreeDimensionalTextureMipmaps: + return true; + case QRhi::MultiView: + return caps.multiView; + case QRhi::TextureViewFormat: + return false; + case QRhi::ResolveDepthStencil: + return true; default: Q_UNREACHABLE(); return false; @@ -742,14 +941,133 @@ bool QRhiMetal::isDeviceLost() const return false; } +struct QMetalPipelineCacheDataHeader +{ + quint32 rhiId; + quint32 arch; + quint32 dataSize; + quint32 osMajor; + quint32 osMinor; + char driver[236]; +}; + QByteArray QRhiMetal::pipelineCacheData() { - return QByteArray(); + Q_STATIC_ASSERT(sizeof(QMetalPipelineCacheDataHeader) == 256); + QByteArray data; + if (@available(macOS 11.0, iOS 14.0, *)) { + if (!d->binArch || !rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + return data; + + QTemporaryFile tmp; + if (!tmp.open()) { + qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal"); + return data; + } + tmp.close(); // the file exists until the tmp dtor runs + + const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath(); + NSURL *url = QUrl::fromLocalFile(fn).toNSURL(); + NSError *err = nil; + if (![d->binArch serializeToURL: url error: &err]) { + const QString msg = QString::fromNSString(err.localizedDescription); + // Some of these "errors" are not actual errors. (think of "Nothing to serialize") + qCDebug(QRHI_LOG_INFO, "Failed to serialize MTLBinaryArchive: %s", qPrintable(msg)); + return data; + } + + QFile f(fn); + if (!f.open(QIODevice::ReadOnly)) { + qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to reopen temporary file"); + return data; + } + const QByteArray blob = f.readAll(); + f.close(); + + const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader); + const quint32 dataSize = quint32(blob.size()); + + data.resize(headerSize + dataSize); + + QMetalPipelineCacheDataHeader header = {}; + header.rhiId = pipelineCacheRhiId(); + header.arch = quint32(sizeof(void*)); + header.dataSize = quint32(dataSize); + header.osMajor = osMajor; + header.osMinor = osMinor; + const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length())); + if (driverStrLen) + memcpy(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen); + header.driver[driverStrLen] = '\0'; + + memcpy(data.data(), &header, headerSize); + memcpy(data.data() + headerSize, blob.constData(), dataSize); + } + return data; } void QRhiMetal::setPipelineCacheData(const QByteArray &data) { - Q_UNUSED(data); + if (data.isEmpty()) + return; + + const size_t headerSize = sizeof(QMetalPipelineCacheDataHeader); + if (data.size() < qsizetype(headerSize)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (header incomplete)"); + return; + } + + const size_t dataOffset = headerSize; + QMetalPipelineCacheDataHeader header; + memcpy(&header, data.constData(), headerSize); + + const quint32 rhiId = pipelineCacheRhiId(); + if (header.rhiId != rhiId) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: The data is for a different QRhi version or backend (%u, %u)", + rhiId, header.rhiId); + return; + } + + const quint32 arch = quint32(sizeof(void*)); + if (header.arch != arch) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Architecture does not match (%u, %u)", + arch, header.arch); + return; + } + + if (header.osMajor != osMajor || header.osMinor != osMinor) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: OS version does not match (%u.%u, %u.%u)", + osMajor, osMinor, header.osMajor, header.osMinor); + return; + } + + const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.length())); + if (strncmp(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Metal device name does not match"); + return; + } + + if (data.size() < qsizetype(dataOffset + header.dataSize)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (data incomplete)"); + return; + } + + if (@available(macOS 11.0, iOS 14.0, *)) { + const char *p = data.constData() + dataOffset; + + QTemporaryFile tmp; + if (!tmp.open()) { + qCDebug(QRHI_LOG_INFO, "pipelineCacheData: Failed to create temporary file for Metal"); + return; + } + tmp.write(p, header.dataSize); + tmp.close(); // the file exists until the tmp dtor runs + + const QString fn = QFileInfo(tmp.fileName()).absoluteFilePath(); + NSURL *url = QUrl::fromLocalFile(fn).toNSURL(); + if (d->setupBinaryArchive(url)) + qCDebug(QRHI_LOG_INFO, "Created MTLBinaryArchive with initial data of %u bytes", header.dataSize); + } } QRhiRenderBuffer *QRhiMetal::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize, @@ -819,6 +1137,136 @@ static inline int mapBinding(int binding, return -1; } +static inline void bindStageBuffers(QMetalCommandBuffer *cbD, + int stage, + const QRhiBatchedBindings<id<MTLBuffer>>::Batch &bufferBatch, + const QRhiBatchedBindings<NSUInteger>::Batch &offsetBatch) +{ + switch (stage) { + case QMetalShaderResourceBindingsData::VERTEX: + [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::FRAGMENT: + [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::COMPUTE: + [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::TESSCTRL: + case QMetalShaderResourceBindingsData::TESSEVAL: + // do nothing. These are used later for tessellation + break; + default: + Q_UNREACHABLE(); + break; + } +} + +static inline void bindStageTextures(QMetalCommandBuffer *cbD, + int stage, + const QRhiBatchedBindings<id<MTLTexture>>::Batch &textureBatch) +{ + switch (stage) { + case QMetalShaderResourceBindingsData::VERTEX: + [cbD->d->currentRenderPassEncoder setVertexTextures: textureBatch.resources.constData() + withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::FRAGMENT: + [cbD->d->currentRenderPassEncoder setFragmentTextures: textureBatch.resources.constData() + withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::COMPUTE: + [cbD->d->currentComputePassEncoder setTextures: textureBatch.resources.constData() + withRange: NSMakeRange(textureBatch.startBinding, NSUInteger(textureBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::TESSCTRL: + case QMetalShaderResourceBindingsData::TESSEVAL: + // do nothing. These are used later for tessellation + break; + default: + Q_UNREACHABLE(); + break; + } +} + +static inline void bindStageSamplers(QMetalCommandBuffer *cbD, + int encoderStage, + const QRhiBatchedBindings<id<MTLSamplerState>>::Batch &samplerBatch) +{ + switch (encoderStage) { + case QMetalShaderResourceBindingsData::VERTEX: + [cbD->d->currentRenderPassEncoder setVertexSamplerStates: samplerBatch.resources.constData() + withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::FRAGMENT: + [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: samplerBatch.resources.constData() + withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::COMPUTE: + [cbD->d->currentComputePassEncoder setSamplerStates: samplerBatch.resources.constData() + withRange: NSMakeRange(samplerBatch.startBinding, NSUInteger(samplerBatch.resources.count()))]; + break; + case QMetalShaderResourceBindingsData::TESSCTRL: + case QMetalShaderResourceBindingsData::TESSEVAL: + // do nothing. These are used later for tessellation + break; + default: + Q_UNREACHABLE(); + break; + } +} + +// Helper that is not used during the common vertex+fragment and compute +// pipelines, but is necessary when tessellation is involved and so the +// graphics pipeline is under the hood a combination of multiple compute and +// render pipelines. We need to be able to set the buffers, textures, samplers +// when a switching between render and compute encoders. +static inline void rebindShaderResources(QMetalCommandBuffer *cbD, int resourceStage, int encoderStage, + const QMetalShaderResourceBindingsData *customBindingState = nullptr) +{ + const QMetalShaderResourceBindingsData *bindingData = customBindingState ? customBindingState : &cbD->d->currentShaderResourceBindingState; + + for (int i = 0, ie = bindingData->res[resourceStage].bufferBatches.batches.count(); i != ie; ++i) { + const auto &bufferBatch(bindingData->res[resourceStage].bufferBatches.batches[i]); + const auto &offsetBatch(bindingData->res[resourceStage].bufferOffsetBatches.batches[i]); + bindStageBuffers(cbD, encoderStage, bufferBatch, offsetBatch); + } + + for (int i = 0, ie = bindingData->res[resourceStage].textureBatches.batches.count(); i != ie; ++i) { + const auto &batch(bindingData->res[resourceStage].textureBatches.batches[i]); + bindStageTextures(cbD, encoderStage, batch); + } + + for (int i = 0, ie = bindingData->res[resourceStage].samplerBatches.batches.count(); i != ie; ++i) { + const auto &batch(bindingData->res[resourceStage].samplerBatches.batches[i]); + bindStageSamplers(cbD, encoderStage, batch); + } +} + +static inline QRhiShaderResourceBinding::StageFlag toRhiSrbStage(int stage) +{ + switch (stage) { + case QMetalShaderResourceBindingsData::VERTEX: + return QRhiShaderResourceBinding::StageFlag::VertexStage; + case QMetalShaderResourceBindingsData::TESSCTRL: + return QRhiShaderResourceBinding::StageFlag::TessellationControlStage; + case QMetalShaderResourceBindingsData::TESSEVAL: + return QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage; + case QMetalShaderResourceBindingsData::FRAGMENT: + return QRhiShaderResourceBinding::StageFlag::FragmentStage; + case QMetalShaderResourceBindingsData::COMPUTE: + return QRhiShaderResourceBinding::StageFlag::ComputeStage; + } + + Q_UNREACHABLE_RETURN(QRhiShaderResourceBinding::StageFlag::VertexStage); +} + void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD, QMetalCommandBuffer *cbD, int dynamicOffsetCount, @@ -828,8 +1276,8 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { QMetalShaderResourceBindingsData bindingData; - for (const QRhiShaderResourceBinding &binding : qAsConst(srbD->sortedBindings)) { - const QRhiShaderResourceBinding::Data *b = binding.data(); + for (const QRhiShaderResourceBinding &binding : std::as_const(srbD->sortedBindings)) { + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(binding); switch (b->type) { case QRhiShaderResourceBinding::UniformBuffer: { @@ -843,20 +1291,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD break; } } - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::VERTEX].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset }); + + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer); + if (nativeBinding >= 0) + bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset }); + } } } break; @@ -868,36 +1309,21 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD for (int elem = 0; elem < data->count; ++elem) { QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.stex.texSamplers[elem].tex); QMetalSampler *samplerD = QRHI_RES(QMetalSampler, b->u.stex.texSamplers[elem].sampler); - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - // Must handle all three cases (combined, separate, separate): - // first = texture binding, second = sampler binding - // first = texture binding - // first = sampler binding (i.e. BindingType::Texture...) - const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture); - const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Sampler) - : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture) : -1); - if (textureBinding >= 0 && texD) - bindingData.res[QMetalShaderResourceBindingsData::VERTEX].textures.append({ textureBinding + elem, texD->d->tex }); - if (samplerBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::VERTEX].samplers.append({ samplerBinding + elem, samplerD->d->samplerState }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture); - const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Sampler) - : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture) : -1); - if (textureBinding >= 0 && texD) - bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].textures.append({ textureBinding + elem, texD->d->tex }); - if (samplerBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].samplers.append({ samplerBinding + elem, samplerD->d->samplerState }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int textureBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture); - const int samplerBinding = texD && samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Sampler) - : (samplerD ? mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture) : -1); - if (textureBinding >= 0 && texD) - bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].textures.append({ textureBinding + elem, texD->d->tex }); - if (samplerBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].samplers.append({ samplerBinding + elem, samplerD->d->samplerState }); + + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + // Must handle all three cases (combined, separate, separate): + // first = texture binding, second = sampler binding + // first = texture binding + // first = sampler binding (i.e. BindingType::Texture...) + const int textureBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture); + const int samplerBinding = texD && samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Sampler) + : (samplerD ? mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture) : -1); + if (textureBinding >= 0 && texD) + bindingData.res[stage].textures.append({ textureBinding + elem, texD->d->tex }); + if (samplerBinding >= 0) + bindingData.res[stage].samplers.append({ samplerBinding + elem, samplerD->d->samplerState }); + } } } } @@ -908,20 +1334,13 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { QMetalTexture *texD = QRHI_RES(QMetalTexture, b->u.simage.tex); id<MTLTexture> t = texD->d->viewForLevel(b->u.simage.level); - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Texture); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::VERTEX].textures.append({ nativeBinding, t }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Texture); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].textures.append({ nativeBinding, t }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Texture); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].textures.append({ nativeBinding, t }); + + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Texture); + if (nativeBinding >= 0) + bindingData.res[stage].textures.append({ nativeBinding, t }); + } } } break; @@ -932,20 +1351,12 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); id<MTLBuffer> mtlbuf = bufD->d->buf[0]; quint32 offset = b->u.sbuf.offset; - if (b->stage.testFlag(QRhiShaderResourceBinding::VertexStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::VERTEX, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::VERTEX].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::FragmentStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::FRAGMENT, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::FRAGMENT].buffers.append({ nativeBinding, mtlbuf, offset }); - } - if (b->stage.testFlag(QRhiShaderResourceBinding::ComputeStage)) { - const int nativeBinding = mapBinding(b->binding, QMetalShaderResourceBindingsData::COMPUTE, nativeResourceBindingMaps, BindingType::Buffer); - if (nativeBinding >= 0) - bindingData.res[QMetalShaderResourceBindingsData::COMPUTE].buffers.append({ nativeBinding, mtlbuf, offset }); + for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { + if (b->stage.testFlag(toRhiSrbStage(stage))) { + const int nativeBinding = mapBinding(b->binding, stage, nativeResourceBindingMaps, BindingType::Buffer); + if (nativeBinding >= 0) + bindingData.res[stage].buffers.append({ nativeBinding, mtlbuf, offset }); + } } } break; @@ -956,9 +1367,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD } for (int stage = 0; stage < SUPPORTED_STAGES; ++stage) { - if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT)) + if (cbD->recordingPass != QMetalCommandBuffer::RenderPass && (stage == QMetalShaderResourceBindingsData::VERTEX || stage == QMetalShaderResourceBindingsData::FRAGMENT + || stage == QMetalShaderResourceBindingsData::TESSCTRL || stage == QMetalShaderResourceBindingsData::TESSEVAL)) continue; - if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && stage == QMetalShaderResourceBindingsData::COMPUTE) + if (cbD->recordingPass != QMetalCommandBuffer::ComputePass && (stage == QMetalShaderResourceBindingsData::COMPUTE)) continue; // QRhiBatchedBindings works with the native bindings and expects @@ -970,7 +1382,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD return a.nativeBinding < b.nativeBinding; }); - for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : qAsConst(bindingData.res[stage].buffers)) { + for (const QMetalShaderResourceBindingsData::Stage::Buffer &buf : std::as_const(bindingData.res[stage].buffers)) { bindingData.res[stage].bufferBatches.feed(buf.nativeBinding, buf.mtlbuf); bindingData.res[stage].bufferOffsetBatches.feed(buf.nativeBinding, buf.offset); } @@ -989,26 +1401,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { continue; } - switch (stage) { - case QMetalShaderResourceBindingsData::VERTEX: - [cbD->d->currentRenderPassEncoder setVertexBuffers: bufferBatch.resources.constData() - offsets: offsetBatch.resources.constData() - withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; - break; - case QMetalShaderResourceBindingsData::FRAGMENT: - [cbD->d->currentRenderPassEncoder setFragmentBuffers: bufferBatch.resources.constData() - offsets: offsetBatch.resources.constData() - withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; - break; - case QMetalShaderResourceBindingsData::COMPUTE: - [cbD->d->currentComputePassEncoder setBuffers: bufferBatch.resources.constData() - offsets: offsetBatch.resources.constData() - withRange: NSMakeRange(bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; - break; - default: - Q_UNREACHABLE(); - break; - } + bindStageBuffers(cbD, stage, bufferBatch, offsetBatch); } if (offsetOnlyChange) @@ -1022,10 +1415,10 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD return a.nativeBinding < b.nativeBinding; }); - for (const QMetalShaderResourceBindingsData::Stage::Texture &t : qAsConst(bindingData.res[stage].textures)) + for (const QMetalShaderResourceBindingsData::Stage::Texture &t : std::as_const(bindingData.res[stage].textures)) bindingData.res[stage].textureBatches.feed(t.nativeBinding, t.mtltex); - for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : qAsConst(bindingData.res[stage].samplers)) + for (const QMetalShaderResourceBindingsData::Stage::Sampler &s : std::as_const(bindingData.res[stage].samplers)) bindingData.res[stage].samplerBatches.feed(s.nativeBinding, s.mtlsampler); bindingData.res[stage].textureBatches.finish(); @@ -1039,23 +1432,7 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { continue; } - switch (stage) { - case QMetalShaderResourceBindingsData::VERTEX: - [cbD->d->currentRenderPassEncoder setVertexTextures: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case QMetalShaderResourceBindingsData::FRAGMENT: - [cbD->d->currentRenderPassEncoder setFragmentTextures: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case QMetalShaderResourceBindingsData::COMPUTE: - [cbD->d->currentComputePassEncoder setTextures: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - default: - Q_UNREACHABLE(); - break; - } + bindStageTextures(cbD, stage, batch); } for (int i = 0, ie = bindingData.res[stage].samplerBatches.batches.count(); i != ie; ++i) { @@ -1066,65 +1443,68 @@ void QRhiMetal::enqueueShaderResourceBindings(QMetalShaderResourceBindings *srbD { continue; } - switch (stage) { - case QMetalShaderResourceBindingsData::VERTEX: - [cbD->d->currentRenderPassEncoder setVertexSamplerStates: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case QMetalShaderResourceBindingsData::FRAGMENT: - [cbD->d->currentRenderPassEncoder setFragmentSamplerStates: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - case QMetalShaderResourceBindingsData::COMPUTE: - [cbD->d->currentComputePassEncoder setSamplerStates: batch.resources.constData() - withRange: NSMakeRange(batch.startBinding, NSUInteger(batch.resources.count()))]; - break; - default: - Q_UNREACHABLE(); - break; - } + bindStageSamplers(cbD, stage, batch); } } + cbD->d->currentShaderResourceBindingState = bindingData; } +void QMetalGraphicsPipeline::makeActiveForCurrentRenderPassEncoder(QMetalCommandBuffer *cbD) +{ + [cbD->d->currentRenderPassEncoder setRenderPipelineState: d->ps]; + + if (cbD->d->currentDepthStencilState != d->ds) { + [cbD->d->currentRenderPassEncoder setDepthStencilState: d->ds]; + cbD->d->currentDepthStencilState = d->ds; + } + + if (cbD->currentCullMode == -1 || d->cullMode != uint(cbD->currentCullMode)) { + [cbD->d->currentRenderPassEncoder setCullMode: d->cullMode]; + cbD->currentCullMode = int(d->cullMode); + } + if (cbD->currentTriangleFillMode == -1 || d->triangleFillMode != uint(cbD->currentTriangleFillMode)) { + [cbD->d->currentRenderPassEncoder setTriangleFillMode: d->triangleFillMode]; + cbD->currentTriangleFillMode = int(d->triangleFillMode); + } + if (cbD->currentFrontFaceWinding == -1 || d->winding != uint(cbD->currentFrontFaceWinding)) { + [cbD->d->currentRenderPassEncoder setFrontFacingWinding: d->winding]; + cbD->currentFrontFaceWinding = int(d->winding); + } + if (!qFuzzyCompare(d->depthBias, cbD->currentDepthBiasValues.first) + || !qFuzzyCompare(d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second)) + { + [cbD->d->currentRenderPassEncoder setDepthBias: d->depthBias + slopeScale: d->slopeScaledDepthBias + clamp: 0.0f]; + cbD->currentDepthBiasValues = { d->depthBias, d->slopeScaledDepthBias }; + } +} + void QRhiMetal::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline *ps) { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); QMetalGraphicsPipeline *psD = QRHI_RES(QMetalGraphicsPipeline, ps); - if (cbD->currentGraphicsPipeline != ps || cbD->currentPipelineGeneration != psD->generation) { - cbD->currentGraphicsPipeline = ps; - cbD->currentComputePipeline = nullptr; - cbD->currentPipelineGeneration = psD->generation; - - [cbD->d->currentRenderPassEncoder setRenderPipelineState: psD->d->ps]; + if (cbD->currentGraphicsPipeline == psD && cbD->currentPipelineGeneration == psD->generation) + return; - if (cbD->d->currentDepthStencilState != psD->d->ds) { - [cbD->d->currentRenderPassEncoder setDepthStencilState: psD->d->ds]; - cbD->d->currentDepthStencilState = psD->d->ds; - } + cbD->currentGraphicsPipeline = psD; + cbD->currentComputePipeline = nullptr; + cbD->currentPipelineGeneration = psD->generation; - if (cbD->currentCullMode == -1 || psD->d->cullMode != uint(cbD->currentCullMode)) { - [cbD->d->currentRenderPassEncoder setCullMode: psD->d->cullMode]; - cbD->currentCullMode = int(psD->d->cullMode); - } - if (cbD->currentTriangleFillMode == -1 || psD->d->triangleFillMode != uint(cbD->currentTriangleFillMode)) { - [cbD->d->currentRenderPassEncoder setTriangleFillMode: psD->d->triangleFillMode]; - cbD->currentTriangleFillMode = int(psD->d->triangleFillMode); - } - if (cbD->currentFrontFaceWinding == -1 || psD->d->winding != uint(cbD->currentFrontFaceWinding)) { - [cbD->d->currentRenderPassEncoder setFrontFacingWinding: psD->d->winding]; - cbD->currentFrontFaceWinding = int(psD->d->winding); + if (!psD->d->tess.enabled && !psD->d->tess.failed) { + psD->makeActiveForCurrentRenderPassEncoder(cbD); + } else { + // mark work buffers that can now be safely reused as reusable + for (QMetalBuffer *workBuf : psD->d->extraBufMgr.deviceLocalWorkBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot) + workBuf->lastActiveFrameSlot = -1; } - if (!qFuzzyCompare(psD->d->depthBias, cbD->currentDepthBiasValues.first) - || !qFuzzyCompare(psD->d->slopeScaledDepthBias, cbD->currentDepthBiasValues.second)) - { - [cbD->d->currentRenderPassEncoder setDepthBias: psD->d->depthBias - slopeScale: psD->d->slopeScaledDepthBias - clamp: 0.0f]; - cbD->currentDepthBiasValues = { psD->d->depthBias, psD->d->slopeScaledDepthBias }; + for (QMetalBuffer *workBuf : psD->d->extraBufMgr.hostVisibleWorkBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == currentFrameSlot) + workBuf->lastActiveFrameSlot = -1; } } @@ -1137,8 +1517,8 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass != QMetalCommandBuffer::NoPass); - QMetalGraphicsPipeline *gfxPsD = QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline); - QMetalComputePipeline *compPsD = QRHI_RES(QMetalComputePipeline, cbD->currentComputePipeline); + QMetalGraphicsPipeline *gfxPsD = cbD->currentGraphicsPipeline; + QMetalComputePipeline *compPsD = cbD->currentComputePipeline; if (!srb) { if (gfxPsD) @@ -1152,9 +1532,15 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind bool hasDynamicOffsetInSrb = false; bool resNeedsRebind = false; + // SPIRV-Cross buffer size buffers + // Need to determine storage buffer sizes here as this is the last opportunity for storage + // buffer bindings (offset, size) to be specified before draw / dispatch call + const bool needsBufferSizeBuffer = (compPsD && compPsD->d->bufferSizeBuffer) || (gfxPsD && gfxPsD->d->bufferSizeBuffer); + QMap<QRhiShaderResourceBinding::StageFlag, QMap<int, quint32>> storageBufferSizes; + // do buffer writes, figure out if we need to rebind, and mark as in-use for (int i = 0, ie = srbD->sortedBindings.count(); i != ie; ++i) { - const QRhiShaderResourceBinding::Data *b = srbD->sortedBindings.at(i).data(); + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->sortedBindings.at(i)); QMetalShaderResourceBindings::BoundResourceData &bd(srbD->boundResourceData[i]); switch (b->type) { case QRhiShaderResourceBinding::UniformBuffer: @@ -1228,6 +1614,17 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind { QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, b->u.sbuf.buf); Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); + + if (needsBufferSizeBuffer) { + for (int i = 0; i < 6; ++i) { + const QRhiShaderResourceBinding::StageFlag stage = + QRhiShaderResourceBinding::StageFlag(1 << i); + if (b->stage.testFlag(stage)) { + storageBufferSizes[stage][b->binding] = b->u.sbuf.maybeSize ? b->u.sbuf.maybeSize : bufD->size(); + } + } + } + executeBufferHostWritesForCurrentFrame(bufD); if (bufD->generation != bd.sbuf.generation || bufD->m_id != bd.sbuf.id) { resNeedsRebind = true; @@ -1243,26 +1640,141 @@ void QRhiMetal::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind } } + if (needsBufferSizeBuffer) { + QMetalBuffer *bufD = nullptr; + QVarLengthArray<QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag>, 4> shaders; + + if (compPsD) { + bufD = compPsD->d->bufferSizeBuffer; + Q_ASSERT(compPsD->d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + shaders.append(qMakePair(&compPsD->d->cs, QRhiShaderResourceBinding::StageFlag::ComputeStage)); + } else { + bufD = gfxPsD->d->bufferSizeBuffer; + if (gfxPsD->d->tess.enabled) { + + // Assumptions + // * We only use one of the compute vertex shader variants in a pipeline at any one time + // * The vertex shader variants all have the same storage block bindings + // * The vertex shader variants all have the same native resource binding map + // * The vertex shader variants all have the same MslBufferSizeBufferBinding requirement + // * The vertex shader variants all have the same MslBufferSizeBufferBinding binding + // => We only need to use one vertex shader variant to generate the identical shader + // resource bindings + Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[1].desc.storageBlocks()); + Q_ASSERT(gfxPsD->d->tess.compVs[0].desc.storageBlocks() == gfxPsD->d->tess.compVs[2].desc.storageBlocks()); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding) + == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding) + == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding] + == gfxPsD->d->tess.compVs[1].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding] + == gfxPsD->d->tess.compVs[2].nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]); + + if (gfxPsD->d->tess.compVs[0].nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.compVs[0], QRhiShaderResourceBinding::StageFlag::VertexStage)); + + if (gfxPsD->d->tess.compTesc.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.compTesc, QRhiShaderResourceBinding::StageFlag::TessellationControlStage)); + + if (gfxPsD->d->tess.vertTese.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->tess.vertTese, QRhiShaderResourceBinding::StageFlag::TessellationEvaluationStage)); + + } else { + if (gfxPsD->d->vs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->vs, QRhiShaderResourceBinding::StageFlag::VertexStage)); + } + if (gfxPsD->d->fs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) + shaders.append(qMakePair(&gfxPsD->d->fs, QRhiShaderResourceBinding::StageFlag::FragmentStage)); + } + + quint32 offset = 0; + for (const QPair<QMetalShader *, QRhiShaderResourceBinding::StageFlag> &shader : shaders) { + + const int binding = shader.first->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + + // if we don't have a srb entry for the buffer size buffer + if (!(storageBufferSizes.contains(shader.second) && storageBufferSizes[shader.second].contains(binding))) { + + int maxNativeBinding = 0; + for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) + maxNativeBinding = qMax(maxNativeBinding, shader.first->nativeResourceBindingMap[block.binding].first); + + const int size = (maxNativeBinding + 1) * sizeof(int); + + Q_ASSERT(offset + size <= bufD->size()); + srbD->sortedBindings.append(QRhiShaderResourceBinding::bufferLoad(binding, shader.second, bufD, offset, size)); + + QMetalShaderResourceBindings::BoundResourceData bd; + bd.sbuf.id = bufD->m_id; + bd.sbuf.generation = bufD->generation; + srbD->boundResourceData.append(bd); + } + + // create the buffer size buffer data + QVarLengthArray<int, 8> bufferSizeBufferData; + Q_ASSERT(storageBufferSizes.contains(shader.second)); + const QMap<int, quint32> &sizes(storageBufferSizes[shader.second]); + for (const QShaderDescription::StorageBlock &block : shader.first->desc.storageBlocks()) { + const int index = shader.first->nativeResourceBindingMap[block.binding].first; + + // if the native binding is -1, the buffer is present but not accessed in the shader + if (index < 0) + continue; + + if (bufferSizeBufferData.size() <= index) + bufferSizeBufferData.resize(index + 1); + + Q_ASSERT(sizes.contains(block.binding)); + bufferSizeBufferData[index] = sizes[block.binding]; + } + + QRhiBufferData data; + const quint32 size = bufferSizeBufferData.size() * sizeof(int); + data.assign(reinterpret_cast<const char *>(bufferSizeBufferData.constData()), size); + Q_ASSERT(offset + size <= bufD->size()); + bufD->d->pendingUpdates[bufD->d->slotted ? currentFrameSlot : 0].append({ offset, data }); + + // buffer offsets must be 32byte aligned + offset += ((size + 31) / 32) * 32; + } + + executeBufferHostWritesForCurrentFrame(bufD); + bufD->lastActiveFrameSlot = currentFrameSlot; + } + // make sure the resources for the correct slot get bound const int resSlot = hasSlottedResourceInSrb ? currentFrameSlot : 0; if (hasSlottedResourceInSrb && cbD->currentResSlot != resSlot) resNeedsRebind = true; - const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb); + const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srbD) : (cbD->currentComputeSrb != srbD); const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation; // dynamic uniform buffer offsets always trigger a rebind if (hasDynamicOffsetInSrb || resNeedsRebind || srbChanged || srbRebuilt) { - const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr }; + const QShader::NativeResourceBindingMap *resBindMaps[SUPPORTED_STAGES] = { nullptr, nullptr, nullptr, nullptr, nullptr }; if (gfxPsD) { - cbD->currentGraphicsSrb = srb; + cbD->currentGraphicsSrb = srbD; cbD->currentComputeSrb = nullptr; - resBindMaps[0] = &gfxPsD->d->vs.nativeResourceBindingMap; - resBindMaps[1] = &gfxPsD->d->fs.nativeResourceBindingMap; + if (gfxPsD->d->tess.enabled) { + // If tessellating, we don't know which compVs shader to use until the draw call is + // made. They should all have the same native resource binding map, so pick one. + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[1].nativeResourceBindingMap); + Q_ASSERT(gfxPsD->d->tess.compVs[0].nativeResourceBindingMap == gfxPsD->d->tess.compVs[2].nativeResourceBindingMap); + resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->tess.compVs[0].nativeResourceBindingMap; + resBindMaps[QMetalShaderResourceBindingsData::TESSCTRL] = &gfxPsD->d->tess.compTesc.nativeResourceBindingMap; + resBindMaps[QMetalShaderResourceBindingsData::TESSEVAL] = &gfxPsD->d->tess.vertTese.nativeResourceBindingMap; + } else { + resBindMaps[QMetalShaderResourceBindingsData::VERTEX] = &gfxPsD->d->vs.nativeResourceBindingMap; + } + resBindMaps[QMetalShaderResourceBindingsData::FRAGMENT] = &gfxPsD->d->fs.nativeResourceBindingMap; } else { cbD->currentGraphicsSrb = nullptr; - cbD->currentComputeSrb = srb; - resBindMaps[2] = &compPsD->d->cs.nativeResourceBindingMap; + cbD->currentComputeSrb = srbD; + resBindMaps[QMetalShaderResourceBindingsData::COMPUTE] = &compPsD->d->cs.nativeResourceBindingMap; } cbD->currentSrbGeneration = srbD->generation; cbD->currentResSlot = resSlot; @@ -1293,13 +1805,13 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, offsets.finish(); // same binding space for vertex and constant buffers - work it around - QRhiShaderResourceBindings *srb = cbD->currentGraphicsSrb; + QMetalShaderResourceBindings *srbD = cbD->currentGraphicsSrb; // There's nothing guaranteeing setShaderResources() was called before // setVertexInput()... but whatever srb will get bound will have to be // layout-compatible anyways so maxBinding is the same. - if (!srb) - srb = cbD->currentGraphicsPipeline->shaderResourceBindings(); - const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, srb)->maxBinding + 1; + if (!srbD) + srbD = QRHI_RES(QMetalShaderResourceBindings, cbD->currentGraphicsPipeline->shaderResourceBindings()); + const int firstVertexBinding = srbD->maxBinding + 1; if (firstVertexBinding != cbD->d->currentFirstVertexBinding || buffers != cbD->d->currentVertexInputsBuffers @@ -1323,7 +1835,7 @@ void QRhiMetal::setVertexInput(QRhiCommandBuffer *cb, QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, indexBuf); executeBufferHostWritesForCurrentFrame(ibufD); ibufD->lastActiveFrameSlot = currentFrameSlot; - cbD->currentIndexBuffer = indexBuf; + cbD->currentIndexBuffer = ibufD; cbD->currentIndexOffset = indexOffset; cbD->currentIndexFormat = indexFormat; } else { @@ -1339,7 +1851,7 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) // x,y is top-left in MTLViewportRect but bottom-left in QRhiViewport float x, y, w, h; - if (!qrhi_toTopLeftRenderTargetRect(outputSize, viewport.viewport(), &x, &y, &w, &h)) + if (!qrhi_toTopLeftRenderTargetRect<UnBounded>(outputSize, viewport.viewport(), &x, &y, &w, &h)) return; MTLViewport vp; @@ -1352,8 +1864,10 @@ void QRhiMetal::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) [cbD->d->currentRenderPassEncoder setViewport: vp]; - if (!QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { + if (cbD->currentGraphicsPipeline + && !cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)) { MTLScissorRect s; + qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, viewport.viewport(), &x, &y, &w, &h); s.x = NSUInteger(x); s.y = NSUInteger(y); s.width = NSUInteger(w); @@ -1366,12 +1880,12 @@ void QRhiMetal::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); - Q_ASSERT(QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); + Q_ASSERT(cbD->currentGraphicsPipeline->m_flags.testFlag(QRhiGraphicsPipeline::UsesScissor)); const QSize outputSize = cbD->currentTarget->pixelSize(); // x,y is top-left in MTLScissorRect but bottom-left in QRhiScissor int x, y, w, h; - if (!qrhi_toTopLeftRenderTargetRect(outputSize, scissor.scissor(), &x, &y, &w, &h)) + if (!qrhi_toTopLeftRenderTargetRect<Bounded>(outputSize, scissor.scissor(), &x, &y, &w, &h)) return; MTLScissorRect s; @@ -1400,20 +1914,320 @@ void QRhiMetal::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) [cbD->d->currentRenderPassEncoder setStencilReferenceValue: refValue]; } +static id<MTLComputeCommandEncoder> tessellationComputeEncoder(QMetalCommandBuffer *cbD) +{ + if (cbD->d->currentRenderPassEncoder) { + [cbD->d->currentRenderPassEncoder endEncoding]; + cbD->d->currentRenderPassEncoder = nil; + } + + if (!cbD->d->tessellationComputeEncoder) + cbD->d->tessellationComputeEncoder = [cbD->d->cb computeCommandEncoder]; + + return cbD->d->tessellationComputeEncoder; +} + +static void endTessellationComputeEncoding(QMetalCommandBuffer *cbD) +{ + if (cbD->d->tessellationComputeEncoder) { + [cbD->d->tessellationComputeEncoder endEncoding]; + cbD->d->tessellationComputeEncoder = nil; + } + + QMetalRenderTargetData * rtD = nullptr; + + switch (cbD->currentTarget->resourceType()) { + case QRhiResource::SwapChainRenderTarget: + rtD = QRHI_RES(QMetalSwapChainRenderTarget, cbD->currentTarget)->d; + break; + case QRhiResource::TextureRenderTarget: + rtD = QRHI_RES(QMetalTextureRenderTarget, cbD->currentTarget)->d; + break; + default: + break; + } + + Q_ASSERT(rtD); + + QVarLengthArray<MTLLoadAction, 4> oldColorLoad; + for (uint i = 0; i < uint(rtD->colorAttCount); ++i) { + oldColorLoad.append(cbD->d->currentPassRpDesc.colorAttachments[i].loadAction); + if (cbD->d->currentPassRpDesc.colorAttachments[i].storeAction != MTLStoreActionDontCare) + cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad; + } + + MTLLoadAction oldDepthLoad; + MTLLoadAction oldStencilLoad; + if (rtD->dsAttCount) { + oldDepthLoad = cbD->d->currentPassRpDesc.depthAttachment.loadAction; + if (cbD->d->currentPassRpDesc.depthAttachment.storeAction != MTLStoreActionDontCare) + cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad; + + oldStencilLoad = cbD->d->currentPassRpDesc.stencilAttachment.loadAction; + if (cbD->d->currentPassRpDesc.stencilAttachment.storeAction != MTLStoreActionDontCare) + cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad; + } + + cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc]; + cbD->resetPerPassCachedState(); + + for (uint i = 0; i < uint(rtD->colorAttCount); ++i) { + cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = oldColorLoad[i]; + } + + if (rtD->dsAttCount) { + cbD->d->currentPassRpDesc.depthAttachment.loadAction = oldDepthLoad; + cbD->d->currentPassRpDesc.stencilAttachment.loadAction = oldStencilLoad; + } + +} + +void QRhiMetal::tessellatedDraw(const TessDrawArgs &args) +{ + QMetalCommandBuffer *cbD = args.cbD; + QMetalGraphicsPipeline *graphicsPipeline = cbD->currentGraphicsPipeline; + if (graphicsPipeline->d->tess.failed) + return; + + const bool indexed = args.type != TessDrawArgs::NonIndexed; + const quint32 instanceCount = indexed ? args.drawIndexed.instanceCount : args.draw.instanceCount; + const quint32 vertexOrIndexCount = indexed ? args.drawIndexed.indexCount : args.draw.vertexCount; + + QMetalGraphicsPipelineData::Tessellation &tess(graphicsPipeline->d->tess); + QMetalGraphicsPipelineData::ExtraBufferManager &extraBufMgr(graphicsPipeline->d->extraBufMgr); + const quint32 patchCount = tess.patchCountForDrawCall(vertexOrIndexCount, instanceCount); + QMetalBuffer *vertOutBuf = nullptr; + QMetalBuffer *tescOutBuf = nullptr; + QMetalBuffer *tescPatchOutBuf = nullptr; + QMetalBuffer *tescFactorBuf = nullptr; + QMetalBuffer *tescParamsBuf = nullptr; + id<MTLComputeCommandEncoder> vertTescComputeEncoder = tessellationComputeEncoder(cbD); + + // Step 1: vertex shader (as compute) + { + id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder; + QShader::Variant shaderVariant = QShader::NonIndexedVertexAsComputeShader; + if (args.type == TessDrawArgs::U16Indexed) + shaderVariant = QShader::UInt16IndexedVertexAsComputeShader; + else if (args.type == TessDrawArgs::U32Indexed) + shaderVariant = QShader::UInt32IndexedVertexAsComputeShader; + const int varIndex = QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(shaderVariant); + id<MTLComputePipelineState> computePipelineState = tess.vsCompPipeline(this, shaderVariant); + [computeEncoder setComputePipelineState: computePipelineState]; + + // Make uniform buffers, textures, and samplers (meant for the + // vertex stage from the client's point of view) visible in the + // "vertex as compute" shader + cbD->d->currentComputePassEncoder = computeEncoder; + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::VERTEX, QMetalShaderResourceBindingsData::COMPUTE); + cbD->d->currentComputePassEncoder = nil; + + const QMap<int, int> &ebb(tess.compVs[varIndex].nativeShaderInfo.extraBufferBindings); + const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1); + + if (outputBufferBinding >= 0) { + const quint32 workBufSize = tess.vsCompOutputBufferSize(vertexOrIndexCount, instanceCount); + vertOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize); + if (!vertOutBuf) + return; + [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding]; + } + + if (indexBufferBinding >= 0) + [computeEncoder setBuffer: (id<MTLBuffer>) args.drawIndexed.indexBuffer offset: 0 atIndex: indexBufferBinding]; + + for (int i = 0, ie = cbD->d->currentVertexInputsBuffers.batches.count(); i != ie; ++i) { + const auto &bufferBatch(cbD->d->currentVertexInputsBuffers.batches[i]); + const auto &offsetBatch(cbD->d->currentVertexInputOffsets.batches[i]); + [computeEncoder setBuffers: bufferBatch.resources.constData() + offsets: offsetBatch.resources.constData() + withRange: NSMakeRange(uint(cbD->d->currentFirstVertexBinding) + bufferBatch.startBinding, NSUInteger(bufferBatch.resources.count()))]; + } + + if (indexed) { + [computeEncoder setStageInRegion: MTLRegionMake2D(args.drawIndexed.vertexOffset, args.drawIndexed.firstInstance, + args.drawIndexed.indexCount, args.drawIndexed.instanceCount)]; + } else { + [computeEncoder setStageInRegion: MTLRegionMake2D(args.draw.firstVertex, args.draw.firstInstance, + args.draw.vertexCount, args.draw.instanceCount)]; + } + + [computeEncoder dispatchThreads: MTLSizeMake(vertexOrIndexCount, instanceCount, 1) + threadsPerThreadgroup: MTLSizeMake(computePipelineState.threadExecutionWidth, 1, 1)]; + } + + // Step 2: tessellation control shader (as compute) + { + id<MTLComputeCommandEncoder> computeEncoder = vertTescComputeEncoder; + id<MTLComputePipelineState> computePipelineState = tess.tescCompPipeline(this); + [computeEncoder setComputePipelineState: computePipelineState]; + + cbD->d->currentComputePassEncoder = computeEncoder; + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSCTRL, QMetalShaderResourceBindingsData::COMPUTE); + cbD->d->currentComputePassEncoder = nil; + + const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings); + const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1); + const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1); + const int paramsBufferBinding = ebb.value(QShaderPrivate::MslTessTescParamsBufferBinding, -1); + const int inputBufferBinding = ebb.value(QShaderPrivate::MslTessTescInputBufferBinding, -1); + + if (outputBufferBinding >= 0) { + const quint32 workBufSize = tess.tescCompOutputBufferSize(patchCount); + tescOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize); + if (!tescOutBuf) + return; + [computeEncoder setBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding]; + } + + if (patchOutputBufferBinding >= 0) { + const quint32 workBufSize = tess.tescCompPatchOutputBufferSize(patchCount); + tescPatchOutBuf = extraBufMgr.acquireWorkBuffer(this, workBufSize); + if (!tescPatchOutBuf) + return; + [computeEncoder setBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding]; + } + + if (tessFactorBufferBinding >= 0) { + tescFactorBuf = extraBufMgr.acquireWorkBuffer(this, patchCount * sizeof(MTLQuadTessellationFactorsHalf)); + [computeEncoder setBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding]; + } + + if (paramsBufferBinding >= 0) { + struct { + quint32 inControlPointCount; + quint32 patchCount; + } params; + tescParamsBuf = extraBufMgr.acquireWorkBuffer(this, sizeof(params), QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible); + if (!tescParamsBuf) + return; + params.inControlPointCount = tess.inControlPointCount; + params.patchCount = patchCount; + id<MTLBuffer> paramsBuf = tescParamsBuf->d->buf[0]; + char *p = reinterpret_cast<char *>([paramsBuf contents]); + memcpy(p, ¶ms, sizeof(params)); + [computeEncoder setBuffer: paramsBuf offset: 0 atIndex: paramsBufferBinding]; + } + + if (vertOutBuf && inputBufferBinding >= 0) + [computeEncoder setBuffer: vertOutBuf->d->buf[0] offset: 0 atIndex: inputBufferBinding]; + + int sgSize = int(computePipelineState.threadExecutionWidth); + int wgSize = std::lcm(tess.outControlPointCount, sgSize); + while (wgSize > caps.maxThreadGroupSize) { + sgSize /= 2; + wgSize = std::lcm(tess.outControlPointCount, sgSize); + } + [computeEncoder dispatchThreads: MTLSizeMake(patchCount * tess.outControlPointCount, 1, 1) + threadsPerThreadgroup: MTLSizeMake(wgSize, 1, 1)]; + } + + // Much of the state in the QMetalCommandBuffer is going to be reset + // when we get a new render encoder. Save what we need. (cheaper than + // starting to walk over the srb again) + const QMetalShaderResourceBindingsData resourceBindings = cbD->d->currentShaderResourceBindingState; + + endTessellationComputeEncoding(cbD); + + // Step 3: tessellation evaluation (as vertex) + fragment shader + { + // No need to call tess.teseFragRenderPipeline because it was done + // once and we know the result is stored in the standard place + // (graphicsPipeline->d->ps). + + graphicsPipeline->makeActiveForCurrentRenderPassEncoder(cbD); + id<MTLRenderCommandEncoder> renderEncoder = cbD->d->currentRenderPassEncoder; + + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::TESSEVAL, QMetalShaderResourceBindingsData::VERTEX, &resourceBindings); + rebindShaderResources(cbD, QMetalShaderResourceBindingsData::FRAGMENT, QMetalShaderResourceBindingsData::FRAGMENT, &resourceBindings); + + const QMap<int, int> &ebb(tess.compTesc.nativeShaderInfo.extraBufferBindings); + const int outputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int patchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1); + const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1); + + if (outputBufferBinding >= 0 && tescOutBuf) + [renderEncoder setVertexBuffer: tescOutBuf->d->buf[0] offset: 0 atIndex: outputBufferBinding]; + + if (patchOutputBufferBinding >= 0 && tescPatchOutBuf) + [renderEncoder setVertexBuffer: tescPatchOutBuf->d->buf[0] offset: 0 atIndex: patchOutputBufferBinding]; + + if (tessFactorBufferBinding >= 0 && tescFactorBuf) { + [renderEncoder setTessellationFactorBuffer: tescFactorBuf->d->buf[0] offset: 0 instanceStride: 0]; + [renderEncoder setVertexBuffer: tescFactorBuf->d->buf[0] offset: 0 atIndex: tessFactorBufferBinding]; + } + + [cbD->d->currentRenderPassEncoder drawPatches: tess.outControlPointCount + patchStart: 0 + patchCount: patchCount + patchIndexBuffer: nil + patchIndexBufferOffset: 0 + instanceCount: 1 + baseInstance: 0]; + } +} + +void QRhiMetal::adjustForMultiViewDraw(quint32 *instanceCount, QRhiCommandBuffer *cb) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + const int multiViewCount = cbD->currentGraphicsPipeline->m_multiViewCount; + if (multiViewCount <= 1) + return; + + const QMap<int, int> &ebb(cbD->currentGraphicsPipeline->d->vs.nativeShaderInfo.extraBufferBindings); + const int viewMaskBufBinding = ebb.value(QShaderPrivate::MslMultiViewMaskBufferBinding, -1); + if (viewMaskBufBinding == -1) { + qWarning("No extra buffer for multiview in the vertex shader; was it built with --view-count specified?"); + return; + } + struct { + quint32 viewOffset; + quint32 viewCount; + } multiViewInfo; + multiViewInfo.viewOffset = 0; + multiViewInfo.viewCount = quint32(multiViewCount); + QMetalBuffer *buf = cbD->currentGraphicsPipeline->d->extraBufMgr.acquireWorkBuffer(this, sizeof(multiViewInfo), + QMetalGraphicsPipelineData::ExtraBufferManager::WorkBufType::HostVisible); + if (buf) { + id<MTLBuffer> mtlbuf = buf->d->buf[0]; + char *p = reinterpret_cast<char *>([mtlbuf contents]); + memcpy(p, &multiViewInfo, sizeof(multiViewInfo)); + [cbD->d->currentRenderPassEncoder setVertexBuffer: mtlbuf offset: 0 atIndex: viewMaskBufBinding]; + // The instance count is adjusted for layered rendering. The vertex shader is expected to contain something like: + // uint gl_ViewIndex = spvViewMask[0] + (gl_InstanceIndex - gl_BaseInstance) % spvViewMask[1]; + // where spvViewMask is the buffer with multiViewInfo passed in above. + *instanceCount *= multiViewCount; + } +} + void QRhiMetal::draw(QRhiCommandBuffer *cb, quint32 vertexCount, quint32 instanceCount, quint32 firstVertex, quint32 firstInstance) { QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::RenderPass); + if (cbD->currentGraphicsPipeline->d->tess.enabled) { + TessDrawArgs a; + a.cbD = cbD; + a.type = TessDrawArgs::NonIndexed; + a.draw.vertexCount = vertexCount; + a.draw.instanceCount = instanceCount; + a.draw.firstVertex = firstVertex; + a.draw.firstInstance = firstInstance; + tessellatedDraw(a); + return; + } + + adjustForMultiViewDraw(&instanceCount, cb); + if (caps.baseVertexAndInstance) { - [cbD->d->currentRenderPassEncoder drawPrimitives: - QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType - vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance]; + [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType + vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount baseInstance: firstInstance]; } else { - [cbD->d->currentRenderPassEncoder drawPrimitives: - QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType - vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount]; + [cbD->d->currentRenderPassEncoder drawPrimitives: cbD->currentGraphicsPipeline->d->primitiveType + vertexStart: firstVertex vertexCount: vertexCount instanceCount: instanceCount]; } } @@ -1429,23 +2243,39 @@ void QRhiMetal::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, const quint32 indexOffset = cbD->currentIndexOffset + firstIndex * (cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? 2 : 4); Q_ASSERT(indexOffset == aligned(indexOffset, 4u)); - QMetalBuffer *ibufD = QRHI_RES(QMetalBuffer, cbD->currentIndexBuffer); - id<MTLBuffer> mtlbuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0]; + QMetalBuffer *ibufD = cbD->currentIndexBuffer; + id<MTLBuffer> mtlibuf = ibufD->d->buf[ibufD->d->slotted ? currentFrameSlot : 0]; + + if (cbD->currentGraphicsPipeline->d->tess.enabled) { + TessDrawArgs a; + a.cbD = cbD; + a.type = cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? TessDrawArgs::U16Indexed : TessDrawArgs::U32Indexed; + a.drawIndexed.indexCount = indexCount; + a.drawIndexed.instanceCount = instanceCount; + a.drawIndexed.firstIndex = firstIndex; + a.drawIndexed.vertexOffset = vertexOffset; + a.drawIndexed.firstInstance = firstInstance; + a.drawIndexed.indexBuffer = mtlibuf; + tessellatedDraw(a); + return; + } + + adjustForMultiViewDraw(&instanceCount, cb); if (caps.baseVertexAndInstance) { - [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType + [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType indexCount: indexCount indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32 - indexBuffer: mtlbuf + indexBuffer: mtlibuf indexBufferOffset: indexOffset instanceCount: instanceCount baseVertex: vertexOffset baseInstance: firstInstance]; } else { - [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: QRHI_RES(QMetalGraphicsPipeline, cbD->currentGraphicsPipeline)->d->primitiveType + [cbD->d->currentRenderPassEncoder drawIndexedPrimitives: cbD->currentGraphicsPipeline->d->primitiveType indexCount: indexCount indexType: cbD->currentIndexFormat == QRhiCommandBuffer::IndexUInt16 ? MTLIndexTypeUInt16 : MTLIndexTypeUInt32 - indexBuffer: mtlbuf + indexBuffer: mtlibuf indexBufferOffset: indexOffset instanceCount: instanceCount]; } @@ -1502,34 +2332,39 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb) cbD->resetPerPassCachedState(); } +double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + return cbD->d->lastGpuTime; +} + QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain); + currentSwapChain = swapChainD; + currentFrameSlot = swapChainD->currentFrameSlot; - // This is a bit messed up since for this swapchain we want to wait for the - // commands+present to complete, while for others just for the commands - // (for this same frame slot) but not sure how to do that in a sane way so - // wait for full cb completion for now. - for (QMetalSwapChain *sc : qAsConst(swapchains)) { - dispatch_semaphore_t sem = sc->d->sem[swapChainD->currentFrameSlot]; - dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + // If we are too far ahead, block. This is also what ensures that any + // resource used in the previous frame for this slot is now not in use + // anymore by the GPU. + dispatch_semaphore_wait(swapChainD->d->sem[currentFrameSlot], DISPATCH_TIME_FOREVER); + + // Do this also for any other swapchain's commands with the same frame slot + // While this reduces concurrency, it keeps resource usage safe: swapchain + // A starting its frame 0, followed by swapchain B starting its own frame 0 + // will make B wait for A's frame 0 commands, so if a resource is written + // in B's frame or when B checks for pending resource releases, that won't + // mess up A's in-flight commands (as they are not in flight anymore). + for (QMetalSwapChain *sc : std::as_const(swapchains)) { if (sc != swapChainD) - dispatch_semaphore_signal(sem); + sc->waitUntilCompleted(currentFrameSlot); // wait+signal } - currentSwapChain = swapChainD; - currentFrameSlot = swapChainD->currentFrameSlot; - if (swapChainD->ds) - swapChainD->ds->lastActiveFrameSlot = currentFrameSlot; - [d->captureScope beginScope]; - // Do not let the command buffer mess with the refcount of objects. We do - // have a proper render loop and will manage lifetimes similarly to other - // backends (Vulkan). - swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + swapChainD->cbWrapper.d->cb = d->newCommandBuffer(); QMetalRenderTargetData::ColorAtt colorAtt; if (swapChainD->samples > 1) { @@ -1541,11 +2376,16 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF swapChainD->rtWrapper.d->fb.colorAtt[0] = colorAtt; swapChainD->rtWrapper.d->fb.dsTex = swapChainD->ds ? swapChainD->ds->d->tex : nil; + swapChainD->rtWrapper.d->fb.dsResolveTex = nil; swapChainD->rtWrapper.d->fb.hasStencil = swapChainD->ds ? true : false; swapChainD->rtWrapper.d->fb.depthNeedsStore = false; + if (swapChainD->ds) + swapChainD->ds->lastActiveFrameSlot = currentFrameSlot; + executeDeferredReleases(); - swapChainD->cbWrapper.resetState(); + swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]); + swapChainD->d->lastGpuTime[currentFrameSlot] = 0; finishActiveReadbacks(); return QRhi::FrameOpSuccess; @@ -1556,26 +2396,30 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame QMetalSwapChain *swapChainD = QRHI_RES(QMetalSwapChain, swapChain); Q_ASSERT(currentSwapChain == swapChainD); + __block int thisFrameSlot = currentFrameSlot; + [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer> cb) { + swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime; + dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]); + }]; + const bool needsPresent = !flags.testFlag(QRhi::SkipPresent); - if (needsPresent) { - // beginFrame-endFrame without a render pass inbetween means there is no - // drawable, handle this gracefully because presentDrawable does not like - // null arguments. - if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) { - // QTBUG-103415: while the docs suggest the following two approaches are - // equivalent, there is a difference in case a frame is recorded earlier than - // (i.e. not in response to) the next CVDisplayLink callback. Therefore, stick - // with presentDrawable, which gives results identical to OpenGL, and all other - // platforms, i.e. throttles to vsync as expected, meaning constant 15-17 ms with - // a 60 Hz screen, no jumps with smaller intervals, regardless of when the frame - // is submitted by the app) -#if 1 + const bool presentsWithTransaction = swapChainD->d->layer.presentsWithTransaction; + if (!presentsWithTransaction && needsPresent) { + // beginFrame-endFrame without a render pass inbetween means there is no drawable. + if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) [swapChainD->cbWrapper.d->cb presentDrawable: drawable]; -#else - [swapChainD->cbWrapper.d->cb addScheduledHandler:^(id<MTLCommandBuffer>) { - [drawable present]; - }]; -#endif + } + + [swapChainD->cbWrapper.d->cb commit]; + + if (presentsWithTransaction && needsPresent) { + // beginFrame-endFrame without a render pass inbetween means there is no drawable. + if (id<CAMetalDrawable> drawable = swapChainD->d->curDrawable) { + // The layer has presentsWithTransaction set to true to avoid flicker on resizing, + // so here it is important to follow what the Metal docs say when it comes to the + // issuing the present. + [swapChainD->cbWrapper.d->cb waitUntilScheduled]; + [drawable present]; } } @@ -1583,13 +2427,6 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame [swapChainD->d->curDrawable release]; swapChainD->d->curDrawable = nil; - __block int thisFrameSlot = currentFrameSlot; - [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id<MTLCommandBuffer>) { - dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]); - }]; - - [swapChainD->cbWrapper.d->cb commit]; - [d->captureScope endScope]; if (needsPresent) @@ -1605,23 +2442,17 @@ QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi: Q_UNUSED(flags); currentFrameSlot = (currentFrameSlot + 1) % QMTL_FRAMES_IN_FLIGHT; - if (swapchains.count() > 1) { - for (QMetalSwapChain *sc : qAsConst(swapchains)) { - // wait+signal is the general pattern to ensure the commands for a - // given frame slot have completed (if sem is 1, we go 0 then 1; if - // sem is 0 we go -1, block, completion increments to 0, then us to 1) - dispatch_semaphore_t sem = sc->d->sem[currentFrameSlot]; - dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); - dispatch_semaphore_signal(sem); - } - } + + for (QMetalSwapChain *sc : std::as_const(swapchains)) + sc->waitUntilCompleted(currentFrameSlot); d->ofr.active = true; *cb = &d->ofr.cbWrapper; - d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + d->ofr.cbWrapper.d->cb = d->newCommandBuffer(); executeDeferredReleases(); - d->ofr.cbWrapper.resetState(); + d->ofr.cbWrapper.resetState(d->ofr.lastGpuTime); + d->ofr.lastGpuTime = 0; finishActiveReadbacks(); return QRhi::FrameOpSuccess; @@ -1633,10 +2464,13 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags) Q_ASSERT(d->ofr.active); d->ofr.active = false; - [d->ofr.cbWrapper.d->cb commit]; + id<MTLCommandBuffer> cb = d->ofr.cbWrapper.d->cb; + [cb commit]; // offscreen frames wait for completion, unlike swapchain ones - [d->ofr.cbWrapper.d->cb waitUntilCompleted]; + [cb waitUntilCompleted]; + + d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime; finishActiveReadbacks(true); @@ -1660,16 +2494,14 @@ QRhi::FrameOpResult QRhiMetal::finish() } } - for (QMetalSwapChain *sc : qAsConst(swapchains)) { + for (QMetalSwapChain *sc : std::as_const(swapchains)) { for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { if (currentSwapChain && sc == currentSwapChain && i == currentFrameSlot) { // no wait as this is the thing we're going to be commit below and // beginFrame decremented sem already and going to be signaled by endFrame continue; } - dispatch_semaphore_t sem = sc->d->sem[i]; - dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); - dispatch_semaphore_signal(sem); + sc->waitUntilCompleted(i); } } @@ -1679,10 +2511,13 @@ QRhi::FrameOpResult QRhiMetal::finish() } if (inFrame) { - if (d->ofr.active) - d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; - else - swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + if (d->ofr.active) { + d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime; + d->ofr.cbWrapper.d->cb = d->newCommandBuffer(); + } else { + swapChainD->d->lastGpuTime[currentFrameSlot] += cb.GPUEndTime - cb.GPUStartTime; + swapChainD->cbWrapper.d->cb = d->newCommandBuffer(); + } } executeDeferredReleases(true); @@ -1860,6 +2695,15 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates); + id<MTLBlitCommandEncoder> blitEnc = nil; + auto ensureBlit = [&blitEnc, cbD, this]() { + if (!blitEnc) { + blitEnc = [cbD->d->cb blitCommandEncoder]; + if (debugMarkers) + [blitEnc pushDebugGroup: @"Texture upload/copy"]; + } + }; + for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) { const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]); if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) { @@ -1882,25 +2726,33 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate QMetalBuffer *bufD = QRHI_RES(QMetalBuffer, u.buf); executeBufferHostWritesForCurrentFrame(bufD); const int idx = bufD->d->slotted ? currentFrameSlot : 0; - char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]); - if (p) { - u.result->data.resize(u.readSize); - memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize)); + if (bufD->m_type == QRhiBuffer::Dynamic) { + char *p = reinterpret_cast<char *>([bufD->d->buf[idx] contents]); + if (p) { + u.result->data.resize(u.readSize); + memcpy(u.result->data.data(), p + u.offset, size_t(u.readSize)); + } + if (u.result->completed) + u.result->completed(); + } else { + QRhiMetalData::BufferReadback readback; + readback.activeFrameSlot = idx; + readback.buf = bufD->d->buf[idx]; + readback.offset = u.offset; + readback.readSize = u.readSize; + readback.result = u.result; + d->activeBufferReadbacks.append(readback); +#ifdef Q_OS_MACOS + if (bufD->d->managed) { + // On non-Apple Silicon, manually synchronize memory from GPU to CPU + ensureBlit(); + [blitEnc synchronizeResource:readback.buf]; + } +#endif } - if (u.result->completed) - u.result->completed(); } } - id<MTLBlitCommandEncoder> blitEnc = nil; - auto ensureBlit = [&blitEnc, cbD, this] { - if (!blitEnc) { - blitEnc = [cbD->d->cb blitCommandEncoder]; - if (debugMarkers) - [blitEnc pushDebugGroup: @"Texture upload/copy"]; - } - }; - for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) { const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]); if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) { @@ -1908,7 +2760,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate qsizetype stagingSize = 0; for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) { for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { - for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level])) + for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) stagingSize += subresUploadByteSize(subresDesc); } } @@ -1922,7 +2774,7 @@ void QRhiMetal::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate qsizetype curOfs = 0; for (int layer = 0, maxLayer = u.subresDesc.count(); layer < maxLayer; ++layer) { for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { - for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level])) + for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) enqueueSubresUpload(utexD, mp, blitEnc, layer, level, subresDesc, &curOfs); } } @@ -2035,7 +2887,7 @@ void QRhiMetal::executeBufferHostWritesForSlot(QMetalBuffer *bufD, int slot) void *p = [bufD->d->buf[slot] contents]; quint32 changeBegin = UINT32_MAX; quint32 changeEnd = 0; - for (const QMetalBufferData::BufferUpdate &u : qAsConst(bufD->d->pendingUpdates[slot])) { + for (const QMetalBufferData::BufferUpdate &u : std::as_const(bufD->d->pendingUpdates[slot])) { memcpy(static_cast<char *>(p) + u.offset, u.data.constData(), size_t(u.data.size())); if (u.offset < changeBegin) changeBegin = u.offset; @@ -2111,21 +2963,24 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QMetalTexture, QMetalRenderBuffer>(rtTex->description(), rtD->currentResIdList)) rtTex->create(); cbD->d->currentPassRpDesc = d->createDefaultRenderPass(rtD->dsAttCount, colorClearValue, depthStencilClearValue, rtD->colorAttCount); - if (rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents)) { + if (rtD->fb.preserveColor) { for (uint i = 0; i < uint(rtD->colorAttCount); ++i) cbD->d->currentPassRpDesc.colorAttachments[i].loadAction = MTLLoadActionLoad; } - if (rtD->dsAttCount && rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents)) { + if (rtD->dsAttCount && rtD->fb.preserveDs) { cbD->d->currentPassRpDesc.depthAttachment.loadAction = MTLLoadActionLoad; cbD->d->currentPassRpDesc.stencilAttachment.loadAction = MTLLoadActionLoad; } for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments(); it != itEnd; ++it) { - if (it->texture()) + if (it->texture()) { QRHI_RES(QMetalTexture, it->texture())->lastActiveFrameSlot = currentFrameSlot; - else if (it->renderBuffer()) + if (it->multiViewCount() >= 2) + cbD->d->currentPassRpDesc.renderTargetArrayLength = NSUInteger(it->multiViewCount()); + } else if (it->renderBuffer()) { QRHI_RES(QMetalRenderBuffer, it->renderBuffer())->lastActiveFrameSlot = currentFrameSlot; + } if (it->resolveTexture()) QRHI_RES(QMetalTexture, it->resolveTexture())->lastActiveFrameSlot = currentFrameSlot; } @@ -2133,6 +2988,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, QRHI_RES(QMetalRenderBuffer, rtTex->m_desc.depthStencilBuffer())->lastActiveFrameSlot = currentFrameSlot; if (rtTex->m_desc.depthTexture()) QRHI_RES(QMetalTexture, rtTex->m_desc.depthTexture())->lastActiveFrameSlot = currentFrameSlot; + if (rtTex->m_desc.depthResolveTexture()) + QRHI_RES(QMetalTexture, rtTex->m_desc.depthResolveTexture())->lastActiveFrameSlot = currentFrameSlot; } break; default: @@ -2146,7 +3003,8 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, cbD->d->currentPassRpDesc.colorAttachments[i].depthPlane = NSUInteger(rtD->fb.colorAtt[i].slice); cbD->d->currentPassRpDesc.colorAttachments[i].level = NSUInteger(rtD->fb.colorAtt[i].level); if (rtD->fb.colorAtt[i].resolveTex) { - cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = MTLStoreActionMultisampleResolve; + cbD->d->currentPassRpDesc.colorAttachments[i].storeAction = rtD->fb.preserveColor ? MTLStoreActionStoreAndMultisampleResolve + : MTLStoreActionMultisampleResolve; cbD->d->currentPassRpDesc.colorAttachments[i].resolveTexture = rtD->fb.colorAtt[i].resolveTex; cbD->d->currentPassRpDesc.colorAttachments[i].resolveSlice = NSUInteger(rtD->fb.colorAtt[i].resolveLayer); cbD->d->currentPassRpDesc.colorAttachments[i].resolveLevel = NSUInteger(rtD->fb.colorAtt[i].resolveLevel); @@ -2159,6 +3017,15 @@ void QRhiMetal::beginPass(QRhiCommandBuffer *cb, cbD->d->currentPassRpDesc.stencilAttachment.texture = rtD->fb.hasStencil ? rtD->fb.dsTex : nil; if (rtD->fb.depthNeedsStore) // Depth/Stencil is set to DontCare by default, override if needed cbD->d->currentPassRpDesc.depthAttachment.storeAction = MTLStoreActionStore; + if (rtD->fb.dsResolveTex) { + cbD->d->currentPassRpDesc.depthAttachment.storeAction = rtD->fb.depthNeedsStore ? MTLStoreActionStoreAndMultisampleResolve + : MTLStoreActionMultisampleResolve; + cbD->d->currentPassRpDesc.depthAttachment.resolveTexture = rtD->fb.dsResolveTex; + if (rtD->fb.hasStencil) { + cbD->d->currentPassRpDesc.stencilAttachment.resolveTexture = rtD->fb.dsResolveTex; + cbD->d->currentPassRpDesc.stencilAttachment.storeAction = cbD->d->currentPassRpDesc.depthAttachment.storeAction; + } + } } cbD->d->currentRenderPassEncoder = [cbD->d->cb renderCommandEncoderWithDescriptor: cbD->d->currentPassRpDesc]; @@ -2216,9 +3083,9 @@ void QRhiMetal::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *p Q_ASSERT(cbD->recordingPass == QMetalCommandBuffer::ComputePass); QMetalComputePipeline *psD = QRHI_RES(QMetalComputePipeline, ps); - if (cbD->currentComputePipeline != ps || cbD->currentPipelineGeneration != psD->generation) { + if (cbD->currentComputePipeline != psD || cbD->currentPipelineGeneration != psD->generation) { cbD->currentGraphicsPipeline = nullptr; - cbD->currentComputePipeline = ps; + cbD->currentComputePipeline = psD; cbD->currentPipelineGeneration = psD->generation; [cbD->d->currentComputePassEncoder setComputePipelineState: psD->d->ps]; @@ -2284,8 +3151,12 @@ void QRhiMetal::executeDeferredReleases(bool forced) [e.stagingBuffer.buffer release]; break; case QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline: - [e.graphicsPipeline.depthStencilState release]; [e.graphicsPipeline.pipelineState release]; + [e.graphicsPipeline.depthStencilState release]; + [e.graphicsPipeline.tessVertexComputeState[0] release]; + [e.graphicsPipeline.tessVertexComputeState[1] release]; + [e.graphicsPipeline.tessVertexComputeState[2] release]; + [e.graphicsPipeline.tessTessControlComputeState release]; break; case QRhiMetalData::DeferredReleaseEntry::ComputePipeline: [e.computePipeline.pipelineState release]; @@ -2315,7 +3186,23 @@ void QRhiMetal::finishActiveReadbacks(bool forced) if (readback.result->completed) completedCallbacks.append(readback.result->completed); - d->activeTextureReadbacks.removeLast(); + d->activeTextureReadbacks.remove(i); + } + } + + for (int i = d->activeBufferReadbacks.count() - 1; i >= 0; --i) { + const QRhiMetalData::BufferReadback &readback(d->activeBufferReadbacks[i]); + if (forced || currentFrameSlot == readback.activeFrameSlot + || readback.activeFrameSlot < 0) { + readback.result->data.resize(readback.readSize); + char *p = reinterpret_cast<char *>([readback.buf contents]); + Q_ASSERT(p); + memcpy(readback.result->data.data(), p + readback.offset, size_t(readback.readSize)); + + if (readback.result->completed) + completedCallbacks.append(readback.result->completed); + + d->activeBufferReadbacks.remove(i); } } @@ -2388,6 +3275,9 @@ bool QMetalBuffer::create() // Static maps to on macOS) is not safe when another frame reading from the // same buffer is still in flight. d->slotted = !m_usage.testFlag(QRhiBuffer::StorageBuffer); // except for SSBOs written in the shader + // and a special case for internal work buffers + if (int(m_usage) == WorkBufPoolUsage) + d->slotted = false; for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { if (i == 0 || d->slotted) { @@ -2863,11 +3753,14 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize) if (d->tex) destroy(); - const QSize size = m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize; const bool isCube = m_flags.testFlag(CubeMap); const bool is3D = m_flags.testFlag(ThreeDimensional); const bool isArray = m_flags.testFlag(TextureArray); const bool hasMipMaps = m_flags.testFlag(MipMapped); + const bool is1D = m_flags.testFlag(OneDimensional); + + const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1) + : (m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize); QRHI_RES_RHI(QRhiMetal); d->format = toMetalTextureFormat(m_format, m_flags, rhiD); @@ -2895,12 +3788,18 @@ bool QMetalTexture::prepareCreate(QSize *adjustedSize) qWarning("Texture cannot be both array and 3D"); return false; } - m_depth = qMax(1, m_depth); + if (is1D && is3D) { + qWarning("Texture cannot be both 1D and 3D"); + return false; + } + if (is1D && isCube) { + qWarning("Texture cannot be both 1D and cube"); + return false; + } if (m_depth > 1 && !is3D) { qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth); return false; } - m_arraySize = qMax(0, m_arraySize); if (m_arraySize > 0 && !isArray) { qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize); return false; @@ -2927,17 +3826,20 @@ bool QMetalTexture::create() const bool isCube = m_flags.testFlag(CubeMap); const bool is3D = m_flags.testFlag(ThreeDimensional); const bool isArray = m_flags.testFlag(TextureArray); + const bool is1D = m_flags.testFlag(OneDimensional); if (isCube) { desc.textureType = MTLTextureTypeCube; } else if (is3D) { desc.textureType = MTLTextureType3D; + } else if (is1D) { + desc.textureType = isArray ? MTLTextureType1DArray : MTLTextureType1D; } else if (isArray) { #ifdef Q_OS_IOS - if (samples > 1) { - // would be available on iOS 14.0+ but cannot test for that with a 13 SDK - qWarning("Multisample 2D texture array is not supported on iOS"); + if (@available(iOS 14, *)) { + desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray; + } else { + desc.textureType = MTLTextureType2DArray; } - desc.textureType = MTLTextureType2DArray; #else desc.textureType = samples > 1 ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray; #endif @@ -2947,12 +3849,12 @@ bool QMetalTexture::create() desc.pixelFormat = d->format; desc.width = NSUInteger(size.width()); desc.height = NSUInteger(size.height()); - desc.depth = is3D ? m_depth : 1; + desc.depth = is3D ? qMax(1, m_depth) : 1; desc.mipmapLevelCount = NSUInteger(mipLevelCount); if (samples > 1) desc.sampleCount = NSUInteger(samples); if (isArray) - desc.arrayLength = NSUInteger(m_arraySize); + desc.arrayLength = NSUInteger(qMax(0, m_arraySize)); desc.resourceOptions = MTLResourceStorageModePrivate; desc.storageMode = MTLStorageModePrivate; desc.usage = MTLTextureUsageShaderRead; @@ -3011,7 +3913,8 @@ id<MTLTexture> QMetalTextureData::viewForLevel(int level) const bool isCube = q->m_flags.testFlag(QRhiTexture::CubeMap); const bool isArray = q->m_flags.testFlag(QRhiTexture::TextureArray); id<MTLTexture> view = [tex newTextureViewWithPixelFormat: format textureType: type - levels: NSMakeRange(NSUInteger(level), 1) slices: NSMakeRange(0, isCube ? 6 : (isArray ? q->m_arraySize : 1))]; + levels: NSMakeRange(NSUInteger(level), 1) + slices: NSMakeRange(0, isCube ? 6 : (isArray ? qMax(0, q->m_arraySize) : 1))]; perLevelViews[level] = view; return view; @@ -3156,7 +4059,9 @@ QMetalRenderPassDescriptor::~QMetalRenderPassDescriptor() void QMetalRenderPassDescriptor::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiMetal); + if (rhiD) + rhiD->unregisterResource(this); } bool QMetalRenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const @@ -3199,13 +4104,17 @@ void QMetalRenderPassDescriptor::updateSerializedFormat() QRhiRenderPassDescriptor *QMetalRenderPassDescriptor::newCompatibleRenderPassDescriptor() const { - QMetalRenderPassDescriptor *rp = new QMetalRenderPassDescriptor(m_rhi); - rp->colorAttachmentCount = colorAttachmentCount; - rp->hasDepthStencil = hasDepthStencil; - memcpy(rp->colorFormat, colorFormat, sizeof(colorFormat)); - rp->dsFormat = dsFormat; - rp->updateSerializedFormat(); - return rp; + QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi); + rpD->colorAttachmentCount = colorAttachmentCount; + rpD->hasDepthStencil = hasDepthStencil; + memcpy(rpD->colorFormat, colorFormat, sizeof(colorFormat)); + rpD->dsFormat = dsFormat; + + rpD->updateSerializedFormat(); + + QRHI_RES_RHI(QRhiMetal); + rhiD->registerResource(rpD, false); + return rpD; } QVector<quint32> QMetalRenderPassDescriptor::serializedFormat() const @@ -3261,12 +4170,14 @@ QMetalTextureRenderTarget::~QMetalTextureRenderTarget() void QMetalTextureRenderTarget::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiMetal); + if (rhiD) + rhiD->unregisterResource(this); } QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDescriptor() { - const int colorAttachmentCount = m_desc.cendColorAttachments() - m_desc.cbeginColorAttachments(); + const int colorAttachmentCount = int(m_desc.colorAttachmentCount()); QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi); rpD->colorAttachmentCount = colorAttachmentCount; rpD->hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture(); @@ -3284,14 +4195,16 @@ QRhiRenderPassDescriptor *QMetalTextureRenderTarget::newCompatibleRenderPassDesc rpD->dsFormat = int(QRHI_RES(QMetalRenderBuffer, m_desc.depthStencilBuffer())->d->format); rpD->updateSerializedFormat(); + + QRHI_RES_RHI(QRhiMetal); + rhiD->registerResource(rpD, false); return rpD; } bool QMetalTextureRenderTarget::create() { QRHI_RES_RHI(QRhiMetal); - const bool hasColorAttachments = m_desc.cbeginColorAttachments() != m_desc.cendColorAttachments(); - Q_ASSERT(hasColorAttachments || m_desc.depthTexture()); + Q_ASSERT(m_desc.colorAttachmentCount() > 0 || m_desc.depthTexture()); Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture()); const bool hasDepthStencil = m_desc.depthStencilBuffer() || m_desc.depthTexture(); @@ -3335,8 +4248,9 @@ bool QMetalTextureRenderTarget::create() if (m_desc.depthTexture()) { QMetalTexture *depthTexD = QRHI_RES(QMetalTexture, m_desc.depthTexture()); d->fb.dsTex = depthTexD->d->tex; - d->fb.hasStencil = false; - d->fb.depthNeedsStore = true; + d->fb.hasStencil = rhiD->isStencilSupportingFormat(depthTexD->format()); + d->fb.depthNeedsStore = !m_flags.testFlag(DoNotStoreDepthStencilContents) && !m_desc.depthResolveTexture(); + d->fb.preserveDs = m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents); if (d->colorAttCount == 0) { d->pixelSize = depthTexD->pixelSize(); d->sampleCount = depthTexD->samples; @@ -3346,18 +4260,27 @@ bool QMetalTextureRenderTarget::create() d->fb.dsTex = depthRbD->d->tex; d->fb.hasStencil = true; d->fb.depthNeedsStore = false; + d->fb.preserveDs = false; if (d->colorAttCount == 0) { d->pixelSize = depthRbD->pixelSize(); d->sampleCount = depthRbD->samples; } } + if (m_desc.depthResolveTexture()) { + QMetalTexture *depthResolveTexD = QRHI_RES(QMetalTexture, m_desc.depthResolveTexture()); + d->fb.dsResolveTex = depthResolveTexD->d->tex; + } d->dsAttCount = 1; } else { d->dsAttCount = 0; } + if (d->colorAttCount > 0) + d->fb.preserveColor = m_flags.testFlag(QRhiTextureRenderTarget::PreserveColorContents); + QRhiRenderTargetAttachmentTracker::updateResIdList<QMetalTexture, QMetalRenderBuffer>(m_desc, &d->currentResIdList); + rhiD->registerResource(this, false); return true; } @@ -3393,6 +4316,10 @@ void QMetalShaderResourceBindings::destroy() { sortedBindings.clear(); maxBinding = -1; + + QRHI_RES_RHI(QRhiMetal); + if (rhiD) + rhiD->unregisterResource(this); } bool QMetalShaderResourceBindings::create() @@ -3407,13 +4334,9 @@ bool QMetalShaderResourceBindings::create() rhiD->updateLayoutDesc(this); std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings)); - std::sort(sortedBindings.begin(), sortedBindings.end(), - [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b) - { - return a.data()->binding < b.data()->binding; - }); + std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan); if (!sortedBindings.isEmpty()) - maxBinding = sortedBindings.last().data()->binding; + maxBinding = QRhiImplementation::shaderResourceBindingData(sortedBindings.last())->binding; else maxBinding = -1; @@ -3423,6 +4346,7 @@ bool QMetalShaderResourceBindings::create() memset(&bd, 0, sizeof(BoundResourceData)); generation += 1; + rhiD->registerResource(this, false); return true; } @@ -3430,13 +4354,8 @@ void QMetalShaderResourceBindings::updateResources(UpdateFlags flags) { sortedBindings.clear(); std::copy(m_bindings.cbegin(), m_bindings.cend(), std::back_inserter(sortedBindings)); - if (!flags.testFlag(BindingsAreSorted)) { - std::sort(sortedBindings.begin(), sortedBindings.end(), - [](const QRhiShaderResourceBinding &a, const QRhiShaderResourceBinding &b) - { - return a.data()->binding < b.data()->binding; - }); - } + if (!flags.testFlag(BindingsAreSorted)) + std::sort(sortedBindings.begin(), sortedBindings.end(), QRhiImplementation::sortedBindingLessThan); for (BoundResourceData &bd : boundResourceData) memset(&bd, 0, sizeof(BoundResourceData)); @@ -3448,6 +4367,8 @@ QMetalGraphicsPipeline::QMetalGraphicsPipeline(QRhiImplementation *rhi) : QRhiGraphicsPipeline(rhi), d(new QMetalGraphicsPipelineData) { + d->q = this; + d->tess.q = d; } QMetalGraphicsPipeline::~QMetalGraphicsPipeline() @@ -3461,16 +4382,39 @@ void QMetalGraphicsPipeline::destroy() d->vs.destroy(); d->fs.destroy(); - if (!d->ps) + d->tess.compVs[0].destroy(); + d->tess.compVs[1].destroy(); + d->tess.compVs[2].destroy(); + + d->tess.compTesc.destroy(); + d->tess.vertTese.destroy(); + + qDeleteAll(d->extraBufMgr.deviceLocalWorkBuffers); + d->extraBufMgr.deviceLocalWorkBuffers.clear(); + qDeleteAll(d->extraBufMgr.hostVisibleWorkBuffers); + d->extraBufMgr.hostVisibleWorkBuffers.clear(); + + delete d->bufferSizeBuffer; + d->bufferSizeBuffer = nullptr; + + if (!d->ps && !d->ds + && !d->tess.vertexComputeState[0] && !d->tess.vertexComputeState[1] && !d->tess.vertexComputeState[2] + && !d->tess.tessControlComputeState) + { return; + } QRhiMetalData::DeferredReleaseEntry e; e.type = QRhiMetalData::DeferredReleaseEntry::GraphicsPipeline; e.lastActiveFrameSlot = lastActiveFrameSlot; - e.graphicsPipeline.depthStencilState = d->ds; e.graphicsPipeline.pipelineState = d->ps; - d->ds = nil; + e.graphicsPipeline.depthStencilState = d->ds; + e.graphicsPipeline.tessVertexComputeState = d->tess.vertexComputeState; + e.graphicsPipeline.tessTessControlComputeState = d->tess.tessControlComputeState; d->ps = nil; + d->ds = nil; + d->tess.vertexComputeState = {}; + d->tess.tessControlComputeState = nil; QRHI_RES_RHI(QRhiMetal); if (rhiD) { @@ -3512,6 +4456,30 @@ static inline MTLVertexFormat toMetalAttributeFormat(QRhiVertexInputAttribute::F return MTLVertexFormatInt2; case QRhiVertexInputAttribute::SInt: return MTLVertexFormatInt; + case QRhiVertexInputAttribute::Half4: + return MTLVertexFormatHalf4; + case QRhiVertexInputAttribute::Half3: + return MTLVertexFormatHalf3; + case QRhiVertexInputAttribute::Half2: + return MTLVertexFormatHalf2; + case QRhiVertexInputAttribute::Half: + return MTLVertexFormatHalf; + case QRhiVertexInputAttribute::UShort4: + return MTLVertexFormatUShort4; + case QRhiVertexInputAttribute::UShort3: + return MTLVertexFormatUShort3; + case QRhiVertexInputAttribute::UShort2: + return MTLVertexFormatUShort2; + case QRhiVertexInputAttribute::UShort: + return MTLVertexFormatUShort; + case QRhiVertexInputAttribute::SShort4: + return MTLVertexFormatShort4; + case QRhiVertexInputAttribute::SShort3: + return MTLVertexFormatShort3; + case QRhiVertexInputAttribute::SShort2: + return MTLVertexFormatShort2; + case QRhiVertexInputAttribute::SShort: + return MTLVertexFormatShort; default: Q_UNREACHABLE(); return MTLVertexFormatFloat4; @@ -3667,6 +4635,24 @@ static inline MTLPrimitiveType toMetalPrimitiveType(QRhiGraphicsPipeline::Topolo } } +static inline MTLPrimitiveTopologyClass toMetalPrimitiveTopologyClass(QRhiGraphicsPipeline::Topology t) +{ + switch (t) { + case QRhiGraphicsPipeline::Triangles: + case QRhiGraphicsPipeline::TriangleStrip: + case QRhiGraphicsPipeline::TriangleFan: + return MTLPrimitiveTopologyClassTriangle; + case QRhiGraphicsPipeline::Lines: + case QRhiGraphicsPipeline::LineStrip: + return MTLPrimitiveTopologyClassLine; + case QRhiGraphicsPipeline::Points: + return MTLPrimitiveTopologyClassPoint; + default: + Q_UNREACHABLE(); + return MTLPrimitiveTopologyClassTriangle; + } +} + static inline MTLCullMode toMetalCullMode(QRhiGraphicsPipeline::CullMode c) { switch (c) { @@ -3695,15 +4681,67 @@ static inline MTLTriangleFillMode toMetalTriangleFillMode(QRhiGraphicsPipeline:: } } +static inline MTLWinding toMetalTessellationWindingOrder(QShaderDescription::TessellationWindingOrder w) +{ + switch (w) { + case QShaderDescription::CwTessellationWindingOrder: + return MTLWindingClockwise; + case QShaderDescription::CcwTessellationWindingOrder: + return MTLWindingCounterClockwise; + default: + // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown + return MTLWindingCounterClockwise; + } +} + +static inline MTLTessellationPartitionMode toMetalTessellationPartitionMode(QShaderDescription::TessellationPartitioning p) +{ + switch (p) { + case QShaderDescription::EqualTessellationPartitioning: + return MTLTessellationPartitionModePow2; + case QShaderDescription::FractionalEvenTessellationPartitioning: + return MTLTessellationPartitionModeFractionalEven; + case QShaderDescription::FractionalOddTessellationPartitioning: + return MTLTessellationPartitionModeFractionalOdd; + default: + // this is reachable, consider a tess.eval. shader not declaring it, the value is then Unknown + return MTLTessellationPartitionModePow2; + } +} + +static inline MTLLanguageVersion toMetalLanguageVersion(const QShaderVersion &version) +{ + int v = version.version(); + return MTLLanguageVersion(((v / 10) << 16) + (v % 10)); +} + id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Variant shaderVariant, QString *error, QByteArray *entryPoint, QShaderKey *activeKey) { - QShaderKey key = { QShader::MetalLibShader, 20, shaderVariant }; - QShaderCode mtllib = shader.shader(key); - if (mtllib.shader().isEmpty()) { - key.setSourceVersion(12); - mtllib = shader.shader(key); + QVarLengthArray<int, 8> versions; + if (@available(macOS 13, iOS 16, *)) + versions << 30; + if (@available(macOS 12, iOS 15, *)) + versions << 24; + if (@available(macOS 11, iOS 14, *)) + versions << 23; + if (@available(macOS 10.15, iOS 13, *)) + versions << 22; + if (@available(macOS 10.14, iOS 12, *)) + versions << 21; + versions << 20 << 12; + + const QList<QShaderKey> shaders = shader.availableShaders(); + + QShaderKey key; + + for (const int &version : versions) { + key = { QShader::Source::MetalLibShader, version, shaderVariant }; + if (shaders.contains(key)) + break; } + + QShaderCode mtllib = shader.shader(key); if (!mtllib.shader().isEmpty()) { dispatch_data_t data = dispatch_data_create(mtllib.shader().constData(), size_t(mtllib.shader().size()), @@ -3722,12 +4760,13 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var } } - key = { QShader::MslShader, 20, shaderVariant }; - QShaderCode mslSource = shader.shader(key); - if (mslSource.shader().isEmpty()) { - key.setSourceVersion(12); - mslSource = shader.shader(key); + for (const int &version : versions) { + key = { QShader::Source::MslShader, version, shaderVariant }; + if (shaders.contains(key)) + break; } + + QShaderCode mslSource = shader.shader(key); if (mslSource.shader().isEmpty()) { qWarning() << "No MSL 2.0 or 1.2 code found in baked shader" << shader; return nil; @@ -3735,7 +4774,7 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var NSString *src = [NSString stringWithUTF8String: mslSource.shader().constData()]; MTLCompileOptions *opts = [[MTLCompileOptions alloc] init]; - opts.languageVersion = key.sourceVersion() == 20 ? MTLLanguageVersion2_0 : MTLLanguageVersion1_2; + opts.languageVersion = toMetalLanguageVersion(key.sourceVersion()); NSError *err = nil; id<MTLLibrary> lib = [dev newLibraryWithSource: src options: opts error: &err]; [opts release]; @@ -3757,56 +4796,192 @@ id<MTLLibrary> QRhiMetalData::createMetalLib(const QShader &shader, QShader::Var id<MTLFunction> QRhiMetalData::createMSLShaderFunction(id<MTLLibrary> lib, const QByteArray &entryPoint) { - NSString *name = [NSString stringWithUTF8String: entryPoint.constData()]; - id<MTLFunction> f = [lib newFunctionWithName: name]; - [name release]; - return f; + return [lib newFunctionWithName:[NSString stringWithUTF8String:entryPoint.constData()]]; } -bool QMetalGraphicsPipeline::create() +void QMetalGraphicsPipeline::setupAttachmentsInMetalRenderPassDescriptor(void *metalRpDesc, QMetalRenderPassDescriptor *rpD) { - if (d->ps) - destroy(); + MTLRenderPipelineDescriptor *rpDesc = reinterpret_cast<MTLRenderPipelineDescriptor *>(metalRpDesc); + + if (rpD->colorAttachmentCount) { + // defaults when no targetBlends are provided + rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]); + rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll; + rpDesc.colorAttachments[0].blendingEnabled = false; + + Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount + || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1)); + + for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) { + const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]); + rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]); + rpDesc.colorAttachments[i].blendingEnabled = b.enable; + rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor); + rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor); + rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor); + rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha); + rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha); + rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha); + rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite); + } + } + + if (rpD->hasDepthStencil) { + // Must only be set when a depth-stencil buffer will actually be bound, + // validation blows up otherwise. + MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat); + rpDesc.depthAttachmentPixelFormat = fmt; +#if defined(Q_OS_MACOS) + if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float) +#else + if (fmt != MTLPixelFormatDepth32Float) +#endif + rpDesc.stencilAttachmentPixelFormat = fmt; + } QRHI_RES_RHI(QRhiMetal); - rhiD->pipelineCreationStart(); - if (!rhiD->sanityCheckGraphicsPipeline(this)) - return false; + rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount)); +} +void QMetalGraphicsPipeline::setupMetalDepthStencilDescriptor(void *metalDsDesc) +{ + MTLDepthStencilDescriptor *dsDesc = reinterpret_cast<MTLDepthStencilDescriptor *>(metalDsDesc); + + dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways; + dsDesc.depthWriteEnabled = m_depthWrite; + if (m_stencilTest) { + dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init]; + dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp); + dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp); + dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp); + dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp); + dsDesc.frontFaceStencil.readMask = m_stencilReadMask; + dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask; + + dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init]; + dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp); + dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp); + dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp); + dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp); + dsDesc.backFaceStencil.readMask = m_stencilReadMask; + dsDesc.backFaceStencil.writeMask = m_stencilWriteMask; + } +} + +void QMetalGraphicsPipeline::mapStates() +{ + d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise; + d->cullMode = toMetalCullMode(m_cullMode); + d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode); + d->depthBias = float(m_depthBias); + d->slopeScaledDepthBias = m_slopeScaledDepthBias; +} + +void QMetalGraphicsPipelineData::setupVertexInputDescriptor(MTLVertexDescriptor *desc) +{ // same binding space for vertex and constant buffers - work it around - const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, m_shaderResourceBindings)->maxBinding + 1; + // should be in native resource binding not SPIR-V, but this will work anyway + const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1; - MTLVertexDescriptor *inputLayout = [MTLVertexDescriptor vertexDescriptor]; - for (auto it = m_vertexInputLayout.cbeginAttributes(), itEnd = m_vertexInputLayout.cendAttributes(); + QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout(); + for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes(); it != itEnd; ++it) { const uint loc = uint(it->location()); - inputLayout.attributes[loc].format = toMetalAttributeFormat(it->format()); - inputLayout.attributes[loc].offset = NSUInteger(it->offset()); - inputLayout.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding()); + desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format())); + desc.attributes[loc].offset = NSUInteger(it->offset()); + desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding()); } int bindingIndex = 0; - for (auto it = m_vertexInputLayout.cbeginBindings(), itEnd = m_vertexInputLayout.cendBindings(); + const NSUInteger viewCount = qMax<NSUInteger>(1, q->multiViewCount()); + for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings(); it != itEnd; ++it, ++bindingIndex) { const uint layoutIdx = uint(firstVertexBinding + bindingIndex); - inputLayout.layouts[layoutIdx].stepFunction = - it->classification() == QRhiVertexInputBinding::PerInstance - ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; - inputLayout.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate()); - inputLayout.layouts[layoutIdx].stride = it->stride(); + desc.layouts[layoutIdx].stepFunction = + it->classification() == QRhiVertexInputBinding::PerInstance + ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; + desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate()); + if (desc.layouts[layoutIdx].stepFunction == MTLVertexStepFunctionPerInstance) + desc.layouts[layoutIdx].stepRate *= viewCount; + desc.layouts[layoutIdx].stride = it->stride(); } +} - MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init]; +void QMetalGraphicsPipelineData::setupStageInputDescriptor(MTLStageInputOutputDescriptor *desc) +{ + // same binding space for vertex and constant buffers - work it around + // should be in native resource binding not SPIR-V, but this will work anyway + const int firstVertexBinding = QRHI_RES(QMetalShaderResourceBindings, q->shaderResourceBindings())->maxBinding + 1; + + QRhiVertexInputLayout vertexInputLayout = q->vertexInputLayout(); + for (auto it = vertexInputLayout.cbeginAttributes(), itEnd = vertexInputLayout.cendAttributes(); + it != itEnd; ++it) + { + const uint loc = uint(it->location()); + desc.attributes[loc].format = decltype(desc.attributes[loc].format)(toMetalAttributeFormat(it->format())); + desc.attributes[loc].offset = NSUInteger(it->offset()); + desc.attributes[loc].bufferIndex = NSUInteger(firstVertexBinding + it->binding()); + } + int bindingIndex = 0; + for (auto it = vertexInputLayout.cbeginBindings(), itEnd = vertexInputLayout.cendBindings(); + it != itEnd; ++it, ++bindingIndex) + { + const uint layoutIdx = uint(firstVertexBinding + bindingIndex); + if (desc.indexBufferIndex) { + desc.layouts[layoutIdx].stepFunction = + it->classification() == QRhiVertexInputBinding::PerInstance + ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridXIndexed; + } else { + desc.layouts[layoutIdx].stepFunction = + it->classification() == QRhiVertexInputBinding::PerInstance + ? MTLStepFunctionThreadPositionInGridY : MTLStepFunctionThreadPositionInGridX; + } + desc.layouts[layoutIdx].stepRate = NSUInteger(it->instanceStepRate()); + desc.layouts[layoutIdx].stride = it->stride(); + } +} + +void QRhiMetalData::trySeedingRenderPipelineFromBinaryArchive(MTLRenderPipelineDescriptor *rpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil]; + rpDesc.binaryArchives = binArchArray; + } + } +} + +void QRhiMetalData::addRenderPipelineToBinaryArchive(MTLRenderPipelineDescriptor *rpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSError *err = nil; + if (![binArch addRenderPipelineFunctionsWithDescriptor: rpDesc error: &err]) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to collect render pipeline functions to binary archive: %s", qPrintable(msg)); + } + } + } +} - rpDesc.vertexDescriptor = inputLayout; +bool QMetalGraphicsPipeline::createVertexFragmentPipeline() +{ + QRHI_RES_RHI(QRhiMetal); + + MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor]; + d->setupVertexInputDescriptor(vertexDesc); + + MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init]; + rpDesc.vertexDescriptor = vertexDesc; - // mutability cannot be determined (slotted buffers could be set as + // Mutability cannot be determined (slotted buffers could be set as // MTLMutabilityImmutable, but then we potentially need a different // descriptor for each buffer combination as this depends on the actual - // buffers not just the resource binding layout) so leave it at the default + // buffers not just the resource binding layout), so leave + // rpDesc.vertex/fragmentBuffers at the defaults. - for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { auto cacheIt = rhiD->d->shaderCache.constFind(shaderStage); if (cacheIt != rhiD->d->shaderCache.constEnd()) { switch (shaderStage.type()) { @@ -3853,6 +5028,8 @@ bool QMetalGraphicsPipeline::create() d->vs.lib = lib; d->vs.func = func; d->vs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->vs.desc = shader.description(); + d->vs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); rhiD->d->shaderCache.insert(shaderStage, d->vs); [d->vs.lib retain]; [d->vs.func retain]; @@ -3862,6 +5039,8 @@ bool QMetalGraphicsPipeline::create() d->fs.lib = lib; d->fs.func = func; d->fs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->fs.desc = shader.description(); + d->fs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); rhiD->d->shaderCache.insert(shaderStage, d->fs); [d->fs.lib retain]; [d->fs.func retain]; @@ -3876,85 +5055,836 @@ bool QMetalGraphicsPipeline::create() } QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, m_renderPassDesc); + setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD); - if (rpD->colorAttachmentCount) { - // defaults when no targetBlends are provided - rpDesc.colorAttachments[0].pixelFormat = MTLPixelFormat(rpD->colorFormat[0]); - rpDesc.colorAttachments[0].writeMask = MTLColorWriteMaskAll; - rpDesc.colorAttachments[0].blendingEnabled = false; + if (m_multiViewCount >= 2) + rpDesc.inputPrimitiveTopology = toMetalPrimitiveTopologyClass(m_topology); - Q_ASSERT(m_targetBlends.count() == rpD->colorAttachmentCount - || (m_targetBlends.isEmpty() && rpD->colorAttachmentCount == 1)); + rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc); - for (uint i = 0, ie = uint(m_targetBlends.count()); i != ie; ++i) { - const QRhiGraphicsPipeline::TargetBlend &b(m_targetBlends[int(i)]); - rpDesc.colorAttachments[i].pixelFormat = MTLPixelFormat(rpD->colorFormat[i]); - rpDesc.colorAttachments[i].blendingEnabled = b.enable; - rpDesc.colorAttachments[i].sourceRGBBlendFactor = toMetalBlendFactor(b.srcColor); - rpDesc.colorAttachments[i].destinationRGBBlendFactor = toMetalBlendFactor(b.dstColor); - rpDesc.colorAttachments[i].rgbBlendOperation = toMetalBlendOp(b.opColor); - rpDesc.colorAttachments[i].sourceAlphaBlendFactor = toMetalBlendFactor(b.srcAlpha); - rpDesc.colorAttachments[i].destinationAlphaBlendFactor = toMetalBlendFactor(b.dstAlpha); - rpDesc.colorAttachments[i].alphaBlendOperation = toMetalBlendOp(b.opAlpha); - rpDesc.colorAttachments[i].writeMask = toMetalColorWriteMask(b.colorWrite); + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addRenderPipelineToBinaryArchive(rpDesc); + + NSError *err = nil; + d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err]; + [rpDesc release]; + if (!d->ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); + return false; + } + + MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init]; + setupMetalDepthStencilDescriptor(dsDesc); + d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc]; + [dsDesc release]; + + d->primitiveType = toMetalPrimitiveType(m_topology); + mapStates(); + + return true; +} + +int QMetalGraphicsPipelineData::Tessellation::vsCompVariantToIndex(QShader::Variant vertexCompVariant) +{ + switch (vertexCompVariant) { + case QShader::NonIndexedVertexAsComputeShader: + return 0; + case QShader::UInt32IndexedVertexAsComputeShader: + return 1; + case QShader::UInt16IndexedVertexAsComputeShader: + return 2; + default: + break; + } + return -1; +} + +id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::vsCompPipeline(QRhiMetal *rhiD, QShader::Variant vertexCompVariant) +{ + const int varIndex = vsCompVariantToIndex(vertexCompVariant); + if (varIndex >= 0 && vertexComputeState[varIndex]) + return vertexComputeState[varIndex]; + + id<MTLFunction> func = nil; + if (varIndex >= 0) + func = compVs[varIndex].func; + + if (!func) { + qWarning("No compute function found for vertex shader translated for tessellation, this should not happen"); + return nil; + } + + const QMap<int, int> &ebb(compVs[varIndex].nativeShaderInfo.extraBufferBindings); + const int indexBufferBinding = ebb.value(QShaderPrivate::MslTessVertIndicesBufferBinding, -1); + + MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new]; + cpDesc.computeFunction = func; + cpDesc.threadGroupSizeIsMultipleOfThreadExecutionWidth = YES; + cpDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor]; + if (indexBufferBinding >= 0) { + if (vertexCompVariant == QShader::UInt32IndexedVertexAsComputeShader) { + cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32; + cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding; + } else if (vertexCompVariant == QShader::UInt16IndexedVertexAsComputeShader) { + cpDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16; + cpDesc.stageInputDescriptor.indexBufferIndex = indexBufferBinding; } } + q->setupStageInputDescriptor(cpDesc.stageInputDescriptor); - if (rpD->hasDepthStencil) { - // Must only be set when a depth-stencil buffer will actually be bound, - // validation blows up otherwise. - MTLPixelFormat fmt = MTLPixelFormat(rpD->dsFormat); - rpDesc.depthAttachmentPixelFormat = fmt; -#if defined(Q_OS_MACOS) - if (fmt != MTLPixelFormatDepth16Unorm && fmt != MTLPixelFormatDepth32Float) -#else - if (fmt != MTLPixelFormatDepth32Float) -#endif - rpDesc.stencilAttachmentPixelFormat = fmt; + rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addComputePipelineToBinaryArchive(cpDesc); + + NSError *err = nil; + id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc + options: MTLPipelineOptionNone + reflection: nil + error: &err]; + [cpDesc release]; + if (!ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create compute pipeline state: %s", qPrintable(msg)); + } else { + vertexComputeState[varIndex] = ps; } + // not retained, the only owner is vertexComputeState and so the QRhiGraphicsPipeline + return ps; +} - rpDesc.sampleCount = NSUInteger(rhiD->effectiveSampleCount(m_sampleCount)); +id<MTLComputePipelineState> QMetalGraphicsPipelineData::Tessellation::tescCompPipeline(QRhiMetal *rhiD) +{ + if (tessControlComputeState) + return tessControlComputeState; + + MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new]; + cpDesc.computeFunction = compTesc.func; + + rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addComputePipelineToBinaryArchive(cpDesc); NSError *err = nil; - d->ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err]; - if (!d->ps) { + id<MTLComputePipelineState> ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc + options: MTLPipelineOptionNone + reflection: nil + error: &err]; + [cpDesc release]; + if (!ps) { const QString msg = QString::fromNSString(err.localizedDescription); - qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); - [rpDesc release]; - return false; + qWarning("Failed to create compute pipeline state: %s", qPrintable(msg)); + } else { + tessControlComputeState = ps; + } + // not retained, the only owner is tessControlComputeState and so the QRhiGraphicsPipeline + return ps; +} + +static inline bool indexTaken(quint32 index, quint64 indices) +{ + return (indices >> index) & 0x1; +} + +static inline void takeIndex(quint32 index, quint64 &indices) +{ + indices |= 1 << index; +} + +static inline int nextAttributeIndex(quint64 indices) +{ + // Maximum number of vertex attributes per vertex descriptor. There does + // not appear to be a way to query this from the implementation. + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf indicates + // that all GPU families have a value of 31. + static const int maxVertexAttributes = 31; + + for (int index = 0; index < maxVertexAttributes; ++index) { + if (!indexTaken(index, indices)) + return index; + } + + Q_UNREACHABLE_RETURN(-1); +} + +static inline int aligned(quint32 offset, quint32 alignment) +{ + return ((offset + alignment - 1) / alignment) * alignment; +} + +template<typename T> +static void addUnusedVertexAttribute(const T &variable, QRhiMetal *rhiD, quint32 &offset, quint32 &vertexAlignment) +{ + + int elements = 1; + for (const int dim : variable.arrayDims) + elements *= dim; + + if (variable.type == QShaderDescription::VariableType::Struct) { + for (int element = 0; element < elements; ++element) { + for (const auto &member : variable.structMembers) { + addUnusedVertexAttribute(member, rhiD, offset, vertexAlignment); + } + } + } else { + const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type); + const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format); + + // MSL specification 3.0 says alignment = size for non packed scalars and vectors + const quint32 alignment = size; + vertexAlignment = std::max(vertexAlignment, alignment); + + for (int element = 0; element < elements; ++element) { + // adjust alignment + offset = aligned(offset, alignment); + offset += size; + } } +} + +template<typename T> +static void addVertexAttribute(const T &variable, int binding, QRhiMetal *rhiD, int &index, quint32 &offset, MTLVertexAttributeDescriptorArray *attributes, quint64 &indices, quint32 &vertexAlignment) +{ + + int elements = 1; + for (const int dim : variable.arrayDims) + elements *= dim; + + if (variable.type == QShaderDescription::VariableType::Struct) { + for (int element = 0; element < elements; ++element) { + for (const auto &member : variable.structMembers) { + addVertexAttribute(member, binding, rhiD, index, offset, attributes, indices, vertexAlignment); + } + } + } else { + const QRhiVertexInputAttribute::Format format = rhiD->shaderDescVariableFormatToVertexInputFormat(variable.type); + const quint32 size = rhiD->byteSizePerVertexForVertexInputFormat(format); + + // MSL specification 3.0 says alignment = size for non packed scalars and vectors + const quint32 alignment = size; + vertexAlignment = std::max(vertexAlignment, alignment); + + for (int element = 0; element < elements; ++element) { + Q_ASSERT(!indexTaken(index, indices)); + + // adjust alignment + offset = aligned(offset, alignment); + + attributes[index].bufferIndex = binding; + attributes[index].format = toMetalAttributeFormat(format); + attributes[index].offset = offset; + + takeIndex(index, indices); + index++; + if (indexTaken(index, indices)) + index = nextAttributeIndex(indices); + + offset += size; + } + } +} + +static inline bool matches(const QList<QShaderDescription::BlockVariable> &a, const QList<QShaderDescription::BlockVariable> &b) +{ + if (a.size() == b.size()) { + bool match = true; + for (int i = 0; i < a.size() && match; ++i) { + match &= a[i].type == b[i].type + && a[i].arrayDims == b[i].arrayDims + && matches(a[i].structMembers, b[i].structMembers); + } + return match; + } + + return false; +} + +static inline bool matches(const QShaderDescription::InOutVariable &a, const QShaderDescription::InOutVariable &b) +{ + return a.location == b.location + && a.type == b.type + && a.perPatch == b.perPatch + && matches(a.structMembers, b.structMembers); +} + +// +// Create the tessellation evaluation render pipeline state +// +// The tesc runs as a compute shader in a compute pipeline and writes per patch and per patch +// control point data into separate storage buffers. The tese runs as a vertex shader in a render +// pipeline. Our task is to generate a render pipeline descriptor for the tese that pulls vertices +// from these buffers. +// +// As the buffers we are pulling vertices from are written by a compute pipeline, they follow the +// MSL alignment conventions which we must take into account when generating our +// MTLVertexDescriptor. We must include the user defined tese input attributes, and any builtins +// that were used. +// +// SPIRV-Cross generates the MSL tese shader code with input attribute indices that reflect the +// specified GLSL locations. Interface blocks are flattened with each member having an incremented +// attribute index. SPIRV-Cross reports an error on compilation if there are clashes in the index +// address space. +// +// After the user specified attributes are processed, SPIRV-Cross places the in-use builtins at the +// next available (lowest value) attribute index. Tese builtins are processed in the following +// order: +// +// in gl_PerVertex +// { +// vec4 gl_Position; +// float gl_PointSize; +// float gl_ClipDistance[]; +// }; +// +// patch in float gl_TessLevelOuter[4]; +// patch in float gl_TessLevelInner[2]; +// +// Enumerations in QShaderDescription::BuiltinType are defined in this order. +// +// For quads, SPIRV-Cross places MTLQuadTessellationFactorsHalf per patch in the tessellation +// factor buffer. For triangles it uses MTLTriangleTessellationFactorsHalf. +// +// It should be noted that SPIRV-Cross handles the following builtin inputs internally, with no +// host side support required. +// +// in vec3 gl_TessCoord; +// in int gl_PatchVerticesIn; +// in int gl_PrimitiveID; +// +id<MTLRenderPipelineState> QMetalGraphicsPipelineData::Tessellation::teseFragRenderPipeline(QRhiMetal *rhiD, QMetalGraphicsPipeline *pipeline) +{ + if (pipeline->d->ps) + return pipeline->d->ps; + + MTLRenderPipelineDescriptor *rpDesc = [[MTLRenderPipelineDescriptor alloc] init]; + MTLVertexDescriptor *vertexDesc = [MTLVertexDescriptor vertexDescriptor]; + + // tesc output buffers + const QMap<int, int> &ebb(compTesc.nativeShaderInfo.extraBufferBindings); + const int tescOutputBufferBinding = ebb.value(QShaderPrivate::MslTessVertTescOutputBufferBinding, -1); + const int tescPatchOutputBufferBinding = ebb.value(QShaderPrivate::MslTessTescPatchOutputBufferBinding, -1); + const int tessFactorBufferBinding = ebb.value(QShaderPrivate::MslTessTescTessLevelBufferBinding, -1); + quint32 offsetInTescOutput = 0; + quint32 offsetInTescPatchOutput = 0; + quint32 offsetInTessFactorBuffer = 0; + quint32 tescOutputAlignment = 0; + quint32 tescPatchOutputAlignment = 0; + quint32 tessFactorAlignment = 0; + QSet<int> usedBuffers; + + // tesc output variables in ascending location order + QMap<int, QShaderDescription::InOutVariable> tescOutVars; + for (const auto &tescOutVar : compTesc.desc.outputVariables()) + tescOutVars[tescOutVar.location] = tescOutVar; + + // tese input variables in ascending location order + QMap<int, QShaderDescription::InOutVariable> teseInVars; + for (const auto &teseInVar : vertTese.desc.inputVariables()) + teseInVars[teseInVar.location] = teseInVar; + + // bit mask tracking usage of vertex attribute indices + quint64 indices = 0; + + for (QShaderDescription::InOutVariable &tescOutVar : tescOutVars) { + + int index = tescOutVar.location; + int binding = -1; + quint32 *offset = nullptr; + quint32 *alignment = nullptr; + + if (tescOutVar.perPatch) { + binding = tescPatchOutputBufferBinding; + offset = &offsetInTescPatchOutput; + alignment = &tescPatchOutputAlignment; + } else { + tescOutVar.arrayDims.removeLast(); + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + } + + if (teseInVars.contains(index)) { + + if (!matches(teseInVars[index], tescOutVar)) { + qWarning() << "mismatched tessellation control output -> tesssellation evaluation input at location" << index; + qWarning() << " tesc out:" << tescOutVar; + qWarning() << " tese in:" << teseInVars[index]; + } + + if (binding != -1) { + addVertexAttribute(tescOutVar, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment); + usedBuffers << binding; + } else { + qWarning() << "baked tessellation control shader missing output buffer binding information"; + addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment); + } + + } else { + qWarning() << "missing tessellation evaluation input for tessellation control output:" << tescOutVar; + addUnusedVertexAttribute(tescOutVar, rhiD, *offset, *alignment); + } + + teseInVars.remove(tescOutVar.location); + } + + for (const QShaderDescription::InOutVariable &teseInVar : teseInVars) + qWarning() << "missing tessellation control output for tessellation evaluation input:" << teseInVar; + + // tesc output builtins in ascending location order + QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> tescOutBuiltins; + for (const auto &tescOutBuiltin : compTesc.desc.outputBuiltinVariables()) + tescOutBuiltins[tescOutBuiltin.type] = tescOutBuiltin; + + // tese input builtins in ascending location order + QMap<QShaderDescription::BuiltinType, QShaderDescription::BuiltinVariable> teseInBuiltins; + for (const auto &teseInBuiltin : vertTese.desc.inputBuiltinVariables()) + teseInBuiltins[teseInBuiltin.type] = teseInBuiltin; + + const bool trianglesMode = vertTese.desc.tessellationMode() == QShaderDescription::TrianglesTessellationMode; + bool tessLevelAdded = false; + + for (const QShaderDescription::BuiltinVariable &builtin : tescOutBuiltins) { + + QShaderDescription::InOutVariable variable; + int binding = -1; + quint32 *offset = nullptr; + quint32 *alignment = nullptr; + + switch (builtin.type) { + case QShaderDescription::BuiltinType::PositionBuiltin: + variable.type = QShaderDescription::VariableType::Vec4; + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + break; + case QShaderDescription::BuiltinType::PointSizeBuiltin: + variable.type = QShaderDescription::VariableType::Float; + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + break; + case QShaderDescription::BuiltinType::ClipDistanceBuiltin: + variable.type = QShaderDescription::VariableType::Float; + variable.arrayDims = builtin.arrayDims; + binding = tescOutputBufferBinding; + offset = &offsetInTescOutput; + alignment = &tescOutputAlignment; + break; + case QShaderDescription::BuiltinType::TessLevelOuterBuiltin: + variable.type = QShaderDescription::VariableType::Half4; + binding = tessFactorBufferBinding; + offset = &offsetInTessFactorBuffer; + tessLevelAdded = trianglesMode; + alignment = &tessFactorAlignment; + break; + case QShaderDescription::BuiltinType::TessLevelInnerBuiltin: + if (trianglesMode) { + if (!tessLevelAdded) { + variable.type = QShaderDescription::VariableType::Half4; + binding = tessFactorBufferBinding; + offsetInTessFactorBuffer = 0; + offset = &offsetInTessFactorBuffer; + alignment = &tessFactorAlignment; + tessLevelAdded = true; + } else { + teseInBuiltins.remove(builtin.type); + continue; + } + } else { + variable.type = QShaderDescription::VariableType::Half2; + binding = tessFactorBufferBinding; + offsetInTessFactorBuffer = 8; + offset = &offsetInTessFactorBuffer; + alignment = &tessFactorAlignment; + } + break; + default: + Q_UNREACHABLE(); + break; + } + + if (teseInBuiltins.contains(builtin.type)) { + if (binding != -1) { + int index = nextAttributeIndex(indices); + addVertexAttribute(variable, binding, rhiD, index, *offset, vertexDesc.attributes, indices, *alignment); + usedBuffers << binding; + } else { + qWarning() << "baked tessellation control shader missing output buffer binding information"; + addUnusedVertexAttribute(variable, rhiD, *offset, *alignment); + } + } else { + addUnusedVertexAttribute(variable, rhiD, *offset, *alignment); + } + + teseInBuiltins.remove(builtin.type); + } + + for (const QShaderDescription::BuiltinVariable &builtin : teseInBuiltins) { + switch (builtin.type) { + case QShaderDescription::BuiltinType::PositionBuiltin: + case QShaderDescription::BuiltinType::PointSizeBuiltin: + case QShaderDescription::BuiltinType::ClipDistanceBuiltin: + qWarning() << "missing tessellation control output for tessellation evaluation builtin input:" << builtin; + break; + default: + break; + } + } + + if (usedBuffers.contains(tescOutputBufferBinding)) { + vertexDesc.layouts[tescOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatchControlPoint; + vertexDesc.layouts[tescOutputBufferBinding].stride = aligned(offsetInTescOutput, tescOutputAlignment); + } + + if (usedBuffers.contains(tescPatchOutputBufferBinding)) { + vertexDesc.layouts[tescPatchOutputBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch; + vertexDesc.layouts[tescPatchOutputBufferBinding].stride = aligned(offsetInTescPatchOutput, tescPatchOutputAlignment); + } + + if (usedBuffers.contains(tessFactorBufferBinding)) { + vertexDesc.layouts[tessFactorBufferBinding].stepFunction = MTLVertexStepFunctionPerPatch; + vertexDesc.layouts[tessFactorBufferBinding].stride = trianglesMode ? sizeof(MTLTriangleTessellationFactorsHalf) : sizeof(MTLQuadTessellationFactorsHalf); + } + + rpDesc.vertexDescriptor = vertexDesc; + rpDesc.vertexFunction = vertTese.func; + rpDesc.fragmentFunction = pipeline->d->fs.func; + + // The portable, cross-API approach is to use CCW, the results are then + // identical (assuming the applied clipSpaceCorrMatrix) for all the 3D + // APIs. The tess.eval. GLSL shader is thus expected to specify ccw. If it + // doesn't, things may not work as expected. + rpDesc.tessellationOutputWindingOrder = toMetalTessellationWindingOrder(vertTese.desc.tessellationWindingOrder()); + + rpDesc.tessellationPartitionMode = toMetalTessellationPartitionMode(vertTese.desc.tessellationPartitioning()); + + QMetalRenderPassDescriptor *rpD = QRHI_RES(QMetalRenderPassDescriptor, pipeline->renderPassDescriptor()); + pipeline->setupAttachmentsInMetalRenderPassDescriptor(rpDesc, rpD); + + rhiD->d->trySeedingRenderPipelineFromBinaryArchive(rpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addRenderPipelineToBinaryArchive(rpDesc); + + NSError *err = nil; + id<MTLRenderPipelineState> ps = [rhiD->d->dev newRenderPipelineStateWithDescriptor: rpDesc error: &err]; [rpDesc release]; + if (!ps) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to create render pipeline state for tessellation: %s", qPrintable(msg)); + } else { + // ps is stored in the QMetalGraphicsPipelineData so the end result in this + // regard is no different from what createVertexFragmentPipeline does + pipeline->d->ps = ps; + } + return ps; +} - MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init]; - dsDesc.depthCompareFunction = m_depthTest ? toMetalCompareOp(m_depthOp) : MTLCompareFunctionAlways; - dsDesc.depthWriteEnabled = m_depthWrite; - if (m_stencilTest) { - dsDesc.frontFaceStencil = [[MTLStencilDescriptor alloc] init]; - dsDesc.frontFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilFront.failOp); - dsDesc.frontFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilFront.depthFailOp); - dsDesc.frontFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilFront.passOp); - dsDesc.frontFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilFront.compareOp); - dsDesc.frontFaceStencil.readMask = m_stencilReadMask; - dsDesc.frontFaceStencil.writeMask = m_stencilWriteMask; +QMetalBuffer *QMetalGraphicsPipelineData::ExtraBufferManager::acquireWorkBuffer(QRhiMetal *rhiD, quint32 size, WorkBufType type) +{ + QVector<QMetalBuffer *> *workBuffers = type == WorkBufType::DeviceLocal ? &deviceLocalWorkBuffers : &hostVisibleWorkBuffers; - dsDesc.backFaceStencil = [[MTLStencilDescriptor alloc] init]; - dsDesc.backFaceStencil.stencilFailureOperation = toMetalStencilOp(m_stencilBack.failOp); - dsDesc.backFaceStencil.depthFailureOperation = toMetalStencilOp(m_stencilBack.depthFailOp); - dsDesc.backFaceStencil.depthStencilPassOperation = toMetalStencilOp(m_stencilBack.passOp); - dsDesc.backFaceStencil.stencilCompareFunction = toMetalCompareOp(m_stencilBack.compareOp); - dsDesc.backFaceStencil.readMask = m_stencilReadMask; - dsDesc.backFaceStencil.writeMask = m_stencilWriteMask; + // Check if something is reusable as-is. + for (QMetalBuffer *workBuf : *workBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == -1 && workBuf->size() >= size) { + workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot; + return workBuf; + } + } + + // Once the pool is above a certain threshold, see if there is something + // unused (but too small) and recreate that our size. + if (workBuffers->count() > QMTL_FRAMES_IN_FLIGHT * 8) { + for (QMetalBuffer *workBuf : *workBuffers) { + if (workBuf && workBuf->lastActiveFrameSlot == -1) { + workBuf->setSize(size); + if (workBuf->create()) { + workBuf->lastActiveFrameSlot = rhiD->currentFrameSlot; + return workBuf; + } + } + } + } + + // Add a new buffer to the pool. + QMetalBuffer *buf; + if (type == WorkBufType::DeviceLocal) { + // for GPU->GPU data (non-slotted, not necessarily host writable) + buf = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size); + } else { + // for CPU->GPU (non-slotted, host writable/coherent) + buf = new QMetalBuffer(rhiD, QRhiBuffer::Dynamic, QRhiBuffer::UsageFlags(QMetalBuffer::WorkBufPoolUsage), size); + } + if (buf->create()) { + buf->lastActiveFrameSlot = rhiD->currentFrameSlot; + workBuffers->append(buf); + return buf; + } + + qWarning("Failed to acquire work buffer of size %u", size); + return nullptr; +} + +bool QMetalGraphicsPipeline::createTessellationPipelines(const QShader &tessVert, const QShader &tesc, const QShader &tese, const QShader &tessFrag) +{ + QRHI_RES_RHI(QRhiMetal); + QString error; + QByteArray entryPoint; + QShaderKey activeKey; + + const QShaderDescription tescDesc = tesc.description(); + const QShaderDescription teseDesc = tese.description(); + d->tess.inControlPointCount = uint(m_patchControlPointCount); + d->tess.outControlPointCount = tescDesc.tessellationOutputVertexCount(); + if (!d->tess.outControlPointCount) + d->tess.outControlPointCount = teseDesc.tessellationOutputVertexCount(); + + if (!d->tess.outControlPointCount) { + qWarning("Failed to determine output vertex count from the tessellation control or evaluation shader, cannot tessellate"); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + if (m_multiViewCount >= 2) + qWarning("Multiview is not supported with tessellation"); + + // Now the vertex shader is a compute shader. + // It should have three dedicated *VertexAsComputeShader variants. + // What the requested variant was (Standard or Batchable) plays no role here. + // (the Qt Quick scenegraph does not use tessellation with its materials) + // Create all three versions. + + bool variantsPresent[3] = {}; + const QVector<QShaderKey> tessVertKeys = tessVert.availableShaders(); + for (const QShaderKey &k : tessVertKeys) { + switch (k.sourceVariant()) { + case QShader::NonIndexedVertexAsComputeShader: + variantsPresent[0] = true; + break; + case QShader::UInt32IndexedVertexAsComputeShader: + variantsPresent[1] = true; + break; + case QShader::UInt16IndexedVertexAsComputeShader: + variantsPresent[2] = true; + break; + default: + break; + } + } + if (!(variantsPresent[0] && variantsPresent[1] && variantsPresent[2])) { + qWarning("Vertex shader is not prepared for Metal tessellation. Cannot tessellate. " + "Perhaps the relevant variants (UInt32IndexedVertexAsComputeShader et al) were not generated? " + "Try passing --msltess to qsb."); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + int varIndex = 0; // Will map NonIndexed as 0, UInt32 as 1, UInt16 as 2. Do not change this ordering. + for (QShader::Variant variant : { + QShader::NonIndexedVertexAsComputeShader, + QShader::UInt32IndexedVertexAsComputeShader, + QShader::UInt16IndexedVertexAsComputeShader }) + { + id<MTLLibrary> lib = rhiD->d->createMetalLib(tessVert, variant, &error, &entryPoint, &activeKey); + if (!lib) { + qWarning("MSL shader compilation failed for vertex-as-compute shader %d: %s", int(variant), qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> func = rhiD->d->createMSLShaderFunction(lib, entryPoint); + if (!func) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [lib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + QMetalShader &compVs(d->tess.compVs[varIndex]); + compVs.lib = lib; + compVs.func = func; + compVs.desc = tessVert.description(); + compVs.nativeResourceBindingMap = tessVert.nativeResourceBindingMap(activeKey); + compVs.nativeShaderInfo = tessVert.nativeShaderInfo(activeKey); + + // pre-create all three MTLComputePipelineStates + if (!d->tess.vsCompPipeline(rhiD, variant)) { + qWarning("Failed to pre-generate compute pipeline for vertex compute shader (tessellation variant %d)", int(variant)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + ++varIndex; + } + + // Pipeline #2 is a compute that runs the tessellation control (compute) shader + id<MTLLibrary> tessControlLib = rhiD->d->createMetalLib(tesc, QShader::StandardShader, &error, &entryPoint, &activeKey); + if (!tessControlLib) { + qWarning("MSL shader compilation failed for tessellation control compute shader: %s", qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> tessControlFunc = rhiD->d->createMSLShaderFunction(tessControlLib, entryPoint); + if (!tessControlFunc) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [tessControlLib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + d->tess.compTesc.lib = tessControlLib; + d->tess.compTesc.func = tessControlFunc; + d->tess.compTesc.desc = tesc.description(); + d->tess.compTesc.nativeResourceBindingMap = tesc.nativeResourceBindingMap(activeKey); + d->tess.compTesc.nativeShaderInfo = tesc.nativeShaderInfo(activeKey); + if (!d->tess.tescCompPipeline(rhiD)) { + qWarning("Failed to pre-generate compute pipeline for tessellation control shader"); + d->tess.enabled = false; + d->tess.failed = true; + return false; } + // Pipeline #3 is a render pipeline with the tessellation evaluation (vertex) + the fragment shader + id<MTLLibrary> tessEvalLib = rhiD->d->createMetalLib(tese, QShader::StandardShader, &error, &entryPoint, &activeKey); + if (!tessEvalLib) { + qWarning("MSL shader compilation failed for tessellation evaluation vertex shader: %s", qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> tessEvalFunc = rhiD->d->createMSLShaderFunction(tessEvalLib, entryPoint); + if (!tessEvalFunc) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [tessEvalLib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + d->tess.vertTese.lib = tessEvalLib; + d->tess.vertTese.func = tessEvalFunc; + d->tess.vertTese.desc = tese.description(); + d->tess.vertTese.nativeResourceBindingMap = tese.nativeResourceBindingMap(activeKey); + d->tess.vertTese.nativeShaderInfo = tese.nativeShaderInfo(activeKey); + + id<MTLLibrary> fragLib = rhiD->d->createMetalLib(tessFrag, QShader::StandardShader, &error, &entryPoint, &activeKey); + if (!fragLib) { + qWarning("MSL shader compilation failed for fragment shader: %s", qPrintable(error)); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + id<MTLFunction> fragFunc = rhiD->d->createMSLShaderFunction(fragLib, entryPoint); + if (!fragFunc) { + qWarning("MSL function for entry point %s not found", entryPoint.constData()); + [fragLib release]; + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + d->fs.lib = fragLib; + d->fs.func = fragFunc; + d->fs.desc = tessFrag.description(); + d->fs.nativeShaderInfo = tessFrag.nativeShaderInfo(activeKey); + d->fs.nativeResourceBindingMap = tessFrag.nativeResourceBindingMap(activeKey); + + if (!d->tess.teseFragRenderPipeline(rhiD, this)) { + qWarning("Failed to pre-generate render pipeline for tessellation evaluation + fragment shader"); + d->tess.enabled = false; + d->tess.failed = true; + return false; + } + + MTLDepthStencilDescriptor *dsDesc = [[MTLDepthStencilDescriptor alloc] init]; + setupMetalDepthStencilDescriptor(dsDesc); d->ds = [rhiD->d->dev newDepthStencilStateWithDescriptor: dsDesc]; [dsDesc release]; - d->primitiveType = toMetalPrimitiveType(m_topology); - d->winding = m_frontFace == CCW ? MTLWindingCounterClockwise : MTLWindingClockwise; - d->cullMode = toMetalCullMode(m_cullMode); - d->triangleFillMode = toMetalTriangleFillMode(m_polygonMode); - d->depthBias = float(m_depthBias); - d->slopeScaledDepthBias = m_slopeScaledDepthBias; + // no primitiveType + mapStates(); + + return true; +} + +bool QMetalGraphicsPipeline::create() +{ + destroy(); // no early test, always invoke and leave it to destroy to decide what to clean up + + QRHI_RES_RHI(QRhiMetal); + rhiD->pipelineCreationStart(); + if (!rhiD->sanityCheckGraphicsPipeline(this)) + return false; + + // See if tessellation is involved. Things will be very different, if so. + QShader tessVert; + QShader tesc; + QShader tese; + QShader tessFrag; + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { + switch (shaderStage.type()) { + case QRhiShaderStage::Vertex: + tessVert = shaderStage.shader(); + break; + case QRhiShaderStage::TessellationControl: + tesc = shaderStage.shader(); + break; + case QRhiShaderStage::TessellationEvaluation: + tese = shaderStage.shader(); + break; + case QRhiShaderStage::Fragment: + tessFrag = shaderStage.shader(); + break; + default: + break; + } + } + d->tess.enabled = tesc.isValid() && tese.isValid() && m_topology == Patches && m_patchControlPointCount > 0; + d->tess.failed = false; + + bool ok = d->tess.enabled ? createTessellationPipelines(tessVert, tesc, tese, tessFrag) : createVertexFragmentPipeline(); + if (!ok) + return false; + + // SPIRV-Cross buffer size buffers + int buffers = 0; + QVarLengthArray<QMetalShader *, 6> shaders; + if (d->tess.enabled) { + shaders.append(&d->tess.compVs[0]); + shaders.append(&d->tess.compVs[1]); + shaders.append(&d->tess.compVs[2]); + shaders.append(&d->tess.compTesc); + shaders.append(&d->tess.vertTese); + } else { + shaders.append(&d->vs); + } + shaders.append(&d->fs); + + for (QMetalShader *shader : shaders) { + if (shader->nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + const int binding = shader->nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + shader->nativeResourceBindingMap[binding] = qMakePair(binding, -1); + int maxNativeBinding = 0; + for (const QShaderDescription::StorageBlock &block : shader->desc.storageBlocks()) + maxNativeBinding = qMax(maxNativeBinding, shader->nativeResourceBindingMap[block.binding].first); + + // we use one buffer to hold data for all graphics shader stages, each with a different offset. + // buffer offsets must be 32byte aligned - adjust buffer count accordingly + buffers += ((maxNativeBinding + 1 + 7) / 8) * 8; + } + } + + if (buffers) { + if (!d->bufferSizeBuffer) + d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int)); + + d->bufferSizeBuffer->setSize(buffers * sizeof(int)); + d->bufferSizeBuffer->create(); + } rhiD->pipelineCreationEnd(); lastActiveFrameSlot = -1; @@ -3982,6 +5912,9 @@ void QMetalComputePipeline::destroy() if (!d->ps) return; + delete d->bufferSizeBuffer; + d->bufferSizeBuffer = nullptr; + QRhiMetalData::DeferredReleaseEntry e; e.type = QRhiMetalData::DeferredReleaseEntry::ComputePipeline; e.lastActiveFrameSlot = lastActiveFrameSlot; @@ -3995,6 +5928,29 @@ void QMetalComputePipeline::destroy() } } +void QRhiMetalData::trySeedingComputePipelineFromBinaryArchive(MTLComputePipelineDescriptor *cpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSArray *binArchArray = [NSArray arrayWithObjects: binArch, nil]; + cpDesc.binaryArchives = binArchArray; + } + } +} + +void QRhiMetalData::addComputePipelineToBinaryArchive(MTLComputePipelineDescriptor *cpDesc) +{ + if (@available(macOS 11.0, iOS 14.0, *)) { + if (binArch) { + NSError *err = nil; + if (![binArch addComputePipelineFunctionsWithDescriptor: cpDesc error: &err]) { + const QString msg = QString::fromNSString(err.localizedDescription); + qWarning("Failed to collect compute pipeline functions to binary archive: %s", qPrintable(msg)); + } + } + } +} + bool QMetalComputePipeline::create() { if (d->ps) @@ -4027,6 +5983,14 @@ bool QMetalComputePipeline::create() d->cs.func = func; d->cs.localSize = shader.description().computeShaderLocalSize(); d->cs.nativeResourceBindingMap = shader.nativeResourceBindingMap(activeKey); + d->cs.desc = shader.description(); + d->cs.nativeShaderInfo = shader.nativeShaderInfo(activeKey); + + // SPIRV-Cross buffer size buffers + if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + const int binding = d->cs.nativeShaderInfo.extraBufferBindings[QShaderPrivate::MslBufferSizeBufferBinding]; + d->cs.nativeResourceBindingMap[binding] = qMakePair(binding, -1); + } if (rhiD->d->shaderCache.count() >= QRhiMetal::MAX_SHADER_CACHE_ENTRIES) { for (QMetalShader &s : rhiD->d->shaderCache) @@ -4041,14 +6005,41 @@ bool QMetalComputePipeline::create() d->localSize = MTLSizeMake(d->cs.localSize[0], d->cs.localSize[1], d->cs.localSize[2]); + MTLComputePipelineDescriptor *cpDesc = [MTLComputePipelineDescriptor new]; + cpDesc.computeFunction = d->cs.func; + + rhiD->d->trySeedingComputePipelineFromBinaryArchive(cpDesc); + + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) + rhiD->d->addComputePipelineToBinaryArchive(cpDesc); + NSError *err = nil; - d->ps = [rhiD->d->dev newComputePipelineStateWithFunction: d->cs.func error: &err]; + d->ps = [rhiD->d->dev newComputePipelineStateWithDescriptor: cpDesc + options: MTLPipelineOptionNone + reflection: nil + error: &err]; + [cpDesc release]; if (!d->ps) { const QString msg = QString::fromNSString(err.localizedDescription); - qWarning("Failed to create render pipeline state: %s", qPrintable(msg)); + qWarning("Failed to create compute pipeline state: %s", qPrintable(msg)); return false; } + // SPIRV-Cross buffer size buffers + if (d->cs.nativeShaderInfo.extraBufferBindings.contains(QShaderPrivate::MslBufferSizeBufferBinding)) { + int buffers = 0; + for (const QShaderDescription::StorageBlock &block : d->cs.desc.storageBlocks()) + buffers = qMax(buffers, d->cs.nativeResourceBindingMap[block.binding].first); + + buffers += 1; + + if (!d->bufferSizeBuffer) + d->bufferSizeBuffer = new QMetalBuffer(rhiD, QRhiBuffer::Static, QRhiBuffer::StorageBuffer, buffers * sizeof(int)); + + d->bufferSizeBuffer->setSize(buffers * sizeof(int)); + d->bufferSizeBuffer->create(); + } + rhiD->pipelineCreationEnd(); lastActiveFrameSlot = -1; generation += 1; @@ -4081,10 +6072,12 @@ const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles() return &nativeHandlesStruct; } -void QMetalCommandBuffer::resetState() +void QMetalCommandBuffer::resetState(double lastGpuTime) { + d->lastGpuTime = lastGpuTime; d->currentRenderPassEncoder = nil; d->currentComputePassEncoder = nil; + d->tessellationComputeEncoder = nil; d->currentPassRpDesc = nil; resetPerPassState(); } @@ -4146,8 +6139,7 @@ void QMetalSwapChain::destroy() for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { if (d->sem[i]) { // the semaphores cannot be released if they do not have the initial value - dispatch_semaphore_wait(d->sem[i], DISPATCH_TIME_FOREVER); - dispatch_semaphore_signal(d->sem[i]); + waitUntilCompleted(i); dispatch_release(d->sem[i]); d->sem[i] = nullptr; @@ -4159,7 +6151,14 @@ void QMetalSwapChain::destroy() d->msaaTex[i] = nil; } +#ifdef Q_OS_MACOS + d->liveResizeStartObserver.remove(); + d->liveResizeEndObserver.remove(); + d->liveResizeObserverSet = false; +#endif + d->layer = nullptr; + m_proxyData = {}; [d->curDrawable release]; d->curDrawable = nil; @@ -4181,6 +6180,9 @@ QRhiRenderTarget *QMetalSwapChain::currentFrameRenderTarget() return &rtWrapper; } +// view.layer should ideally be called on the main thread, otherwise the UI +// Thread Checker in Xcode drops a warning. Hence trying to proxy it through +// QRhiSwapChainProxyData instead of just calling this function directly. static inline CAMetalLayer *layerForWindow(QWindow *window) { Q_ASSERT(window); @@ -4193,29 +6195,51 @@ static inline CAMetalLayer *layerForWindow(QWindow *window) return static_cast<CAMetalLayer *>(view.layer); } +// If someone calls this, it is hopefully from the main thread, and they will +// then set the returned data on the QRhiSwapChain, so it won't need to query +// the layer on its own later on. +QRhiSwapChainProxyData QRhiMetal::updateSwapChainProxyData(QWindow *window) +{ + QRhiSwapChainProxyData d; + d.reserved[0] = layerForWindow(window); + return d; +} + QSize QMetalSwapChain::surfacePixelSize() { Q_ASSERT(m_window); CAMetalLayer *layer = d->layer; if (!layer) - layer = layerForWindow(m_window); + layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, m_window, QRhi::Metal, 0); - CGSize layerSize = layer.bounds.size; - layerSize.width *= layer.contentsScale; - layerSize.height *= layer.contentsScale; - return QSizeF::fromCGSize(layerSize).toSize(); + Q_ASSERT(layer); + int height = (int)layer.bounds.size.height; + int width = (int)layer.bounds.size.width; + width *= layer.contentsScale; + height *= layer.contentsScale; + return QSize(width, height); } bool QMetalSwapChain::isFormatSupported(Format f) { -#ifdef Q_OS_MACOS - return f == SDR || f == HDRExtendedSrgbLinear; -#endif + if (f == HDRExtendedSrgbLinear) { + if (@available(macOS 10.11, iOS 16.0, *)) + return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f; + else + return false; + } else if (f == HDRExtendedDisplayP3Linear) { + if (@available(macOS 11.0, iOS 14.0, *)) + return hdrInfo().limits.colorComponentValue.maxPotentialColorComponentValue > 1.0f; + else + return false; + } return f == SDR; } QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor() { + QRHI_RES_RHI(QRhiMetal); + chooseFormats(); // ensure colorFormat and similar are filled out QMetalRenderPassDescriptor *rpD = new QMetalRenderPassDescriptor(m_rhi); @@ -4226,7 +6250,6 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor() #ifdef Q_OS_MACOS // m_depthStencil may not be built yet so cannot rely on computed fields in it - QRHI_RES_RHI(QRhiMetal); rpD->dsFormat = rhiD->d->dev.depth24Stencil8PixelFormatSupported ? MTLPixelFormatDepth24Unorm_Stencil8 : MTLPixelFormatDepth32Float_Stencil8; #else @@ -4234,6 +6257,8 @@ QRhiRenderPassDescriptor *QMetalSwapChain::newCompatibleRenderPassDescriptor() #endif rpD->updateSerializedFormat(); + + rhiD->registerResource(rpD, false); return rpD; } @@ -4242,7 +6267,7 @@ void QMetalSwapChain::chooseFormats() QRHI_RES_RHI(QRhiMetal); samples = rhiD->effectiveSampleCount(m_sampleCount); // pick a format that is allowed for CAMetalLayer.pixelFormat - if (m_format == HDRExtendedSrgbLinear) { + if (m_format == HDRExtendedSrgbLinear || m_format == HDRExtendedDisplayP3Linear) { d->colorFormat = MTLPixelFormatRGBA16Float; d->rhiColorFormat = QRhiTexture::RGBA16F; return; @@ -4251,6 +6276,17 @@ void QMetalSwapChain::chooseFormats() d->rhiColorFormat = QRhiTexture::BGRA8; } +void QMetalSwapChain::waitUntilCompleted(int slot) +{ + // wait+signal is the general pattern to ensure the commands for a + // given frame slot have completed (if sem is 1, we go 0 then 1; if + // sem is 0 we go -1, block, completion increments to 0, then us to 1) + + dispatch_semaphore_t sem = d->sem[slot]; + dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + dispatch_semaphore_signal(sem); +} + bool QMetalSwapChain::createOrResize() { Q_ASSERT(m_window); @@ -4272,19 +6308,24 @@ bool QMetalSwapChain::createOrResize() return false; } - d->layer = layerForWindow(window); + d->layer = qrhi_objectFromProxyData<CAMetalLayer>(&m_proxyData, window, QRhi::Metal, 0); Q_ASSERT(d->layer); chooseFormats(); if (d->colorFormat != d->layer.pixelFormat) d->layer.pixelFormat = d->colorFormat; -#ifdef Q_OS_MACOS - // Can't enable this on iOS until wantsExtendedDynamicRangeContent is available + if (m_format == HDRExtendedSrgbLinear) { - d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB); - d->layer.wantsExtendedDynamicRangeContent = YES; + if (@available(macOS 10.11, iOS 16.0, *)) { + d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearSRGB); + d->layer.wantsExtendedDynamicRangeContent = YES; + } + } else if (m_format == HDRExtendedDisplayP3Linear) { + if (@available(macOS 11.0, iOS 16.0, *)) { + d->layer.colorspace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3); + d->layer.wantsExtendedDynamicRangeContent = YES; + } } -#endif if (m_flags.testFlag(UsedAsTransferSource)) d->layer.framebufferOnly = NO; @@ -4309,9 +6350,12 @@ bool QMetalSwapChain::createOrResize() // Now set the layer's drawableSize which will stay set to the same value // until the next createOrResize(), thus ensuring atomicity with regards to // the drawable size in frames. - CGSize layerSize = d->layer.bounds.size; - layerSize.width *= d->layer.contentsScale; - layerSize.height *= d->layer.contentsScale; + int width = (int)d->layer.bounds.size.width; + int height = (int)d->layer.bounds.size.height; + CGSize layerSize = CGSizeMake(width, height); + const float scaleFactor = d->layer.contentsScale; + layerSize.width *= scaleFactor; + layerSize.height *= scaleFactor; d->layer.drawableSize = layerSize; m_currentPixelSize = QSizeF::fromCGSize(layerSize).toSize(); @@ -4319,10 +6363,39 @@ bool QMetalSwapChain::createOrResize() [d->layer setDevice: rhiD->d->dev]; +#ifdef Q_OS_MACOS + // Can only use presentsWithTransaction (to get smooth resizing) when + // presenting from the main (gui) thread. We predict that based on the + // thread this function is called on since if the QRhiSwapChain is + // initialied on a given thread then that's almost certainly the thread on + // which the QRhi renders and presents. + const bool canUsePresentsWithTransaction = NSThread.isMainThread; + + // Have an env.var. just in case it turns out presentsWithTransaction is + // not desired in some specific case. + static bool allowPresentsWithTransaction = !qEnvironmentVariableIntValue("QT_MTL_NO_TRANSACTION"); + + if (allowPresentsWithTransaction && canUsePresentsWithTransaction && !d->liveResizeObserverSet) { + d->liveResizeObserverSet = true; + NSView *view = reinterpret_cast<NSView *>(window->winId()); + NSWindow *window = view.window; + if (window) { + qCDebug(QRHI_LOG_INFO, "will set presentsWithTransaction during live resize"); + d->liveResizeStartObserver = QMacNotificationObserver(window, NSWindowWillStartLiveResizeNotification, [this] { + d->layer.presentsWithTransaction = true; + }); + d->liveResizeEndObserver = QMacNotificationObserver(window, NSWindowDidEndLiveResizeNotification, [this] { + d->layer.presentsWithTransaction = false; + }); + } + } +#endif + [d->curDrawable release]; d->curDrawable = nil; for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { + d->lastGpuTime[i] = 0; if (!d->sem[i]) d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1); } @@ -4350,12 +6423,13 @@ bool QMetalSwapChain::createOrResize() rtWrapper.setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget rtWrapper.d->pixelSize = pixelSize; - rtWrapper.d->dpr = float(window->devicePixelRatio()); + rtWrapper.d->dpr = scaleFactor; rtWrapper.d->sampleCount = samples; rtWrapper.d->colorAttCount = 1; rtWrapper.d->dsAttCount = ds ? 1 : 0; - qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, size %dx%d", pixelSize.width(), pixelSize.height()); + qCDebug(QRHI_LOG_INFO, "got CAMetalLayer, pixel size %dx%d (scale %.2f)", + pixelSize.width(), pixelSize.height(), scaleFactor); if (samples > 1) { MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init]; @@ -4384,21 +6458,28 @@ QRhiSwapChainHdrInfo QMetalSwapChain::hdrInfo() { QRhiSwapChainHdrInfo info; info.limitsType = QRhiSwapChainHdrInfo::ColorComponentValue; - if (m_format == SDR) { - info.limits.colorComponentValue.maxColorComponentValue = 1; - return info; - } + info.limits.colorComponentValue.maxColorComponentValue = 1; + info.limits.colorComponentValue.maxPotentialColorComponentValue = 1; + info.luminanceBehavior = QRhiSwapChainHdrInfo::DisplayReferred; // 1.0 = SDR white + info.sdrWhiteLevel = 200; // typical value, but dummy (don't know the real one); won't matter due to being display-referred -#ifdef Q_OS_MACOS - info.isHardCodedDefaults = false; - NSView *view = reinterpret_cast<NSView *>(window->winId()); - info.limits.colorComponentValue.maxColorComponentValue = view.window.screen.maximumExtendedDynamicRangeColorComponentValue; -#else - // ### Fixme: Maybe retrieve the brightness from the screen and if we're not at full brightness we might be able to do more. - // For now, assume 2, in line with iPhone 12 specs that claim 625 nits max brightness and 1200 nits max HDR brightness. - info.isHardCodedDefaults = true; - info.limits.colorComponentValue.maxColorComponentValue = 2; + if (m_window) { + // Must use m_window, not window, given this may be called before createOrResize(). +#if defined(Q_OS_MACOS) + NSView *view = reinterpret_cast<NSView *>(m_window->winId()); + NSScreen *screen = view.window.screen; + info.limits.colorComponentValue.maxColorComponentValue = screen.maximumExtendedDynamicRangeColorComponentValue; + info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.maximumPotentialExtendedDynamicRangeColorComponentValue; +#elif defined(Q_OS_IOS) + if (@available(iOS 16.0, *)) { + UIView *view = reinterpret_cast<UIView *>(m_window->winId()); + UIScreen *screen = view.window.windowScene.screen; + info.limits.colorComponentValue.maxColorComponentValue = view.window.windowScene.screen.currentEDRHeadroom; + info.limits.colorComponentValue.maxPotentialColorComponentValue = screen.potentialEDRHeadroom; + } #endif + } + return info; } |