diff options
Diffstat (limited to 'src/gui/rhi/qrhigles2.cpp')
-rw-r--r-- | src/gui/rhi/qrhigles2.cpp | 3661 |
1 files changed, 2898 insertions, 763 deletions
diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index 4440182264..62830c291d 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -1,45 +1,14 @@ -/**************************************************************************** -** -** Copyright (C) 2019 The Qt Company Ltd. -** Contact: http://www.qt.io/licensing/ -** -** This file is part of the Qt Gui module -** -** $QT_BEGIN_LICENSE:LGPL3$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see http://www.qt.io/terms-conditions. For further -** information use the contact form at http://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPLv3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or later as published by the Free -** Software Foundation and appearing in the file LICENSE.GPL included in -** the packaging of this file. Please review the following information to -** ensure the GNU General Public License version 2.0 requirements will be -** met: http://www.gnu.org/licenses/gpl-2.0.html. 
-** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "qrhigles2_p_p.h" -#include <QWindow> +// Copyright (C) 2023 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#include "qrhigles2_p.h" #include <QOffscreenSurface> #include <QOpenGLContext> +#include <QtCore/qmap.h> #include <QtGui/private/qopenglextensions_p.h> #include <QtGui/private/qopenglprogrambinarycache_p.h> +#include <QtGui/private/qwindow_p.h> +#include <qpa/qplatformopenglcontext.h> #include <qmath.h> QT_BEGIN_NAMESPACE @@ -59,13 +28,17 @@ QT_BEGIN_NAMESPACE /*! \class QRhiGles2InitParams - \internal \inmodule QtGui + \since 6.6 \brief OpenGL specific initialization parameters. - An OpenGL-based QRhi needs an already created QOffscreenSurface at minimum. - Additionally, while optional, it is recommended that the QWindow the first - QRhiSwapChain will target is passed in as well. + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. + + An OpenGL-based QRhi needs an already created QSurface that can be used in + combination with QOpenGLContext. Most commonly, this is a QOffscreenSurface + in practice. Additionally, while optional, it is recommended that the QWindow + the first QRhiSwapChain will target is passed in as well. \badcode QOffscreenSurface *fallbackSurface = QRhiGles2InitParams::newFallbackSurface(); @@ -82,21 +55,21 @@ QT_BEGIN_NAMESPACE thread) are satisfied. The implicitly created context is destroyed automatically together with the QRhi. - The QSurfaceFormat for the context is specified in \l format. The + The QSurfaceFormat for the context is specified in \c format. The constructor sets this to QSurfaceFormat::defaultFormat() so applications - that use QSurfaceFormat::setDefaultFormat() do not need to set the format - again. 
+ that call QSurfaceFormat::setDefaultFormat() with the appropriate settings + before the constructor runs will not need to change the value of \c format. - \note The depth and stencil buffer sizes are set automatically to 24 and 8 - when no size was explicitly set for these buffers in \l format. As there - are possible adjustments to \l format, applications can use - adjustedFormat() to query the effective format that is passed to - QOpenGLContext::setFormat() internally. + \note Remember to set the depth and stencil buffer sizes to 24 and 8 when + the renderer relies on depth or stencil testing, either in the global + default QSurfaceFormat, or, alternatively, separately in all the involved + QSurfaceFormat instances: in \c format, the format argument passed to + newFallbackSurface(), and on any QWindow that is used with the QRhi. - A QOffscreenSurface has to be specified in \l fallbackSurface. In order to - prevent mistakes in threaded situations, this is never created - automatically by the QRhi since, like QWindow, QOffscreenSurface can only - be created on the gui/main thread. + A QSurface has to be specified in \c fallbackSurface. In order to prevent + mistakes in threaded situations, this is never created automatically by the + QRhi because, like QWindow, instances of QSurface subclasses can often be + created on the gui/main thread only. As a convenience, applications can use newFallbackSurface() which creates and returns a QOffscreenSurface that is compatible with the QOpenGLContext @@ -104,23 +77,27 @@ QT_BEGIN_NAMESPACE of the returned QOffscreenSurface is transferred to the caller and the QRhi will not destroy it. - \note QRhiSwapChain can only target QWindow instances that have their - surface type set to QSurface::OpenGLSurface. + \note With the OpenGL backend, QRhiSwapChain can only target QWindow + instances that have their surface type set to QSurface::OpenGLSurface or + QSurface::RasterGLSurface. - \note \l window is optional. 
It is recommended to specify it whenever + \note \c window is optional. It is recommended to specify it whenever possible, in order to avoid problems on multi-adapter and multi-screen - systems. When \l window is not set, the very first - QOpenGLContext::makeCurrent() happens with \l fallbackSurface which may be + systems. When \c window is not set, the very first + QOpenGLContext::makeCurrent() happens with \c fallbackSurface which may be an invisible window on some platforms (for example, Windows) and that may trigger unexpected problems in some cases. + In case resource sharing with an existing QOpenGLContext is desired, \c + shareContext can be set to an existing QOpenGLContext. Alternatively, + Qt::AA_ShareOpenGLContexts is honored as well, when enabled. + \section2 Working with existing OpenGL contexts When interoperating with another graphics engine, it may be necessary to get a QRhi instance that uses the same OpenGL context. This can be achieved by passing a pointer to a QRhiGles2NativeHandles to QRhi::create(). The - \l{QRhiGles2NativeHandles::context}{context} must be set to a non-null - value. + \c{QRhiGles2NativeHandles::context} must be set to a non-null value then. An alternative approach is to create a QOpenGLContext that \l{QOpenGLContext::setShareContext()}{shares resources} with the other @@ -131,12 +108,50 @@ QT_BEGIN_NAMESPACE */ /*! + \variable QRhiGles2InitParams::format + + The QSurfaceFormat, initialized to QSurfaceFormat::defaultFormat() by default. +*/ + +/*! + \variable QRhiGles2InitParams::fallbackSurface + + A QSurface compatible with \l format. Typically a QOffscreenSurface. + Providing this is mandatory. Be aware of the threading implications: a + QOffscreenSurface, like QWindow, must only ever be created and destroyed on + the main (gui) thread, even if the QRhi is created and operates on another + thread. +*/ + +/*! 
+ \variable QRhiGles2InitParams::window + + Optional, but setting it is recommended when targeting a QWindow with the + QRhi. +*/ + +/*! + \variable QRhiGles2InitParams::shareContext + + Optional, the QOpenGLContext to share resources with. QRhi creates its own + context, and setting this member to a valid QOpenGLContext leads to calling + \l{QOpenGLContext::setShareContext()}{setShareContext()} with it. +*/ + +/*! \class QRhiGles2NativeHandles - \internal \inmodule QtGui + \since 6.6 \brief Holds the OpenGL context used by the QRhi. + + \note This is a RHI API with limited compatibility guarantees, see \l QRhi + for details. */ +/*! + \variable QRhiGles2NativeHandles::context +*/ + #ifndef GL_BGRA #define GL_BGRA 0x80E1 #endif @@ -157,6 +172,10 @@ QT_BEGIN_NAMESPACE #define GL_R16 0x822A #endif +#ifndef GL_RG16 +#define GL_RG16 0x822C +#endif + #ifndef GL_RED #define GL_RED 0x1903 #endif @@ -197,6 +216,10 @@ QT_BEGIN_NAMESPACE #define GL_DEPTH_COMPONENT32F 0x8CAC #endif +#ifndef GL_UNSIGNED_INT_24_8 +#define GL_UNSIGNED_INT_24_8 0x84FA +#endif + #ifndef GL_STENCIL_INDEX #define GL_STENCIL_INDEX 0x1901 #endif @@ -222,7 +245,7 @@ QT_BEGIN_NAMESPACE #endif #ifndef GL_FRAMEBUFFER_SRGB -#define GL_FRAMEBUFFER_SRGB 0x8DB9 +#define GL_FRAMEBUFFER_SRGB 0x8DB9 #endif #ifndef GL_READ_FRAMEBUFFER @@ -273,16 +296,48 @@ QT_BEGIN_NAMESPACE #define GL_COMPUTE_SHADER 0x91B9 #endif -#ifndef GL_ALL_BARRIER_BITS -#define GL_ALL_BARRIER_BITS 0xFFFFFFFF +#ifndef GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT +#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001 +#endif + +#ifndef GL_ELEMENT_ARRAY_BARRIER_BIT +#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002 +#endif + +#ifndef GL_UNIFORM_BARRIER_BIT +#define GL_UNIFORM_BARRIER_BIT 0x00000004 +#endif + +#ifndef GL_BUFFER_UPDATE_BARRIER_BIT +#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200 +#endif + +#ifndef GL_SHADER_STORAGE_BARRIER_BIT +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#endif + +#ifndef GL_TEXTURE_FETCH_BARRIER_BIT +#define 
GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008 #endif #ifndef GL_SHADER_IMAGE_ACCESS_BARRIER_BIT #define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 #endif -#ifndef GL_SHADER_STORAGE_BARRIER_BIT -#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#ifndef GL_PIXEL_BUFFER_BARRIER_BIT +#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080 +#endif + +#ifndef GL_TEXTURE_UPDATE_BARRIER_BIT +#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100 +#endif + +#ifndef GL_FRAMEBUFFER_BARRIER_BIT +#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400 +#endif + +#ifndef GL_ALL_BARRIER_BITS +#define GL_ALL_BARRIER_BITS 0xFFFFFFFF #endif #ifndef GL_VERTEX_PROGRAM_POINT_SIZE @@ -297,10 +352,22 @@ QT_BEGIN_NAMESPACE #define GL_MAP_READ_BIT 0x0001 #endif +#ifndef GL_MAP_WRITE_BIT +#define GL_MAP_WRITE_BIT 0x0002 +#endif + #ifndef GL_TEXTURE_2D_MULTISAMPLE #define GL_TEXTURE_2D_MULTISAMPLE 0x9100 #endif +#ifndef GL_TEXTURE_2D_MULTISAMPLE_ARRAY +#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 +#endif + +#ifndef GL_TEXTURE_EXTERNAL_OES +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#endif + #ifndef GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS #define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB #endif @@ -313,6 +380,154 @@ QT_BEGIN_NAMESPACE #define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF #endif +#ifndef GL_TEXTURE_CUBE_MAP_SEAMLESS +#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F +#endif + +#ifndef GL_CONTEXT_LOST +#define GL_CONTEXT_LOST 0x0507 +#endif + +#ifndef GL_PROGRAM_BINARY_LENGTH +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#endif + +#ifndef GL_NUM_PROGRAM_BINARY_FORMATS +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#endif + +#ifndef GL_UNPACK_ROW_LENGTH +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#endif + +#ifndef GL_TEXTURE_3D +#define GL_TEXTURE_3D 0x806F +#endif + +#ifndef GL_TEXTURE_WRAP_R +#define GL_TEXTURE_WRAP_R 0x8072 +#endif + +#ifndef GL_TEXTURE_RECTANGLE +#define GL_TEXTURE_RECTANGLE 0x84F5 +#endif + +#ifndef GL_TEXTURE_2D_ARRAY +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#endif + +#ifndef GL_MAX_ARRAY_TEXTURE_LAYERS 
+#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#endif + +#ifndef GL_MAX_VERTEX_UNIFORM_COMPONENTS +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#endif + +#ifndef GL_MAX_FRAGMENT_UNIFORM_COMPONENTS +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#endif + +#ifndef GL_MAX_VERTEX_UNIFORM_VECTORS +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#endif + +#ifndef GL_MAX_FRAGMENT_UNIFORM_VECTORS +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#endif + +#ifndef GL_RGB10_A2 +#define GL_RGB10_A2 0x8059 +#endif + +#ifndef GL_UNSIGNED_INT_2_10_10_10_REV +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#endif + +#ifndef GL_MAX_VARYING_COMPONENTS +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#endif + +#ifndef GL_MAX_VARYING_FLOATS +#define GL_MAX_VARYING_FLOATS 0x8B4B +#endif + +#ifndef GL_MAX_VARYING_VECTORS +#define GL_MAX_VARYING_VECTORS 0x8DFC +#endif + +#ifndef GL_TESS_CONTROL_SHADER +#define GL_TESS_CONTROL_SHADER 0x8E88 +#endif + +#ifndef GL_TESS_EVALUATION_SHADER +#define GL_TESS_EVALUATION_SHADER 0x8E87 +#endif + +#ifndef GL_PATCH_VERTICES +#define GL_PATCH_VERTICES 0x8E72 +#endif + +#ifndef GL_LINE +#define GL_LINE 0x1B01 +#endif + +#ifndef GL_FILL +#define GL_FILL 0x1B02 +#endif + +#ifndef GL_PATCHES +#define GL_PATCHES 0x000E +#endif + +#ifndef GL_GEOMETRY_SHADER +#define GL_GEOMETRY_SHADER 0x8DD9 +#endif + +#ifndef GL_BACK_LEFT +#define GL_BACK_LEFT 0x0402 +#endif + +#ifndef GL_BACK_RIGHT +#define GL_BACK_RIGHT 0x0403 +#endif + +#ifndef GL_TEXTURE_1D +# define GL_TEXTURE_1D 0x0DE0 +#endif + +#ifndef GL_TEXTURE_1D_ARRAY +# define GL_TEXTURE_1D_ARRAY 0x8C18 +#endif + +#ifndef GL_HALF_FLOAT +#define GL_HALF_FLOAT 0x140B +#endif + +#ifndef GL_MAX_VERTEX_OUTPUT_COMPONENTS +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#endif + +#ifndef GL_TIMESTAMP +#define GL_TIMESTAMP 0x8E28 +#endif + +#ifndef GL_QUERY_RESULT +#define GL_QUERY_RESULT 0x8866 +#endif + +#ifndef GL_QUERY_RESULT_AVAILABLE +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#endif + +#ifndef GL_BUFFER +#define 
GL_BUFFER 0x82E0 +#endif + +#ifndef GL_PROGRAM +#define GL_PROGRAM 0x82E2 +#endif + /*! Constructs a new QRhiGles2InitParams. @@ -324,28 +539,12 @@ QRhiGles2InitParams::QRhiGles2InitParams() } /*! - \return the QSurfaceFormat that will be set on the QOpenGLContext before - calling QOpenGLContext::create(). This format is based on \a format, but - may be adjusted. Applicable only when QRhi creates the context. - Applications are advised to set this format on their QWindow in order to - avoid potential BAD_MATCH failures. - */ -QSurfaceFormat QRhiGles2InitParams::adjustedFormat(const QSurfaceFormat &format) -{ - QSurfaceFormat fmt = format; - - if (fmt.depthBufferSize() == -1) - fmt.setDepthBufferSize(24); - if (fmt.stencilBufferSize() == -1) - fmt.setStencilBufferSize(8); - - return fmt; -} - -/*! \return a new QOffscreenSurface that can be used with a QRhi by passing it via a QRhiGles2InitParams. + When \a format is not specified, its default value is the global default + format settable via QSurfaceFormat::setDefaultFormat(). + \a format is adjusted as appropriate in order to avoid having problems afterwards due to an incompatible context and surface. @@ -357,7 +556,7 @@ QSurfaceFormat QRhiGles2InitParams::adjustedFormat(const QSurfaceFormat &format) */ QOffscreenSurface *QRhiGles2InitParams::newFallbackSurface(const QSurfaceFormat &format) { - QSurfaceFormat fmt = adjustedFormat(format); + QSurfaceFormat fmt = format; // To resolve all fields in the format as much as possible, create a context. // This may be heavy, but allows avoiding BAD_MATCH on some systems. 
@@ -378,9 +577,10 @@ QOffscreenSurface *QRhiGles2InitParams::newFallbackSurface(const QSurfaceFormat QRhiGles2::QRhiGles2(QRhiGles2InitParams *params, QRhiGles2NativeHandles *importDevice) : ofr(this) { - requestedFormat = QRhiGles2InitParams::adjustedFormat(params->format); + requestedFormat = params->format; fallbackSurface = params->fallbackSurface; maybeWindow = params->window; // may be null + maybeShareContext = params->shareContext; // may be null importedContext = importDevice != nullptr; if (importedContext) { @@ -392,21 +592,50 @@ QRhiGles2::QRhiGles2(QRhiGles2InitParams *params, QRhiGles2NativeHandles *import } } -bool QRhiGles2::ensureContext(QSurface *surface) const +static inline QSurface *currentSurfaceForCurrentContext(QOpenGLContext *ctx) { - bool nativeWindowGone = false; - if (surface && surface->surfaceClass() == QSurface::Window && !surface->surfaceHandle()) { - surface = fallbackSurface; - nativeWindowGone = true; - } + if (QOpenGLContext::currentContext() != ctx) + return nullptr; - if (!surface) - surface = fallbackSurface; + QSurface *currentSurface = ctx->surface(); + if (!currentSurface) + return nullptr; - if (needsMakeCurrent) - needsMakeCurrent = false; - else if (!nativeWindowGone && QOpenGLContext::currentContext() == ctx && (surface == fallbackSurface || ctx->surface() == surface)) + if (currentSurface->surfaceClass() == QSurface::Window && !currentSurface->surfaceHandle()) + return nullptr; + + return currentSurface; +} + +QSurface *QRhiGles2::evaluateFallbackSurface() const +{ + // With Apple's deprecated OpenGL support we need to minimize the usage of + // QOffscreenSurface since delicate problems can pop up with + // NSOpenGLContext and drawables. +#if defined(Q_OS_MACOS) + return maybeWindow && maybeWindow->handle() ? 
static_cast<QSurface *>(maybeWindow) : fallbackSurface; +#else + return fallbackSurface; +#endif +} + +bool QRhiGles2::ensureContext(QSurface *surface) const +{ + if (!surface) { + // null means any surface is good because not going to render + if (currentSurfaceForCurrentContext(ctx)) + return true; + // if the context is not already current with a valid surface, use our + // fallback surface, but platform specific quirks may apply + surface = evaluateFallbackSurface(); + } else if (surface->surfaceClass() == QSurface::Window && !surface->surfaceHandle()) { + // the window is not usable anymore (no native window underneath), behave as if offscreen + surface = evaluateFallbackSurface(); + } else if (!needsMakeCurrentDueToSwap && currentSurfaceForCurrentContext(ctx) == surface) { + // bail out if the makeCurrent is not necessary return true; + } + needsMakeCurrentDueToSwap = false; if (!ctx->makeCurrent(surface)) { if (ctx->isValid()) { @@ -421,17 +650,74 @@ bool QRhiGles2::ensureContext(QSurface *surface) const return true; } +static inline GLenum toGlCompressedTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags) +{ + const bool srgb = flags.testFlag(QRhiTexture::sRGB); + switch (format) { + case QRhiTexture::BC1: + return srgb ? 0x8C4C : 0x83F0; + case QRhiTexture::BC2: + return srgb ? 0x8C4E : 0x83F2; + case QRhiTexture::BC3: + return srgb ? 0x8C4F : 0x83F3; + + case QRhiTexture::ETC2_RGB8: + return srgb ? 0x9275 : 0x9274; + case QRhiTexture::ETC2_RGB8A1: + return srgb ? 0x9277 : 0x9276; + case QRhiTexture::ETC2_RGBA8: + return srgb ? 0x9279 : 0x9278; + + case QRhiTexture::ASTC_4x4: + return srgb ? 0x93D0 : 0x93B0; + case QRhiTexture::ASTC_5x4: + return srgb ? 0x93D1 : 0x93B1; + case QRhiTexture::ASTC_5x5: + return srgb ? 0x93D2 : 0x93B2; + case QRhiTexture::ASTC_6x5: + return srgb ? 0x93D3 : 0x93B3; + case QRhiTexture::ASTC_6x6: + return srgb ? 0x93D4 : 0x93B4; + case QRhiTexture::ASTC_8x5: + return srgb ? 
0x93D5 : 0x93B5; + case QRhiTexture::ASTC_8x6: + return srgb ? 0x93D6 : 0x93B6; + case QRhiTexture::ASTC_8x8: + return srgb ? 0x93D7 : 0x93B7; + case QRhiTexture::ASTC_10x5: + return srgb ? 0x93D8 : 0x93B8; + case QRhiTexture::ASTC_10x6: + return srgb ? 0x93D9 : 0x93B9; + case QRhiTexture::ASTC_10x8: + return srgb ? 0x93DA : 0x93BA; + case QRhiTexture::ASTC_10x10: + return srgb ? 0x93DB : 0x93BB; + case QRhiTexture::ASTC_12x10: + return srgb ? 0x93DC : 0x93BC; + case QRhiTexture::ASTC_12x12: + return srgb ? 0x93DD : 0x93BD; + + default: + return 0; // this is reachable, just return an invalid format + } +} + bool QRhiGles2::create(QRhi::Flags flags) { - Q_UNUSED(flags); Q_ASSERT(fallbackSurface); + rhiFlags = flags; if (!importedContext) { ctx = new QOpenGLContext; ctx->setFormat(requestedFormat); - if (QOpenGLContext *shareContext = qt_gl_global_share_context()) { + if (maybeShareContext) { + ctx->setShareContext(maybeShareContext); + ctx->setScreen(maybeShareContext->screen()); + } else if (QOpenGLContext *shareContext = qt_gl_global_share_context()) { ctx->setShareContext(shareContext); ctx->setScreen(shareContext->screen()); + } else if (maybeWindow) { + ctx->setScreen(maybeWindow->screen()); } if (!ctx->create()) { qWarning("QRhiGles2: Failed to create context"); @@ -446,6 +732,40 @@ bool QRhiGles2::create(QRhi::Flags flags) return false; f = static_cast<QOpenGLExtensions *>(ctx->extraFunctions()); + const QSurfaceFormat actualFormat = ctx->format(); + caps.gles = actualFormat.renderableType() == QSurfaceFormat::OpenGLES; + + if (!caps.gles) { + glPolygonMode = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLenum)>( + ctx->getProcAddress(QByteArrayLiteral("glPolygonMode"))); + + glTexImage1D = reinterpret_cast<void(QOPENGLF_APIENTRYP)( + GLenum, GLint, GLint, GLsizei, GLint, GLenum, GLenum, const void *)>( + ctx->getProcAddress(QByteArrayLiteral("glTexImage1D"))); + + glTexStorage1D = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLint, GLenum, 
GLsizei)>( + ctx->getProcAddress(QByteArrayLiteral("glTexStorage1D"))); + + glTexSubImage1D = reinterpret_cast<void(QOPENGLF_APIENTRYP)( + GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *)>( + ctx->getProcAddress(QByteArrayLiteral("glTexSubImage1D"))); + + glCopyTexSubImage1D = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLint, GLint, GLint, + GLint, GLsizei)>( + ctx->getProcAddress(QByteArrayLiteral("glCopyTexSubImage1D"))); + + glCompressedTexImage1D = reinterpret_cast<void(QOPENGLF_APIENTRYP)( + GLenum, GLint, GLenum, GLsizei, GLint, GLsizei, const GLvoid *)>( + ctx->getProcAddress(QByteArrayLiteral("glCompressedTexImage1D"))); + + glCompressedTexSubImage1D = reinterpret_cast<void(QOPENGLF_APIENTRYP)( + GLenum, GLint, GLint, GLsizei, GLenum, GLsizei, const GLvoid *)>( + ctx->getProcAddress(QByteArrayLiteral("glCompressedTexSubImage1D"))); + + glFramebufferTexture1D = + reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLenum, GLenum, GLuint, GLint)>( + ctx->getProcAddress(QByteArrayLiteral("glFramebufferTexture1D"))); + } const char *vendor = reinterpret_cast<const char *>(f->glGetString(GL_VENDOR)); const char *renderer = reinterpret_cast<const char *>(f->glGetString(GL_RENDERER)); @@ -453,26 +773,89 @@ bool QRhiGles2::create(QRhi::Flags flags) if (vendor && renderer && version) qCDebug(QRHI_LOG_INFO, "OpenGL VENDOR: %s RENDERER: %s VERSION: %s", vendor, renderer, version); - const QSurfaceFormat actualFormat = ctx->format(); + if (vendor) { + driverInfoStruct.deviceName += QByteArray(vendor); + driverInfoStruct.deviceName += ' '; + } + if (renderer) { + driverInfoStruct.deviceName += QByteArray(renderer); + driverInfoStruct.deviceName += ' '; + } + if (version) + driverInfoStruct.deviceName += QByteArray(version); caps.ctxMajor = actualFormat.majorVersion(); caps.ctxMinor = actualFormat.minorVersion(); GLint n = 0; f->glGetIntegerv(GL_NUM_COMPRESSED_TEXTURE_FORMATS, &n); - supportedCompressedFormats.resize(n); - if (n > 0) - 
f->glGetIntegerv(GL_COMPRESSED_TEXTURE_FORMATS, supportedCompressedFormats.data()); + if (n > 0) { + QVarLengthArray<GLint, 16> compressedTextureFormats(n); + f->glGetIntegerv(GL_COMPRESSED_TEXTURE_FORMATS, compressedTextureFormats.data()); + for (GLint format : compressedTextureFormats) + supportedCompressedFormats.insert(format); + + } + // The above looks nice, if only it worked always. With GLES the list we + // query is likely the full list of compressed formats (mostly anything + // that can be decoded). With OpenGL however the list is not required to + // include all formats due to the way the spec is worded. For instance, we + // cannot rely on ASTC formats being present in the list on non-ES. Some + // drivers do include them (Intel, NVIDIA), some don't (Mesa). On the other + // hand, relying on extension strings only is not ok: for example, Intel + // reports GL_KHR_texture_compression_astc_ldr whereas NVIDIA doesn't. So + // the only reasonable thing to do is to query the list always and then see + // if there is something we can add - if not already in there. 
+ std::array<QRhiTexture::Flags, 2> textureVariantFlags; + textureVariantFlags[0] = {}; + textureVariantFlags[1] = QRhiTexture::sRGB; + if (f->hasOpenGLExtension(QOpenGLExtensions::DDSTextureCompression)) { + for (QRhiTexture::Flags f : textureVariantFlags) { + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::BC1, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::BC2, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::BC3, f)); + } + } + if (f->hasOpenGLExtension(QOpenGLExtensions::ETC2TextureCompression)) { + for (QRhiTexture::Flags f : textureVariantFlags) { + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ETC2_RGB8, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ETC2_RGB8A1, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ETC2_RGBA8, f)); + } + } + if (f->hasOpenGLExtension(QOpenGLExtensions::ASTCTextureCompression)) { + for (QRhiTexture::Flags f : textureVariantFlags) { + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_4x4, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_5x4, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_5x5, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_6x5, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_6x6, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_8x5, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_8x6, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_8x8, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_10x5, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_10x8, f)); + 
supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_10x10, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_12x10, f)); + supportedCompressedFormats.insert(toGlCompressedTextureFormat(QRhiTexture::ASTC_12x12, f)); + } + } f->glGetIntegerv(GL_MAX_TEXTURE_SIZE, &caps.maxTextureSize); - if (caps.ctxMajor >= 3 || actualFormat.renderableType() == QSurfaceFormat::OpenGL) { + if (!caps.gles || caps.ctxMajor >= 3) { + // non-ES or ES 3.0+ f->glGetIntegerv(GL_MAX_DRAW_BUFFERS, &caps.maxDrawBuffers); + caps.hasDrawBuffersFunc = true; f->glGetIntegerv(GL_MAX_SAMPLES, &caps.maxSamples); caps.maxSamples = qMax(1, caps.maxSamples); } else { + // ES 2.0 / WebGL 1 caps.maxDrawBuffers = 1; - caps.maxSamples = 1; + caps.hasDrawBuffersFunc = false; + // This does not mean MSAA is not supported, just that we cannot query + // the supported sample counts. Assume that 4x is always supported. + caps.maxSamples = 4; } caps.msaaRenderBuffer = f->hasOpenGLExtension(QOpenGLExtensions::FramebufferMultisample) @@ -481,20 +864,27 @@ bool QRhiGles2::create(QRhi::Flags flags) caps.npotTextureFull = f->hasOpenGLFeature(QOpenGLFunctions::NPOTTextures) && f->hasOpenGLFeature(QOpenGLFunctions::NPOTTextureRepeat); - caps.gles = actualFormat.renderableType() == QSurfaceFormat::OpenGLES; if (caps.gles) caps.fixedIndexPrimitiveRestart = caps.ctxMajor >= 3; // ES 3.0 else caps.fixedIndexPrimitiveRestart = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 3); // 4.3 - if (caps.fixedIndexPrimitiveRestart) + if (caps.fixedIndexPrimitiveRestart) { +#ifdef Q_OS_WASM + // WebGL 2 behaves as if GL_PRIMITIVE_RESTART_FIXED_INDEX was always + // enabled (i.e. matching D3D/Metal), and the value cannot be passed to + // glEnable, so skip the call. 
+#else f->glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); +#endif + } caps.bgraExternalFormat = f->hasOpenGLExtension(QOpenGLExtensions::BGRATextureFormat); caps.bgraInternalFormat = caps.bgraExternalFormat && caps.gles; caps.r8Format = f->hasOpenGLFeature(QOpenGLFunctions::TextureRGFormats); caps.r16Format = f->hasOpenGLExtension(QOpenGLExtensions::Sized16Formats); caps.floatFormats = caps.ctxMajor >= 3; // 3.0 or ES 3.0 + caps.rgb10Formats = caps.ctxMajor >= 3; // 3.0 or ES 3.0 caps.depthTexture = caps.ctxMajor >= 3; // 3.0 or ES 3.0 caps.packedDepthStencil = f->hasOpenGLExtension(QOpenGLExtensions::PackedDepthStencil); #ifdef Q_OS_WASM @@ -502,7 +892,13 @@ bool QRhiGles2::create(QRhi::Flags flags) #else caps.needsDepthStencilCombinedAttach = false; #endif - caps.srgbCapableDefaultFramebuffer = f->hasOpenGLExtension(QOpenGLExtensions::SRGBFrameBuffer); + + // QOpenGLExtensions::SRGBFrameBuffer is not useful here. We need to know if + // controlling the sRGB-on-shader-write state is supported, not whether the + // default framebuffer is sRGB-capable. And there are two different + // extensions for desktop and ES. + caps.srgbWriteControl = ctx->hasExtension("GL_EXT_framebuffer_sRGB") || ctx->hasExtension("GL_EXT_sRGB_write_control"); + caps.coreProfile = actualFormat.profile() == QSurfaceFormat::CoreProfile; if (caps.gles) @@ -553,21 +949,153 @@ bool QRhiGles2::create(QRhi::Flags flags) caps.nonBaseLevelFramebufferTexture = true; caps.texelFetch = caps.ctxMajor >= 3; // 3.0 or ES 3.0 - caps.uintAttributes = caps.ctxMajor >= 3; // 3.0 or ES 3.0 + caps.intAttributes = caps.ctxMajor >= 3; // 3.0 or ES 3.0 caps.screenSpaceDerivatives = f->hasOpenGLExtension(QOpenGLExtensions::StandardDerivatives); - // TO DO: We could also check for ARB_texture_multisample but it is not - // currently in QOpenGLExtensions - // 3.0 or ES 3.1 - caps.multisampledTexture = caps.gles - ? 
(caps.ctxMajor > 3 || (caps.ctxMajor >= 3 && caps.ctxMinor >= 1)) - : (caps.ctxMajor >= 3); + if (caps.gles) + caps.multisampledTexture = caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 1); // ES 3.1 + else + caps.multisampledTexture = caps.ctxMajor >= 3; // 3.0 + + // Program binary support: only the core stuff, do not bother with the old + // extensions like GL_OES_get_program_binary + if (caps.gles) + caps.programBinary = caps.ctxMajor >= 3; // ES 3.0 + else + caps.programBinary = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 1); // 4.1 + + if (caps.programBinary) { + GLint fmtCount = 0; + f->glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &fmtCount); + if (fmtCount < 1) + caps.programBinary = false; + } + + caps.texture3D = caps.ctxMajor >= 3; // 3.0 + + if (caps.gles) + caps.texture1D = false; // ES + else + caps.texture1D = glTexImage1D && (caps.ctxMajor >= 2); // 2.0 + + if (caps.gles) + caps.tessellation = caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 2); // ES 3.2 + else + caps.tessellation = caps.ctxMajor >= 4; // 4.0 + + if (caps.gles) + caps.geometryShader = caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 2); // ES 3.2 + else + caps.geometryShader = caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 2); // 3.2 + + if (caps.ctxMajor >= 3) { // 3.0 or ES 3.0 + GLint maxArraySize = 0; + f->glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxArraySize); + caps.maxTextureArraySize = maxArraySize; + } else { + caps.maxTextureArraySize = 0; + } + + // The ES 2.0 spec only has MAX_xxxx_VECTORS. ES 3.0 and up has both + // *VECTORS and *COMPONENTS. OpenGL 2.0-4.0 only has MAX_xxxx_COMPONENTS. + // 4.1 and above has both. What a mess. 
+ if (caps.gles) { + GLint maxVertexUniformVectors = 0; + f->glGetIntegerv(GL_MAX_VERTEX_UNIFORM_VECTORS, &maxVertexUniformVectors); + GLint maxFragmentUniformVectors = 0; + f->glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &maxFragmentUniformVectors); + caps.maxUniformVectors = qMin(maxVertexUniformVectors, maxFragmentUniformVectors); + } else { + GLint maxVertexUniformComponents = 0; + f->glGetIntegerv(GL_MAX_VERTEX_UNIFORM_COMPONENTS, &maxVertexUniformComponents); + GLint maxFragmentUniformComponents = 0; + f->glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &maxFragmentUniformComponents); + caps.maxUniformVectors = qMin(maxVertexUniformComponents, maxFragmentUniformComponents) / 4; + } + + f->glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &caps.maxVertexInputs); + + if (caps.gles) { + f->glGetIntegerv(GL_MAX_VARYING_VECTORS, &caps.maxVertexOutputs); + } else if (caps.ctxMajor >= 3) { + GLint components = 0; + f->glGetIntegerv(caps.coreProfile ? GL_MAX_VERTEX_OUTPUT_COMPONENTS : GL_MAX_VARYING_COMPONENTS, &components); + caps.maxVertexOutputs = components / 4; + } else { + // OpenGL before 3.0 only has this, and not the same as + // MAX_VARYING_COMPONENTS strictly speaking, but will do. + GLint components = 0; + f->glGetIntegerv(GL_MAX_VARYING_FLOATS, &components); + if (components > 0) + caps.maxVertexOutputs = components / 4; + } if (!caps.gles) { f->glEnable(GL_VERTEX_PROGRAM_POINT_SIZE); - f->glEnable(GL_POINT_SPRITE); + if (!caps.coreProfile) + f->glEnable(GL_POINT_SPRITE); } // else (with gles) these are always on + // Match D3D and others when it comes to seamless cubemap filtering. + // ES 3.0+ has this always enabled. (hopefully) + // ES 2.0 and GL < 3.2 will not have it. + if (!caps.gles && (caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 2))) + f->glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + + caps.halfAttributes = f->hasOpenGLExtension(QOpenGLExtensions::HalfFloatVertex); + + // We always require GL_OVR_multiview2 for symmetry with other backends. 
+ caps.multiView = f->hasOpenGLExtension(QOpenGLExtensions::MultiView) + && f->hasOpenGLExtension(QOpenGLExtensions::MultiViewExtended); + if (caps.multiView) { + glFramebufferTextureMultiviewOVR = + reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLenum, GLuint, GLint, GLint, GLsizei)>( + ctx->getProcAddress(QByteArrayLiteral("glFramebufferTextureMultiviewOVR"))); + } + + // Only do timestamp queries on OpenGL 3.3+. + caps.timestamps = !caps.gles && (caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 3)); + if (caps.timestamps) { + glQueryCounter = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLuint, GLenum)>( + ctx->getProcAddress(QByteArrayLiteral("glQueryCounter"))); + glGetQueryObjectui64v = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLuint, GLenum, quint64 *)>( + ctx->getProcAddress(QByteArrayLiteral("glGetQueryObjectui64v"))); + if (!glQueryCounter || !glGetQueryObjectui64v) + caps.timestamps = false; + } + + // glObjectLabel is available on OpenGL ES 3.2+ and OpenGL 4.3+ + if (caps.gles) + caps.objectLabel = caps.ctxMajor > 3 || (caps.ctxMajor == 3 && caps.ctxMinor >= 2); + else + caps.objectLabel = caps.ctxMajor > 4 || (caps.ctxMajor == 4 && caps.ctxMinor >= 3); + if (caps.objectLabel) { + glObjectLabel = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLuint, GLsizei, const GLchar *)>( + ctx->getProcAddress(QByteArrayLiteral("glObjectLabel"))); + } + + if (caps.gles) { + // This is the third way to get multisample rendering with GLES. (1. is + // multisample render buffer -> resolve to texture; 2. is multisample + // texture with GLES 3.1; 3. is this, avoiding the explicit multisample + // buffer and should be more efficient with tiled architectures. + // Interesting also because 2. 
does not seem to work in practice on + // devices such as the Quest 3) + caps.glesMultisampleRenderToTexture = ctx->hasExtension("GL_EXT_multisampled_render_to_texture"); + if (caps.glesMultisampleRenderToTexture) { + glFramebufferTexture2DMultisampleEXT = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLenum, GLenum, GLuint, GLint, GLsizei)>( + ctx->getProcAddress(QByteArrayLiteral("glFramebufferTexture2DMultisampleEXT"))); + } + caps.glesMultiviewMultisampleRenderToTexture = ctx->hasExtension("GL_OVR_multiview_multisampled_render_to_texture"); + if (caps.glesMultiviewMultisampleRenderToTexture) { + glFramebufferTextureMultisampleMultiviewOVR = reinterpret_cast<void(QOPENGLF_APIENTRYP)(GLenum, GLenum, GLuint, GLint, GLsizei, GLint, GLsizei)>( + ctx->getProcAddress(QByteArrayLiteral("glFramebufferTextureMultisampleMultiviewOVR"))); + } + } else { + caps.glesMultisampleRenderToTexture = false; + caps.glesMultiviewMultisampleRenderToTexture = false; + } + nativeHandlesStruct.context = ctx; contextLost = false; @@ -583,6 +1111,11 @@ void QRhiGles2::destroy() ensureContext(); executeDeferredReleases(); + if (ofr.tsQueries[0]) { + f->glDeleteQueries(2, ofr.tsQueries); + ofr.tsQueries[0] = ofr.tsQueries[1] = 0; + } + if (vao) { f->glDeleteVertexArrays(1, &vao); vao = 0; @@ -602,7 +1135,7 @@ void QRhiGles2::destroy() void QRhiGles2::executeDeferredReleases() { - for (int i = releaseQueue.count() - 1; i >= 0; --i) { + for (int i = releaseQueue.size() - 1; i >= 0; --i) { const QRhiGles2::DeferredReleaseEntry &e(releaseQueue[i]); switch (e.type) { case QRhiGles2::DeferredReleaseEntry::Buffer: @@ -620,6 +1153,7 @@ void QRhiGles2::executeDeferredReleases() break; case QRhiGles2::DeferredReleaseEntry::TextureRenderTarget: f->glDeleteFramebuffers(1, &e.textureRenderTarget.framebuffer); + f->glDeleteTextures(1, &e.textureRenderTarget.nonMsaaThrowawayDepthTexture); break; default: Q_UNREACHABLE(); @@ -639,23 +1173,12 @@ QList<int> QRhiGles2::supportedSampleCounts() const return 
supportedSampleCountList; } -int QRhiGles2::effectiveSampleCount(int sampleCount) const -{ - // Stay compatible with QSurfaceFormat and friends where samples == 0 means the same as 1. - const int s = qBound(1, sampleCount, 64); - if (!supportedSampleCounts().contains(s)) { - qWarning("Attempted to set unsupported sample count %d", sampleCount); - return 1; - } - return s; -} - QRhiSwapChain *QRhiGles2::createSwapChain() { return new QGles2SwapChain(this); } -QRhiBuffer *QRhiGles2::createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, int size) +QRhiBuffer *QRhiGles2::createBuffer(QRhiBuffer::Type type, QRhiBuffer::UsageFlags usage, quint32 size) { return new QGles2Buffer(this, type, usage, size); } @@ -687,58 +1210,6 @@ QMatrix4x4 QRhiGles2::clipSpaceCorrMatrix() const return QMatrix4x4(); // identity } -static inline GLenum toGlCompressedTextureFormat(QRhiTexture::Format format, QRhiTexture::Flags flags) -{ - const bool srgb = flags.testFlag(QRhiTexture::sRGB); - switch (format) { - case QRhiTexture::BC1: - return srgb ? 0x8C4C : 0x83F0; - case QRhiTexture::BC2: - return srgb ? 0x8C4E : 0x83F2; - case QRhiTexture::BC3: - return srgb ? 0x8C4F : 0x83F3; - - case QRhiTexture::ETC2_RGB8: - return srgb ? 0x9275 : 0x9274; - case QRhiTexture::ETC2_RGB8A1: - return srgb ? 0x9277 : 0x9276; - case QRhiTexture::ETC2_RGBA8: - return srgb ? 0x9279 : 0x9278; - - case QRhiTexture::ASTC_4x4: - return srgb ? 0x93D0 : 0x93B0; - case QRhiTexture::ASTC_5x4: - return srgb ? 0x93D1 : 0x93B1; - case QRhiTexture::ASTC_5x5: - return srgb ? 0x93D2 : 0x93B2; - case QRhiTexture::ASTC_6x5: - return srgb ? 0x93D3 : 0x93B3; - case QRhiTexture::ASTC_6x6: - return srgb ? 0x93D4 : 0x93B4; - case QRhiTexture::ASTC_8x5: - return srgb ? 0x93D5 : 0x93B5; - case QRhiTexture::ASTC_8x6: - return srgb ? 0x93D6 : 0x93B6; - case QRhiTexture::ASTC_8x8: - return srgb ? 0x93D7 : 0x93B7; - case QRhiTexture::ASTC_10x5: - return srgb ? 0x93D8 : 0x93B8; - case QRhiTexture::ASTC_10x6: - return srgb ? 
0x93D9 : 0x93B9; - case QRhiTexture::ASTC_10x8: - return srgb ? 0x93DA : 0x93BA; - case QRhiTexture::ASTC_10x10: - return srgb ? 0x93DB : 0x93BB; - case QRhiTexture::ASTC_12x10: - return srgb ? 0x93DC : 0x93BC; - case QRhiTexture::ASTC_12x12: - return srgb ? 0x93DD : 0x93BD; - - default: - return 0; // this is reachable, just return an invalid format - } -} - static inline void toGlTextureFormat(QRhiTexture::Format format, const QRhiGles2::Caps &caps, GLenum *glintformat, GLenum *glsizedintformat, GLenum *glformat, GLenum *gltype) @@ -762,6 +1233,12 @@ static inline void toGlTextureFormat(QRhiTexture::Format format, const QRhiGles2 *glformat = GL_RED; *gltype = GL_UNSIGNED_SHORT; break; + case QRhiTexture::RG16: + *glintformat = GL_RG16; + *glsizedintformat = *glintformat; + *glformat = GL_RG; + *gltype = GL_UNSIGNED_SHORT; + break; case QRhiTexture::R8: *glintformat = GL_R8; *glsizedintformat = *glintformat; @@ -804,6 +1281,12 @@ static inline void toGlTextureFormat(QRhiTexture::Format format, const QRhiGles2 *glformat = GL_RED; *gltype = GL_FLOAT; break; + case QRhiTexture::RGB10A2: + *glintformat = GL_RGB10_A2; + *glsizedintformat = *glintformat; + *glformat = GL_RGBA; + *gltype = GL_UNSIGNED_INT_2_10_10_10_REV; + break; case QRhiTexture::D16: *glintformat = GL_DEPTH_COMPONENT16; *glsizedintformat = *glintformat; @@ -814,13 +1297,13 @@ static inline void toGlTextureFormat(QRhiTexture::Format format, const QRhiGles2 *glintformat = GL_DEPTH_COMPONENT24; *glsizedintformat = *glintformat; *glformat = GL_DEPTH_COMPONENT; - *gltype = GL_UNSIGNED_SHORT; + *gltype = GL_UNSIGNED_INT; break; case QRhiTexture::D24S8: *glintformat = GL_DEPTH24_STENCIL8; *glsizedintformat = *glintformat; *glformat = GL_DEPTH_STENCIL; - *gltype = GL_UNSIGNED_SHORT; + *gltype = GL_UNSIGNED_INT_24_8; break; case QRhiTexture::D32F: *glintformat = GL_DEPTH_COMPONENT32F; @@ -866,6 +1349,9 @@ bool QRhiGles2::isTextureFormatSupported(QRhiTexture::Format format, QRhiTexture case QRhiTexture::R16: 
return caps.r16Format; + case QRhiTexture::RG16: + return caps.r16Format; + case QRhiTexture::RGBA16F: case QRhiTexture::RGBA32F: return caps.floatFormats; @@ -874,6 +1360,9 @@ bool QRhiGles2::isTextureFormatSupported(QRhiTexture::Format format, QRhiTexture case QRhiTexture::R32F: return caps.floatFormats; + case QRhiTexture::RGB10A2: + return caps.rgb10Formats; + default: break; } @@ -891,7 +1380,7 @@ bool QRhiGles2::isFeatureSupported(QRhi::Feature feature) const case QRhi::DebugMarkers: return false; case QRhi::Timestamps: - return false; + return caps.timestamps; case QRhi::Instancing: return caps.instancing; case QRhi::CustomInstanceStepRate: @@ -928,15 +1417,50 @@ bool QRhiGles2::isFeatureSupported(QRhi::Feature feature) const return caps.texelFetch; case QRhi::RenderToNonBaseMipLevel: return caps.nonBaseLevelFramebufferTexture; - case QRhi::UIntAttributes: - return caps.uintAttributes; + case QRhi::IntAttributes: + return caps.intAttributes; case QRhi::ScreenSpaceDerivatives: return caps.screenSpaceDerivatives; case QRhi::ReadBackAnyTextureFormat: return false; - default: - Q_UNREACHABLE(); + case QRhi::PipelineCacheDataLoadSave: + return caps.programBinary; + case QRhi::ImageDataStride: + return !caps.gles || caps.ctxMajor >= 3; + case QRhi::RenderBufferImport: + return true; + case QRhi::ThreeDimensionalTextures: + return caps.texture3D; + case QRhi::RenderTo3DTextureSlice: + return caps.texture3D; + case QRhi::TextureArrays: + return caps.maxTextureArraySize > 0; + case QRhi::Tessellation: + return caps.tessellation; + case QRhi::GeometryShader: + return caps.geometryShader; + case QRhi::TextureArrayRange: + return false; + case QRhi::NonFillPolygonMode: + return !caps.gles; + case QRhi::OneDimensionalTextures: + return caps.texture1D; + case QRhi::OneDimensionalTextureMipmaps: + return caps.texture1D; + case QRhi::HalfAttributes: + return caps.halfAttributes; + case QRhi::RenderToOneDimensionalTexture: + return caps.texture1D; + case 
QRhi::ThreeDimensionalTextureMipmaps: + return caps.texture3D; + case QRhi::MultiView: + return caps.multiView && caps.maxTextureArraySize > 0; + case QRhi::TextureViewFormat: return false; + case QRhi::ResolveDepthStencil: + return true; + default: + Q_UNREACHABLE_RETURN(false); } } @@ -965,9 +1489,16 @@ int QRhiGles2::resourceLimit(QRhi::ResourceLimit limit) const return caps.maxThreadGroupsY; case QRhi::MaxThreadGroupZ: return caps.maxThreadGroupsZ; + case QRhi::TextureArraySizeMax: + return 2048; + case QRhi::MaxUniformBufferRange: + return int(qMin<qint64>(INT_MAX, caps.maxUniformVectors * qint64(16))); + case QRhi::MaxVertexInputs: + return caps.maxVertexInputs; + case QRhi::MaxVertexOutputs: + return caps.maxVertexOutputs; default: - Q_UNREACHABLE(); - return 0; + Q_UNREACHABLE_RETURN(0); } } @@ -976,9 +1507,16 @@ const QRhiNativeHandles *QRhiGles2::nativeHandles() return &nativeHandlesStruct; } -void QRhiGles2::sendVMemStatsToProfiler() +QRhiDriverInfo QRhiGles2::driverInfo() const { - // nothing to do here + return driverInfoStruct; +} + +QRhiStats QRhiGles2::statistics() +{ + QRhiStats result; + result.totalPipelineCreationTime = totalPipelineCreationTime(); + return result; } bool QRhiGles2::makeThreadLocalNativeContextCurrent() @@ -998,6 +1536,8 @@ void QRhiGles2::releaseCachedResources() f->glDeleteShader(shader); m_shaderCache.clear(); + + m_pipelineCache.clear(); } bool QRhiGles2::isDeviceLost() const @@ -1005,6 +1545,137 @@ bool QRhiGles2::isDeviceLost() const return contextLost; } +struct QGles2PipelineCacheDataHeader +{ + quint32 rhiId; + quint32 arch; + quint32 programBinaryCount; + quint32 dataSize; + char driver[240]; +}; + +QByteArray QRhiGles2::pipelineCacheData() +{ + Q_STATIC_ASSERT(sizeof(QGles2PipelineCacheDataHeader) == 256); + + if (m_pipelineCache.isEmpty()) + return QByteArray(); + + QGles2PipelineCacheDataHeader header; + memset(&header, 0, sizeof(header)); + header.rhiId = pipelineCacheRhiId(); + header.arch = 
quint32(sizeof(void*)); + header.programBinaryCount = m_pipelineCache.size(); + const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.size())); + if (driverStrLen) + memcpy(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen); + header.driver[driverStrLen] = '\0'; + + const size_t dataOffset = sizeof(header); + size_t dataSize = 0; + for (auto it = m_pipelineCache.cbegin(), end = m_pipelineCache.cend(); it != end; ++it) { + dataSize += sizeof(quint32) + it.key().size() + + sizeof(quint32) + it->data.size() + + sizeof(quint32); + } + + QByteArray buf(dataOffset + dataSize, Qt::Uninitialized); + char *p = buf.data() + dataOffset; + for (auto it = m_pipelineCache.cbegin(), end = m_pipelineCache.cend(); it != end; ++it) { + const QByteArray key = it.key(); + const QByteArray data = it->data; + const quint32 format = it->format; + + quint32 i = key.size(); + memcpy(p, &i, 4); + p += 4; + memcpy(p, key.constData(), key.size()); + p += key.size(); + + i = data.size(); + memcpy(p, &i, 4); + p += 4; + memcpy(p, data.constData(), data.size()); + p += data.size(); + + memcpy(p, &format, 4); + p += 4; + } + Q_ASSERT(p == buf.data() + dataOffset + dataSize); + + header.dataSize = quint32(dataSize); + memcpy(buf.data(), &header, sizeof(header)); + + return buf; +} + +void QRhiGles2::setPipelineCacheData(const QByteArray &data) +{ + if (data.isEmpty()) + return; + + const size_t headerSize = sizeof(QGles2PipelineCacheDataHeader); + if (data.size() < qsizetype(headerSize)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (header incomplete)"); + return; + } + const size_t dataOffset = headerSize; + QGles2PipelineCacheDataHeader header; + memcpy(&header, data.constData(), headerSize); + + const quint32 rhiId = pipelineCacheRhiId(); + if (header.rhiId != rhiId) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: The data is for a different QRhi version or backend (%u, %u)", + rhiId, header.rhiId); + return; + 
} + const quint32 arch = quint32(sizeof(void*)); + if (header.arch != arch) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Architecture does not match (%u, %u)", + arch, header.arch); + return; + } + if (header.programBinaryCount == 0) + return; + + const size_t driverStrLen = qMin(sizeof(header.driver) - 1, size_t(driverInfoStruct.deviceName.size())); + if (strncmp(header.driver, driverInfoStruct.deviceName.constData(), driverStrLen)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: OpenGL vendor/renderer/version does not match"); + return; + } + + if (data.size() < qsizetype(dataOffset + header.dataSize)) { + qCDebug(QRHI_LOG_INFO, "setPipelineCacheData: Invalid blob size (data incomplete)"); + return; + } + + m_pipelineCache.clear(); + + const char *p = data.constData() + dataOffset; + for (quint32 i = 0; i < header.programBinaryCount; ++i) { + quint32 len = 0; + memcpy(&len, p, 4); + p += 4; + QByteArray key(len, Qt::Uninitialized); + memcpy(key.data(), p, len); + p += len; + + memcpy(&len, p, 4); + p += 4; + QByteArray data(len, Qt::Uninitialized); + memcpy(data.data(), p, len); + p += len; + + quint32 format; + memcpy(&format, p, 4); + p += 4; + + m_pipelineCache.insert(key, { format, data }); + } + + qCDebug(QRHI_LOG_INFO, "Seeded pipeline cache with %d program binaries", int(m_pipelineCache.size())); +} + QRhiRenderBuffer *QRhiGles2::createRenderBuffer(QRhiRenderBuffer::Type type, const QSize &pixelSize, int sampleCount, QRhiRenderBuffer::Flags flags, QRhiTexture::Format backingFormatHint) @@ -1012,10 +1683,11 @@ QRhiRenderBuffer *QRhiGles2::createRenderBuffer(QRhiRenderBuffer::Type type, con return new QGles2RenderBuffer(this, type, pixelSize, sampleCount, flags, backingFormatHint); } -QRhiTexture *QRhiGles2::createTexture(QRhiTexture::Format format, const QSize &pixelSize, +QRhiTexture *QRhiGles2::createTexture(QRhiTexture::Format format, + const QSize &pixelSize, int depth, int arraySize, int sampleCount, QRhiTexture::Flags flags) { - return new 
QGles2Texture(this, format, pixelSize, sampleCount, flags); + return new QGles2Texture(this, format, pixelSize, depth, arraySize, sampleCount, flags); } QRhiSampler *QRhiGles2::createSampler(QRhiSampler::Filter magFilter, QRhiSampler::Filter minFilter, @@ -1058,10 +1730,9 @@ void QRhiGles2::setGraphicsPipeline(QRhiCommandBuffer *cb, QRhiGraphicsPipeline cbD->currentComputePipeline = nullptr; cbD->currentPipelineGeneration = psD->generation; - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BindGraphicsPipeline; cmd.args.bindGraphicsPipeline.ps = ps; - cbD->commands.append(cmd); } } @@ -1081,66 +1752,86 @@ void QRhiGles2::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind srb = compPsD->m_shaderResourceBindings; } - QRhiPassResourceTracker &passResTracker(cbD->passResTrackers[cbD->currentPassResTrackerIndex]); QGles2ShaderResourceBindings *srbD = QRHI_RES(QGles2ShaderResourceBindings, srb); - bool hasDynamicOffsetInSrb = false; - for (int i = 0, ie = srbD->m_bindings.count(); i != ie; ++i) { - const QRhiShaderResourceBinding::Data *b = srbD->m_bindings.at(i).data(); - switch (b->type) { - case QRhiShaderResourceBinding::UniformBuffer: - // no BufUniformRead / AccessUniform because no real uniform buffers are used - if (b->u.ubuf.hasDynamicOffset) - hasDynamicOffsetInSrb = true; - break; - case QRhiShaderResourceBinding::SampledTexture: - for (int elem = 0; elem < b->u.stex.count; ++elem) { - trackedRegisterTexture(&passResTracker, - QRHI_RES(QGles2Texture, b->u.stex.texSamplers[elem].tex), - QRhiPassResourceTracker::TexSample, + if (cbD->passNeedsResourceTracking) { + QRhiPassResourceTracker &passResTracker(cbD->passResTrackers[cbD->currentPassResTrackerIndex]); + for (int i = 0, ie = srbD->m_bindings.size(); i != ie; ++i) { + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->m_bindings.at(i)); + switch (b->type) { + case 
QRhiShaderResourceBinding::UniformBuffer: + // no BufUniformRead / AccessUniform because no real uniform buffers are used + break; + case QRhiShaderResourceBinding::SampledTexture: + case QRhiShaderResourceBinding::Texture: + for (int elem = 0; elem < b->u.stex.count; ++elem) { + trackedRegisterTexture(&passResTracker, + QRHI_RES(QGles2Texture, b->u.stex.texSamplers[elem].tex), + QRhiPassResourceTracker::TexSample, + QRhiPassResourceTracker::toPassTrackerTextureStage(b->stage)); + } + break; + case QRhiShaderResourceBinding::ImageLoad: + case QRhiShaderResourceBinding::ImageStore: + case QRhiShaderResourceBinding::ImageLoadStore: + { + QGles2Texture *texD = QRHI_RES(QGles2Texture, b->u.simage.tex); + QRhiPassResourceTracker::TextureAccess access; + if (b->type == QRhiShaderResourceBinding::ImageLoad) + access = QRhiPassResourceTracker::TexStorageLoad; + else if (b->type == QRhiShaderResourceBinding::ImageStore) + access = QRhiPassResourceTracker::TexStorageStore; + else + access = QRhiPassResourceTracker::TexStorageLoadStore; + trackedRegisterTexture(&passResTracker, texD, access, QRhiPassResourceTracker::toPassTrackerTextureStage(b->stage)); } - break; - case QRhiShaderResourceBinding::ImageLoad: - case QRhiShaderResourceBinding::ImageStore: - case QRhiShaderResourceBinding::ImageLoadStore: - { - QGles2Texture *texD = QRHI_RES(QGles2Texture, b->u.simage.tex); - QRhiPassResourceTracker::TextureAccess access; - if (b->type == QRhiShaderResourceBinding::ImageLoad) - access = QRhiPassResourceTracker::TexStorageLoad; - else if (b->type == QRhiShaderResourceBinding::ImageStore) - access = QRhiPassResourceTracker::TexStorageStore; - else - access = QRhiPassResourceTracker::TexStorageLoadStore; - trackedRegisterTexture(&passResTracker, texD, access, - QRhiPassResourceTracker::toPassTrackerTextureStage(b->stage)); - } - break; - case QRhiShaderResourceBinding::BufferLoad: - case QRhiShaderResourceBinding::BufferStore: - case QRhiShaderResourceBinding::BufferLoadStore: - { 
- QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, b->u.sbuf.buf); - QRhiPassResourceTracker::BufferAccess access; - if (b->type == QRhiShaderResourceBinding::BufferLoad) - access = QRhiPassResourceTracker::BufStorageLoad; - else if (b->type == QRhiShaderResourceBinding::BufferStore) - access = QRhiPassResourceTracker::BufStorageStore; - else - access = QRhiPassResourceTracker::BufStorageLoadStore; - trackedRegisterBuffer(&passResTracker, bufD, access, - QRhiPassResourceTracker::toPassTrackerBufferStage(b->stage)); - } - break; - default: - break; + break; + case QRhiShaderResourceBinding::BufferLoad: + case QRhiShaderResourceBinding::BufferStore: + case QRhiShaderResourceBinding::BufferLoadStore: + { + QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, b->u.sbuf.buf); + QRhiPassResourceTracker::BufferAccess access; + if (b->type == QRhiShaderResourceBinding::BufferLoad) + access = QRhiPassResourceTracker::BufStorageLoad; + else if (b->type == QRhiShaderResourceBinding::BufferStore) + access = QRhiPassResourceTracker::BufStorageStore; + else + access = QRhiPassResourceTracker::BufStorageLoadStore; + trackedRegisterBuffer(&passResTracker, bufD, access, + QRhiPassResourceTracker::toPassTrackerBufferStage(b->stage)); + } + break; + default: + break; + } } } - const bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb); - const bool srbRebuilt = cbD->currentSrbGeneration != srbD->generation; + bool srbChanged = gfxPsD ? (cbD->currentGraphicsSrb != srb) : (cbD->currentComputeSrb != srb); + + // The Command::BindShaderResources command generated below is what will + // cause uniforms to be set (glUniformNxx). This needs some special + // handling here in this backend without real uniform buffers, because, + // like in other backends, we optimize out the setShaderResources when the + // srb that was set before is attempted to be set again on the command + // buffer, but that is incorrect if the same srb is now used with another + // pipeline. 
(because that could mean a glUseProgram not followed by + // up-to-date glUniform calls, i.e. with GL we have a strong dependency + // between the pipeline (== program) and the srb, unlike other APIs) This + // is the reason there is a second level of srb(+generation) tracking in + // the pipeline objects. + if (gfxPsD && (gfxPsD->currentSrb != srb || gfxPsD->currentSrbGeneration != srbD->generation)) { + srbChanged = true; + gfxPsD->currentSrb = srb; + gfxPsD->currentSrbGeneration = srbD->generation; + } else if (compPsD && (compPsD->currentSrb != srb || compPsD->currentSrbGeneration != srbD->generation)) { + srbChanged = true; + compPsD->currentSrb = srb; + compPsD->currentSrbGeneration = srbD->generation; + } - if (srbChanged || srbRebuilt || hasDynamicOffsetInSrb) { + if (srbChanged || cbD->currentSrbGeneration != srbD->generation || srbD->hasDynamicOffset) { if (gfxPsD) { cbD->currentGraphicsSrb = srb; cbD->currentComputeSrb = nullptr; @@ -1150,14 +1841,14 @@ void QRhiGles2::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind } cbD->currentSrbGeneration = srbD->generation; - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BindShaderResources; cmd.args.bindShaderResources.maybeGraphicsPs = gfxPsD; cmd.args.bindShaderResources.maybeComputePs = compPsD; cmd.args.bindShaderResources.srb = srb; cmd.args.bindShaderResources.dynamicOffsetCount = 0; - if (hasDynamicOffsetInSrb) { - if (dynamicOffsetCount < QGles2CommandBuffer::Command::MAX_UBUF_BINDINGS) { + if (srbD->hasDynamicOffset) { + if (dynamicOffsetCount < QGles2CommandBuffer::MAX_DYNAMIC_OFFSET_COUNT) { cmd.args.bindShaderResources.dynamicOffsetCount = dynamicOffsetCount; uint *p = cmd.args.bindShaderResources.dynamicOffsetPairs; for (int i = 0; i < dynamicOffsetCount; ++i) { @@ -1167,10 +1858,9 @@ void QRhiGles2::setShaderResources(QRhiCommandBuffer *cb, QRhiShaderResourceBind } } else { qWarning("Too many 
dynamic offsets (%d, max is %d)", - dynamicOffsetCount, QGles2CommandBuffer::Command::MAX_UBUF_BINDINGS); + dynamicOffsetCount, QGles2CommandBuffer::MAX_DYNAMIC_OFFSET_COUNT); } } - cbD->commands.append(cmd); } } @@ -1188,31 +1878,33 @@ void QRhiGles2::setVertexInput(QRhiCommandBuffer *cb, QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, buf); Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::VertexBuffer)); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BindVertexBuffer; cmd.args.bindVertexBuffer.ps = cbD->currentGraphicsPipeline; cmd.args.bindVertexBuffer.buffer = bufD->buffer; cmd.args.bindVertexBuffer.offset = ofs; cmd.args.bindVertexBuffer.binding = startBinding + i; - cbD->commands.append(cmd); - trackedRegisterBuffer(&passResTracker, bufD, QRhiPassResourceTracker::BufVertexInput, - QRhiPassResourceTracker::BufVertexInputStage); + if (cbD->passNeedsResourceTracking) { + trackedRegisterBuffer(&passResTracker, bufD, QRhiPassResourceTracker::BufVertexInput, + QRhiPassResourceTracker::BufVertexInputStage); + } } if (indexBuf) { QGles2Buffer *ibufD = QRHI_RES(QGles2Buffer, indexBuf); Q_ASSERT(ibufD->m_usage.testFlag(QRhiBuffer::IndexBuffer)); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BindIndexBuffer; cmd.args.bindIndexBuffer.buffer = ibufD->buffer; cmd.args.bindIndexBuffer.offset = indexOffset; cmd.args.bindIndexBuffer.type = indexFormat == QRhiCommandBuffer::IndexUInt16 ? 
GL_UNSIGNED_SHORT : GL_UNSIGNED_INT; - cbD->commands.append(cmd); - trackedRegisterBuffer(&passResTracker, ibufD, QRhiPassResourceTracker::BufIndexRead, - QRhiPassResourceTracker::BufVertexInputStage); + if (cbD->passNeedsResourceTracking) { + trackedRegisterBuffer(&passResTracker, ibufD, QRhiPassResourceTracker::BufIndexRead, + QRhiPassResourceTracker::BufVertexInputStage); + } } } @@ -1221,20 +1913,19 @@ void QRhiGles2::setViewport(QRhiCommandBuffer *cb, const QRhiViewport &viewport) QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); - QGles2CommandBuffer::Command cmd; - cmd.cmd = QGles2CommandBuffer::Command::Viewport; const std::array<float, 4> r = viewport.viewport(); // A negative width or height is an error. A negative x or y is not. if (r[2] < 0.0f || r[3] < 0.0f) return; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); + cmd.cmd = QGles2CommandBuffer::Command::Viewport; cmd.args.viewport.x = r[0]; cmd.args.viewport.y = r[1]; cmd.args.viewport.w = r[2]; cmd.args.viewport.h = r[3]; cmd.args.viewport.d0 = viewport.minDepth(); cmd.args.viewport.d1 = viewport.maxDepth(); - cbD->commands.append(cmd); } void QRhiGles2::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) @@ -1242,18 +1933,17 @@ void QRhiGles2::setScissor(QRhiCommandBuffer *cb, const QRhiScissor &scissor) QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); - QGles2CommandBuffer::Command cmd; - cmd.cmd = QGles2CommandBuffer::Command::Scissor; const std::array<int, 4> r = scissor.scissor(); // A negative width or height is an error. A negative x or y is not. 
if (r[2] < 0 || r[3] < 0) return; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); + cmd.cmd = QGles2CommandBuffer::Command::Scissor; cmd.args.scissor.x = r[0]; cmd.args.scissor.y = r[1]; cmd.args.scissor.w = r[2]; cmd.args.scissor.h = r[3]; - cbD->commands.append(cmd); } void QRhiGles2::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) @@ -1261,13 +1951,12 @@ void QRhiGles2::setBlendConstants(QRhiCommandBuffer *cb, const QColor &c) QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BlendConstants; cmd.args.blendConstants.r = float(c.redF()); cmd.args.blendConstants.g = float(c.greenF()); cmd.args.blendConstants.b = float(c.blueF()); cmd.args.blendConstants.a = float(c.alphaF()); - cbD->commands.append(cmd); } void QRhiGles2::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) @@ -1275,11 +1964,10 @@ void QRhiGles2::setStencilRef(QRhiCommandBuffer *cb, quint32 refValue) QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::StencilRef; cmd.args.stencilRef.ref = refValue; cmd.args.stencilRef.ps = cbD->currentGraphicsPipeline; - cbD->commands.append(cmd); } void QRhiGles2::draw(QRhiCommandBuffer *cb, quint32 vertexCount, @@ -1288,14 +1976,13 @@ void QRhiGles2::draw(QRhiCommandBuffer *cb, quint32 vertexCount, QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::Draw; cmd.args.draw.ps = cbD->currentGraphicsPipeline; 
cmd.args.draw.vertexCount = vertexCount; cmd.args.draw.firstVertex = firstVertex; cmd.args.draw.instanceCount = instanceCount; cmd.args.draw.baseInstance = firstInstance; - cbD->commands.append(cmd); } void QRhiGles2::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, @@ -1304,7 +1991,7 @@ void QRhiGles2::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::RenderPass); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::DrawIndexed; cmd.args.drawIndexed.ps = cbD->currentGraphicsPipeline; cmd.args.drawIndexed.indexCount = indexCount; @@ -1312,7 +1999,6 @@ void QRhiGles2::drawIndexed(QRhiCommandBuffer *cb, quint32 indexCount, cmd.args.drawIndexed.instanceCount = instanceCount; cmd.args.drawIndexed.baseInstance = firstInstance; cmd.args.drawIndexed.baseVertex = vertexOffset; - cbD->commands.append(cmd); } void QRhiGles2::debugMarkBegin(QRhiCommandBuffer *cb, const QByteArray &name) @@ -1347,11 +2033,14 @@ const QRhiNativeHandles *QRhiGles2::nativeHandles(QRhiCommandBuffer *cb) return nullptr; } -static void addBoundaryCommand(QGles2CommandBuffer *cbD, QGles2CommandBuffer::Command::Cmd type) +static inline void addBoundaryCommand(QGles2CommandBuffer *cbD, QGles2CommandBuffer::Command::Cmd type, GLuint tsQuery = 0) { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = type; - cbD->commands.append(cmd); + if (type == QGles2CommandBuffer::Command::BeginFrame) + cmd.args.beginFrame.timestampQuery = tsQuery; + else if (type == QGles2CommandBuffer::Command::EndFrame) + cmd.args.endFrame.timestampQuery = tsQuery; } void QRhiGles2::beginExternal(QRhiCommandBuffer *cb) @@ -1371,18 +2060,23 @@ void QRhiGles2::beginExternal(QRhiCommandBuffer *cb) if (cbD->recordingPass == QGles2CommandBuffer::ComputePass && 
!cbD->computePassState.writtenResources.isEmpty()) { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::Barrier; cmd.args.barrier.barriers = GL_ALL_BARRIER_BITS; - cbD->commands.append(cmd); } executeCommandBuffer(cbD); cbD->resetCommands(); - if (vao) + if (vao) { f->glBindVertexArray(0); + } else { + f->glBindBuffer(GL_ARRAY_BUFFER, 0); + f->glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + if (caps.compute) + f->glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } } void QRhiGles2::endExternal(QRhiCommandBuffer *cb) @@ -1406,23 +2100,36 @@ void QRhiGles2::endExternal(QRhiCommandBuffer *cb) enqueueBindFramebuffer(cbD->currentTarget, cbD); } -QRhi::FrameOpResult QRhiGles2::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) +double QRhiGles2::lastCompletedGpuTime(QRhiCommandBuffer *cb) { - Q_UNUSED(flags); + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); + return cbD->lastGpuTime; +} +QRhi::FrameOpResult QRhiGles2::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags) +{ QGles2SwapChain *swapChainD = QRHI_RES(QGles2SwapChain, swapChain); if (!ensureContext(swapChainD->surface)) return contextLost ? 
QRhi::FrameOpDeviceLost : QRhi::FrameOpError; - currentSwapChain = swapChainD; + ctx->handle()->beginFrame(); - QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); - QRHI_PROF_F(beginSwapChainFrame(swapChain)); + currentSwapChain = swapChainD; executeDeferredReleases(); swapChainD->cb.resetState(); - addBoundaryCommand(&swapChainD->cb, QGles2CommandBuffer::Command::BeginFrame); + if (swapChainD->timestamps.active[swapChainD->currentTimestampPairIndex]) { + double elapsedSec = 0; + if (swapChainD->timestamps.tryQueryTimestamps(swapChainD->currentTimestampPairIndex, this, &elapsedSec)) + swapChainD->cb.lastGpuTime = elapsedSec; + } + + GLuint tsStart = swapChainD->timestamps.query[swapChainD->currentTimestampPairIndex * 2]; + GLuint tsEnd = swapChainD->timestamps.query[swapChainD->currentTimestampPairIndex * 2 + 1]; + const bool recordTimestamps = tsStart && tsEnd && !swapChainD->timestamps.active[swapChainD->currentTimestampPairIndex]; + + addBoundaryCommand(&swapChainD->cb, QGles2CommandBuffer::Command::BeginFrame, recordTimestamps ? 
tsStart : 0); return QRhi::FrameOpSuccess; } @@ -1432,32 +2139,38 @@ QRhi::FrameOpResult QRhiGles2::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame QGles2SwapChain *swapChainD = QRHI_RES(QGles2SwapChain, swapChain); Q_ASSERT(currentSwapChain == swapChainD); - addBoundaryCommand(&swapChainD->cb, QGles2CommandBuffer::Command::EndFrame); + GLuint tsStart = swapChainD->timestamps.query[swapChainD->currentTimestampPairIndex * 2]; + GLuint tsEnd = swapChainD->timestamps.query[swapChainD->currentTimestampPairIndex * 2 + 1]; + const bool recordTimestamps = tsStart && tsEnd && !swapChainD->timestamps.active[swapChainD->currentTimestampPairIndex]; + if (recordTimestamps) { + swapChainD->timestamps.active[swapChainD->currentTimestampPairIndex] = true; + swapChainD->currentTimestampPairIndex = (swapChainD->currentTimestampPairIndex + 1) % QGles2SwapChainTimestamps::TIMESTAMP_PAIRS; + } + + addBoundaryCommand(&swapChainD->cb, QGles2CommandBuffer::Command::EndFrame, recordTimestamps ? tsEnd : 0); if (!ensureContext(swapChainD->surface)) return contextLost ? QRhi::FrameOpDeviceLost : QRhi::FrameOpError; executeCommandBuffer(&swapChainD->cb); - QRhiProfilerPrivate *rhiP = profilerPrivateOrNull(); - // this must be done before the swap - QRHI_PROF_F(endSwapChainFrame(swapChain, swapChainD->frameCount + 1)); - if (swapChainD->surface && !flags.testFlag(QRhi::SkipPresent)) { ctx->swapBuffers(swapChainD->surface); - needsMakeCurrent = true; + needsMakeCurrentDueToSwap = true; } else { f->glFlush(); } swapChainD->frameCount += 1; currentSwapChain = nullptr; + + ctx->handle()->endFrame(); + return QRhi::FrameOpSuccess; } -QRhi::FrameOpResult QRhiGles2::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi::BeginFrameFlags flags) +QRhi::FrameOpResult QRhiGles2::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi::BeginFrameFlags) { - Q_UNUSED(flags); if (!ensureContext()) return contextLost ? 
QRhi::FrameOpDeviceLost : QRhi::FrameOpError; @@ -1466,7 +2179,12 @@ QRhi::FrameOpResult QRhiGles2::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi: executeDeferredReleases(); ofr.cbWrapper.resetState(); - addBoundaryCommand(&ofr.cbWrapper, QGles2CommandBuffer::Command::BeginFrame); + if (rhiFlags.testFlag(QRhi::EnableTimestamps) && caps.timestamps) { + if (!ofr.tsQueries[0]) + f->glGenQueries(2, ofr.tsQueries); + } + + addBoundaryCommand(&ofr.cbWrapper, QGles2CommandBuffer::Command::BeginFrame, ofr.tsQueries[0]); *cb = &ofr.cbWrapper; return QRhi::FrameOpSuccess; @@ -1478,13 +2196,29 @@ QRhi::FrameOpResult QRhiGles2::endOffscreenFrame(QRhi::EndFrameFlags flags) Q_ASSERT(ofr.active); ofr.active = false; - addBoundaryCommand(&ofr.cbWrapper, QGles2CommandBuffer::Command::EndFrame); + addBoundaryCommand(&ofr.cbWrapper, QGles2CommandBuffer::Command::EndFrame, ofr.tsQueries[1]); if (!ensureContext()) return contextLost ? QRhi::FrameOpDeviceLost : QRhi::FrameOpError; executeCommandBuffer(&ofr.cbWrapper); + // Just as endFrame() does a flush when skipping the swapBuffers(), do it + // here as well. This has the added benefit of playing nice when rendering + // to a texture from a context and then consuming that texture from + // another, sharing context. + f->glFlush(); + + if (ofr.tsQueries[0]) { + quint64 timestamps[2]; + glGetQueryObjectui64v(ofr.tsQueries[1], GL_QUERY_RESULT, &timestamps[1]); + glGetQueryObjectui64v(ofr.tsQueries[0], GL_QUERY_RESULT, &timestamps[0]); + if (timestamps[1] >= timestamps[0]) { + const quint64 nanoseconds = timestamps[1] - timestamps[0]; + ofr.cbWrapper.lastGpuTime = nanoseconds / 1000000000.0; // seconds + } + } + return QRhi::FrameOpSuccess; } @@ -1506,6 +2240,12 @@ QRhi::FrameOpResult QRhiGles2::finish() executeCommandBuffer(&currentSwapChain->cb); currentSwapChain->cb.resetCommands(); } + // Do an actual glFinish(). May seem superfluous, but this is what + // matches most other backends e.g.
Vulkan/Metal that do a heavyweight + // wait-for-idle blocking in their finish(). More importantly, this + // allows clients simply call finish() in threaded or shared context + // situations where one explicitly needs to do a glFlush or Finish. + f->glFinish(); } return QRhi::FrameOpSuccess; } @@ -1525,9 +2265,30 @@ static bool textureAccessIsWrite(QGles2Texture::Access access) || access == QGles2Texture::AccessFramebuffer; } +static inline GLbitfield barriersForBuffer() +{ + return GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT + | GL_ELEMENT_ARRAY_BARRIER_BIT + | GL_UNIFORM_BARRIER_BIT + | GL_BUFFER_UPDATE_BARRIER_BIT + | GL_SHADER_STORAGE_BARRIER_BIT; +} + +static inline GLbitfield barriersForTexture() +{ + return GL_TEXTURE_FETCH_BARRIER_BIT + | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT + | GL_PIXEL_BUFFER_BARRIER_BIT + | GL_TEXTURE_UPDATE_BARRIER_BIT + | GL_FRAMEBUFFER_BARRIER_BIT; +} + void QRhiGles2::trackedBufferBarrier(QGles2CommandBuffer *cbD, QGles2Buffer *bufD, QGles2Buffer::Access access) { Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); // this is for resource updates only + if (!bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)) + return; + const QGles2Buffer::Access prevAccess = bufD->usageState.access; if (access == prevAccess) return; @@ -1537,10 +2298,9 @@ void QRhiGles2::trackedBufferBarrier(QGles2CommandBuffer *cbD, QGles2Buffer *buf // correctly (prevAccess is overwritten so we won't have proper // tracking across multiple passes) so setting all barrier bits will do // for now. 
- QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::Barrier; - cmd.args.barrier.barriers = GL_ALL_BARRIER_BITS; - cbD->commands.append(cmd); + cmd.args.barrier.barriers = barriersForBuffer(); } bufD->usageState.access = access; @@ -1549,15 +2309,17 @@ void QRhiGles2::trackedBufferBarrier(QGles2CommandBuffer *cbD, QGles2Buffer *buf void QRhiGles2::trackedImageBarrier(QGles2CommandBuffer *cbD, QGles2Texture *texD, QGles2Texture::Access access) { Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); // this is for resource updates only + if (!texD->m_flags.testFlag(QRhiTexture::UsedWithLoadStore)) + return; + const QGles2Texture::Access prevAccess = texD->usageState.access; if (access == prevAccess) return; if (textureAccessIsWrite(prevAccess)) { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::Barrier; - cmd.args.barrier.barriers = GL_ALL_BARRIER_BITS; - cbD->commands.append(cmd); + cmd.args.barrier.barriers = barriersForTexture(); } texD->usageState.access = access; @@ -1569,108 +2331,123 @@ void QRhiGles2::enqueueSubresUpload(QGles2Texture *texD, QGles2CommandBuffer *cb trackedImageBarrier(cbD, texD, QGles2Texture::AccessUpdate); const bool isCompressed = isCompressedFormat(texD->m_format); const bool isCubeMap = texD->m_flags.testFlag(QRhiTexture::CubeMap); + const bool is3D = texD->m_flags.testFlag(QRhiTexture::ThreeDimensional); + const bool is1D = texD->m_flags.testFlag(QRhiTexture::OneDimensional); + const bool isArray = texD->m_flags.testFlag(QRhiTexture::TextureArray); const GLenum faceTargetBase = isCubeMap ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : texD->target; + const GLenum effectiveTarget = faceTargetBase + (isCubeMap ? 
uint(layer) : 0u); const QPoint dp = subresDesc.destinationTopLeft(); const QByteArray rawData = subresDesc.data(); - if (!subresDesc.image().isNull()) { - QImage img = subresDesc.image(); - QSize size = img.size(); - QGles2CommandBuffer::Command cmd; + + auto setCmdByNotCompressedData = [&](const void* data, QSize size, quint32 dataStride) + { + quint32 bytesPerLine = 0; + quint32 bytesPerPixel = 0; + textureFormatInfo(texD->m_format, size, &bytesPerLine, nullptr, &bytesPerPixel); + + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::SubImage; - if (!subresDesc.sourceSize().isEmpty() || !subresDesc.sourceTopLeft().isNull()) { - const QPoint sp = subresDesc.sourceTopLeft(); - if (!subresDesc.sourceSize().isEmpty()) - size = subresDesc.sourceSize(); - img = img.copy(sp.x(), sp.y(), size.width(), size.height()); - } cmd.args.subImage.target = texD->target; cmd.args.subImage.texture = texD->texture; - cmd.args.subImage.faceTarget = faceTargetBase + uint(layer); + cmd.args.subImage.faceTarget = effectiveTarget; cmd.args.subImage.level = level; cmd.args.subImage.dx = dp.x(); - cmd.args.subImage.dy = dp.y(); + cmd.args.subImage.dy = is1D && isArray ? layer : dp.y(); + cmd.args.subImage.dz = is3D || isArray ? layer : 0; cmd.args.subImage.w = size.width(); cmd.args.subImage.h = size.height(); cmd.args.subImage.glformat = texD->glformat; cmd.args.subImage.gltype = texD->gltype; - cmd.args.subImage.rowStartAlign = 4; - cmd.args.subImage.data = cbD->retainImage(img); - cbD->commands.append(cmd); + + if (dataStride == 0) + dataStride = bytesPerLine; + + cmd.args.subImage.rowStartAlign = (dataStride & 3) ? 1 : 4; + cmd.args.subImage.rowLength = bytesPerPixel ? 
dataStride / bytesPerPixel : 0; + + cmd.args.subImage.data = data; + }; + + if (!subresDesc.image().isNull()) { + QImage img = subresDesc.image(); + QSize size = img.size(); + if (!subresDesc.sourceSize().isEmpty() || !subresDesc.sourceTopLeft().isNull()) { + const QPoint sp = subresDesc.sourceTopLeft(); + if (!subresDesc.sourceSize().isEmpty()) + size = subresDesc.sourceSize(); + img = img.copy(sp.x(), sp.y(), size.width(), size.height()); + } + + setCmdByNotCompressedData(cbD->retainImage(img), size, img.bytesPerLine()); } else if (!rawData.isEmpty() && isCompressed) { - if (!texD->compressedAtlasBuilt && (texD->flags() & QRhiTexture::UsedAsCompressedAtlas)) { - // Create on first upload since glCompressedTexImage2D cannot take nullptr data + const int depth = qMax(1, texD->m_depth); + const int arraySize = qMax(0, texD->m_arraySize); + if ((texD->flags().testFlag(QRhiTexture::UsedAsCompressedAtlas) || is3D || isArray) + && !texD->zeroInitialized) + { + // Create on first upload since glCompressedTexImage2D cannot take + // nullptr data. We have a rule in the QRhi docs that the first + // upload for a compressed texture must cover the entire image, but + // that is clearly not ideal when building a texture atlas, or when + // having a 3D texture with per-slice data. 
quint32 byteSize = 0; compressedFormatInfo(texD->m_format, texD->m_pixelSize, nullptr, &byteSize, nullptr); + if (is3D) + byteSize *= depth; + if (isArray) + byteSize *= arraySize; QByteArray zeroBuf(byteSize, 0); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::CompressedImage; cmd.args.compressedImage.target = texD->target; cmd.args.compressedImage.texture = texD->texture; - cmd.args.compressedImage.faceTarget = faceTargetBase + uint(layer); + cmd.args.compressedImage.faceTarget = effectiveTarget; cmd.args.compressedImage.level = level; cmd.args.compressedImage.glintformat = texD->glintformat; cmd.args.compressedImage.w = texD->m_pixelSize.width(); - cmd.args.compressedImage.h = texD->m_pixelSize.height(); + cmd.args.compressedImage.h = is1D && isArray ? arraySize : texD->m_pixelSize.height(); + cmd.args.compressedImage.depth = is3D ? depth : (isArray ? arraySize : 0); cmd.args.compressedImage.size = byteSize; cmd.args.compressedImage.data = cbD->retainData(zeroBuf); - cbD->commands.append(cmd); - texD->compressedAtlasBuilt = true; + texD->zeroInitialized = true; } const QSize size = subresDesc.sourceSize().isEmpty() ? q->sizeForMipLevel(level, texD->m_pixelSize) : subresDesc.sourceSize(); - if (texD->specified || texD->compressedAtlasBuilt) { - QGles2CommandBuffer::Command cmd; + if (texD->specified || texD->zeroInitialized) { + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::CompressedSubImage; cmd.args.compressedSubImage.target = texD->target; cmd.args.compressedSubImage.texture = texD->texture; - cmd.args.compressedSubImage.faceTarget = faceTargetBase + uint(layer); + cmd.args.compressedSubImage.faceTarget = effectiveTarget; cmd.args.compressedSubImage.level = level; cmd.args.compressedSubImage.dx = dp.x(); - cmd.args.compressedSubImage.dy = dp.y(); + cmd.args.compressedSubImage.dy = is1D && isArray ? 
layer : dp.y(); + cmd.args.compressedSubImage.dz = is3D || isArray ? layer : 0; cmd.args.compressedSubImage.w = size.width(); cmd.args.compressedSubImage.h = size.height(); cmd.args.compressedSubImage.glintformat = texD->glintformat; cmd.args.compressedSubImage.size = rawData.size(); cmd.args.compressedSubImage.data = cbD->retainData(rawData); - cbD->commands.append(cmd); } else { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::CompressedImage; cmd.args.compressedImage.target = texD->target; cmd.args.compressedImage.texture = texD->texture; - cmd.args.compressedImage.faceTarget = faceTargetBase + uint(layer); + cmd.args.compressedImage.faceTarget = effectiveTarget; cmd.args.compressedImage.level = level; cmd.args.compressedImage.glintformat = texD->glintformat; cmd.args.compressedImage.w = size.width(); - cmd.args.compressedImage.h = size.height(); + cmd.args.compressedImage.h = is1D && isArray ? arraySize : size.height(); + cmd.args.compressedImage.depth = is3D ? depth : (isArray ? arraySize : 0); cmd.args.compressedImage.size = rawData.size(); cmd.args.compressedImage.data = cbD->retainData(rawData); - cbD->commands.append(cmd); } } else if (!rawData.isEmpty()) { const QSize size = subresDesc.sourceSize().isEmpty() ? 
q->sizeForMipLevel(level, texD->m_pixelSize) : subresDesc.sourceSize(); - quint32 bpl = 0; - textureFormatInfo(texD->m_format, size, &bpl, nullptr); - QGles2CommandBuffer::Command cmd; - cmd.cmd = QGles2CommandBuffer::Command::SubImage; - cmd.args.subImage.target = texD->target; - cmd.args.subImage.texture = texD->texture; - cmd.args.subImage.faceTarget = faceTargetBase + uint(layer); - cmd.args.subImage.level = level; - cmd.args.subImage.dx = dp.x(); - cmd.args.subImage.dy = dp.y(); - cmd.args.subImage.w = size.width(); - cmd.args.subImage.h = size.height(); - cmd.args.subImage.glformat = texD->glformat; - cmd.args.subImage.gltype = texD->gltype; - // Default unpack alignment (row start aligment - // requirement) is 4. QImage guarantees 4 byte aligned - // row starts, but our raw data here does not. - cmd.args.subImage.rowStartAlign = (bpl & 3) ? 1 : 4; - cmd.args.subImage.data = cbD->retainData(rawData); - cbD->commands.append(cmd); + + setCmdByNotCompressedData(cbD->retainData(rawData), size, subresDesc.dataStride()); } else { qWarning("Invalid texture upload for %p layer=%d mip=%d", texD, layer, level); } @@ -1681,66 +2458,65 @@ void QRhiGles2::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); QRhiResourceUpdateBatchPrivate *ud = QRhiResourceUpdateBatchPrivate::get(resourceUpdates); - for (const QRhiResourceUpdateBatchPrivate::BufferOp &u : ud->bufferOps) { + for (int opIdx = 0; opIdx < ud->activeBufferOpCount; ++opIdx) { + const QRhiResourceUpdateBatchPrivate::BufferOp &u(ud->bufferOps[opIdx]); if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::DynamicUpdate) { QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf); Q_ASSERT(bufD->m_type == QRhiBuffer::Dynamic); if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) { - memcpy(bufD->ubuf.data() + u.offset, u.data.constData(), size_t(u.data.size())); + memcpy(bufD->data.data() + u.offset, u.data.constData(), size_t(u.data.size())); } 
else { trackedBufferBarrier(cbD, bufD, QGles2Buffer::AccessUpdate); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BufferSubData; cmd.args.bufferSubData.target = bufD->targetForDataOps; cmd.args.bufferSubData.buffer = bufD->buffer; cmd.args.bufferSubData.offset = u.offset; cmd.args.bufferSubData.size = u.data.size(); - cmd.args.bufferSubData.data = cbD->retainData(u.data); - cbD->commands.append(cmd); + cmd.args.bufferSubData.data = cbD->retainBufferData(u.data); } } else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::StaticUpload) { QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf); Q_ASSERT(bufD->m_type != QRhiBuffer::Dynamic); Q_ASSERT(u.offset + u.data.size() <= bufD->m_size); if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) { - memcpy(bufD->ubuf.data() + u.offset, u.data.constData(), size_t(u.data.size())); + memcpy(bufD->data.data() + u.offset, u.data.constData(), size_t(u.data.size())); } else { trackedBufferBarrier(cbD, bufD, QGles2Buffer::AccessUpdate); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BufferSubData; cmd.args.bufferSubData.target = bufD->targetForDataOps; cmd.args.bufferSubData.buffer = bufD->buffer; cmd.args.bufferSubData.offset = u.offset; cmd.args.bufferSubData.size = u.data.size(); - cmd.args.bufferSubData.data = cbD->retainData(u.data); - cbD->commands.append(cmd); + cmd.args.bufferSubData.data = cbD->retainBufferData(u.data); } } else if (u.type == QRhiResourceUpdateBatchPrivate::BufferOp::Read) { QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, u.buf); if (bufD->m_usage.testFlag(QRhiBuffer::UniformBuffer)) { u.result->data.resize(u.readSize); - memcpy(u.result->data.data(), bufD->ubuf.constData() + u.offset, size_t(u.readSize)); + memcpy(u.result->data.data(), bufD->data.constData() + u.offset, size_t(u.readSize)); if (u.result->completed) 
u.result->completed(); } else { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::GetBufferSubData; cmd.args.getBufferSubData.result = u.result; cmd.args.getBufferSubData.target = bufD->targetForDataOps; cmd.args.getBufferSubData.buffer = bufD->buffer; cmd.args.getBufferSubData.offset = u.offset; cmd.args.getBufferSubData.size = u.readSize; - cbD->commands.append(cmd); } } } - for (const QRhiResourceUpdateBatchPrivate::TextureOp &u : ud->textureOps) { + for (int opIdx = 0; opIdx < ud->activeTextureOpCount; ++opIdx) { + const QRhiResourceUpdateBatchPrivate::TextureOp &u(ud->textureOps[opIdx]); if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Upload) { QGles2Texture *texD = QRHI_RES(QGles2Texture, u.dst); - for (int layer = 0; layer < QRhi::MAX_LAYERS; ++layer) { - for (int level = 0; level < QRhi::MAX_LEVELS; ++level) { - for (const QRhiTextureSubresourceUploadDescription &subresDesc : qAsConst(u.subresDesc[layer][level])) + for (int layer = 0, maxLayer = u.subresDesc.size(); layer < maxLayer; ++layer) { + for (int level = 0; level < QRhi::MAX_MIP_LEVELS; ++level) { + for (const QRhiTextureSubresourceUploadDescription &subresDesc : std::as_const(u.subresDesc[layer][level])) enqueueSubresUpload(texD, cbD, layer, level, subresDesc); } } @@ -1764,53 +2540,65 @@ void QRhiGles2::enqueueResourceUpdates(QRhiCommandBuffer *cb, QRhiResourceUpdate const GLenum dstFaceTargetBase = dstD->m_flags.testFlag(QRhiTexture::CubeMap) ? 
GL_TEXTURE_CUBE_MAP_POSITIVE_X : dstD->target; - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::CopyTex; - cmd.args.copyTex.srcFaceTarget = srcFaceTargetBase + uint(u.desc.sourceLayer()); + const bool srcHasZ = srcD->m_flags.testFlag(QRhiTexture::ThreeDimensional) || srcD->m_flags.testFlag(QRhiTexture::TextureArray); + const bool dstHasZ = dstD->m_flags.testFlag(QRhiTexture::ThreeDimensional) || dstD->m_flags.testFlag(QRhiTexture::TextureArray); + const bool dstIs1dArray = dstD->m_flags.testFlag(QRhiTexture::OneDimensional) + && dstD->m_flags.testFlag(QRhiTexture::TextureArray); + + cmd.args.copyTex.srcTarget = srcD->target; + cmd.args.copyTex.srcFaceTarget = srcFaceTargetBase + (srcHasZ ? 0u : uint(u.desc.sourceLayer())); cmd.args.copyTex.srcTexture = srcD->texture; cmd.args.copyTex.srcLevel = u.desc.sourceLevel(); cmd.args.copyTex.srcX = sp.x(); cmd.args.copyTex.srcY = sp.y(); + cmd.args.copyTex.srcZ = srcHasZ ? u.desc.sourceLayer() : 0; cmd.args.copyTex.dstTarget = dstD->target; + cmd.args.copyTex.dstFaceTarget = dstFaceTargetBase + (dstHasZ ? 0u : uint(u.desc.destinationLayer())); cmd.args.copyTex.dstTexture = dstD->texture; - cmd.args.copyTex.dstFaceTarget = dstFaceTargetBase + uint(u.desc.destinationLayer()); cmd.args.copyTex.dstLevel = u.desc.destinationLevel(); cmd.args.copyTex.dstX = dp.x(); - cmd.args.copyTex.dstY = dp.y(); + cmd.args.copyTex.dstY = dstIs1dArray ? u.desc.destinationLayer() : dp.y(); + cmd.args.copyTex.dstZ = dstHasZ ? 
u.desc.destinationLayer() : 0; cmd.args.copyTex.w = copySize.width(); cmd.args.copyTex.h = copySize.height(); - - cbD->commands.append(cmd); } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::Read) { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::ReadPixels; cmd.args.readPixels.result = u.result; QGles2Texture *texD = QRHI_RES(QGles2Texture, u.rb.texture()); if (texD) trackedImageBarrier(cbD, texD, QGles2Texture::AccessRead); cmd.args.readPixels.texture = texD ? texD->texture : 0; + cmd.args.readPixels.slice3D = -1; if (texD) { const QSize readImageSize = q->sizeForMipLevel(u.rb.level(), texD->m_pixelSize); cmd.args.readPixels.w = readImageSize.width(); cmd.args.readPixels.h = readImageSize.height(); cmd.args.readPixels.format = texD->m_format; - const GLenum faceTargetBase = texD->m_flags.testFlag(QRhiTexture::CubeMap) - ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : texD->target; - cmd.args.readPixels.readTarget = faceTargetBase + uint(u.rb.layer()); + if (texD->m_flags.testFlag(QRhiTexture::ThreeDimensional) + || texD->m_flags.testFlag(QRhiTexture::TextureArray)) + { + cmd.args.readPixels.readTarget = texD->target; + cmd.args.readPixels.slice3D = u.rb.layer(); + } else { + const GLenum faceTargetBase = texD->m_flags.testFlag(QRhiTexture::CubeMap) + ? 
GL_TEXTURE_CUBE_MAP_POSITIVE_X : texD->target; + cmd.args.readPixels.readTarget = faceTargetBase + uint(u.rb.layer()); + } cmd.args.readPixels.level = u.rb.level(); } - cbD->commands.append(cmd); } else if (u.type == QRhiResourceUpdateBatchPrivate::TextureOp::GenMips) { QGles2Texture *texD = QRHI_RES(QGles2Texture, u.dst); trackedImageBarrier(cbD, texD, QGles2Texture::AccessFramebuffer); - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::GenMip; cmd.args.genMip.target = texD->target; cmd.args.genMip.texture = texD->texture; - cbD->commands.append(cmd); } } @@ -1832,9 +2620,10 @@ static inline GLenum toGlTopology(QRhiGraphicsPipeline::Topology t) return GL_LINE_STRIP; case QRhiGraphicsPipeline::Points: return GL_POINTS; + case QRhiGraphicsPipeline::Patches: + return GL_PATCHES; default: - Q_UNREACHABLE(); - return GL_TRIANGLES; + Q_UNREACHABLE_RETURN(GL_TRIANGLES); } } @@ -1846,8 +2635,7 @@ static inline GLenum toGlCullMode(QRhiGraphicsPipeline::CullMode c) case QRhiGraphicsPipeline::Back: return GL_BACK; default: - Q_UNREACHABLE(); - return GL_BACK; + Q_UNREACHABLE_RETURN(GL_BACK); } } @@ -1859,8 +2647,7 @@ static inline GLenum toGlFrontFace(QRhiGraphicsPipeline::FrontFace f) case QRhiGraphicsPipeline::CW: return GL_CW; default: - Q_UNREACHABLE(); - return GL_CCW; + Q_UNREACHABLE_RETURN(GL_CCW); } } @@ -1904,8 +2691,7 @@ static inline GLenum toGlBlendFactor(QRhiGraphicsPipeline::BlendFactor f) qWarning("Unsupported blend factor %d", f); return GL_ZERO; default: - Q_UNREACHABLE(); - return GL_ZERO; + Q_UNREACHABLE_RETURN(GL_ZERO); } } @@ -1923,8 +2709,7 @@ static inline GLenum toGlBlendOp(QRhiGraphicsPipeline::BlendOp op) case QRhiGraphicsPipeline::Max: return GL_MAX; default: - Q_UNREACHABLE(); - return GL_FUNC_ADD; + Q_UNREACHABLE_RETURN(GL_FUNC_ADD); } } @@ -1948,8 +2733,7 @@ static inline GLenum toGlCompareOp(QRhiGraphicsPipeline::CompareOp op) case QRhiGraphicsPipeline::Always: 
return GL_ALWAYS; default: - Q_UNREACHABLE(); - return GL_ALWAYS; + Q_UNREACHABLE_RETURN(GL_ALWAYS); } } @@ -1973,8 +2757,19 @@ static inline GLenum toGlStencilOp(QRhiGraphicsPipeline::StencilOp op) case QRhiGraphicsPipeline::DecrementAndWrap: return GL_DECR_WRAP; default: - Q_UNREACHABLE(); - return GL_KEEP; + Q_UNREACHABLE_RETURN(GL_KEEP); + } +} + +static inline GLenum toGlPolygonMode(QRhiGraphicsPipeline::PolygonMode mode) +{ + switch (mode) { + case QRhiGraphicsPipeline::PolygonMode::Fill: + return GL_FILL; + case QRhiGraphicsPipeline::PolygonMode::Line: + return GL_LINE; + default: + Q_UNREACHABLE_RETURN(GL_FILL); } } @@ -1992,8 +2787,7 @@ static inline GLenum toGlMinFilter(QRhiSampler::Filter f, QRhiSampler::Filter m) else return m == QRhiSampler::Nearest ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR_MIPMAP_LINEAR; default: - Q_UNREACHABLE(); - return GL_LINEAR; + Q_UNREACHABLE_RETURN(GL_LINEAR); } } @@ -2005,8 +2799,7 @@ static inline GLenum toGlMagFilter(QRhiSampler::Filter f) case QRhiSampler::Linear: return GL_LINEAR; default: - Q_UNREACHABLE(); - return GL_LINEAR; + Q_UNREACHABLE_RETURN(GL_LINEAR); } } @@ -2020,8 +2813,7 @@ static inline GLenum toGlWrapMode(QRhiSampler::AddressMode m) case QRhiSampler::Mirror: return GL_MIRRORED_REPEAT; default: - Q_UNREACHABLE(); - return GL_CLAMP_TO_EDGE; + Q_UNREACHABLE_RETURN(GL_CLAMP_TO_EDGE); } } @@ -2045,8 +2837,7 @@ static inline GLenum toGlTextureCompareFunc(QRhiSampler::CompareOp op) case QRhiSampler::Always: return GL_ALWAYS; default: - Q_UNREACHABLE(); - return GL_NEVER; + Q_UNREACHABLE_RETURN(GL_NEVER); } } @@ -2132,20 +2923,51 @@ void QRhiGles2::trackedRegisterTexture(QRhiPassResourceTracker *passResTracker, u.access = toGlAccess(access); } -void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) +struct CommandBufferExecTrackedState { - QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); GLenum indexType = GL_UNSIGNED_SHORT; quint32 indexStride = sizeof(quint16); quint32 indexOffset = 0; GLuint 
currentArrayBuffer = 0; + GLuint currentElementArrayBuffer = 0; + struct { + QRhiGraphicsPipeline *ps = nullptr; + GLuint buffer = 0; + quint32 offset = 0; + int binding = 0; + } lastBindVertexBuffer; static const int TRACKED_ATTRIB_COUNT = 16; - bool enabledAttribArrays[TRACKED_ATTRIB_COUNT]; - memset(enabledAttribArrays, 0, sizeof(enabledAttribArrays)); + bool enabledAttribArrays[TRACKED_ATTRIB_COUNT] = {}; + bool nonzeroAttribDivisor[TRACKED_ATTRIB_COUNT] = {}; + bool instancedAttributesUsed = false; + int maxUntrackedInstancedAttribute = 0; +}; + +// Helper that must be used in executeCommandBuffer() whenever changing the +// ARRAY or ELEMENT_ARRAY buffer binding outside of Command::BindVertexBuffer +// and Command::BindIndexBuffer. +static inline void bindVertexIndexBufferWithStateReset(CommandBufferExecTrackedState *state, + QOpenGLExtensions *f, + GLenum target, + GLuint buffer) +{ + state->currentArrayBuffer = 0; + state->currentElementArrayBuffer = 0; + state->lastBindVertexBuffer.buffer = 0; + f->glBindBuffer(target, buffer); +} + +void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) +{ + CommandBufferExecTrackedState state; + QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); - for (const QGles2CommandBuffer::Command &cmd : qAsConst(cbD->commands)) { + for (auto it = cbD->commands.cbegin(), end = cbD->commands.cend(); it != end; ++it) { + const QGles2CommandBuffer::Command &cmd(*it); switch (cmd.cmd) { case QGles2CommandBuffer::Command::BeginFrame: + if (cmd.args.beginFrame.timestampQuery) + glQueryCounter(cmd.args.beginFrame.timestampQuery, GL_TIMESTAMP); if (caps.coreProfile) { if (!vao) f->glGenVertexArrays(1, &vao); @@ -2153,8 +2975,27 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) } break; case QGles2CommandBuffer::Command::EndFrame: + if (state.instancedAttributesUsed) { + for (int i = 0; i < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT; ++i) { + if (state.nonzeroAttribDivisor[i]) + 
f->glVertexAttribDivisor(GLuint(i), 0); + } + for (int i = CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT; i <= state.maxUntrackedInstancedAttribute; ++i) + f->glVertexAttribDivisor(GLuint(i), 0); + state.instancedAttributesUsed = false; + } +#ifdef Q_OS_WASM + for (int i = 0; i < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT; ++i) { + if (state.enabledAttribArrays[i]) { + f->glDisableVertexAttribArray(GLuint(i)); + state.enabledAttribArrays[i] = false; + } + } +#endif if (vao) f->glBindVertexArray(0); + if (cmd.args.endFrame.timestampQuery) + glQueryCounter(cmd.args.endFrame.timestampQuery, GL_TIMESTAMP); break; case QGles2CommandBuffer::Command::ResetFrame: if (vao) @@ -2187,6 +3028,26 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) { QGles2GraphicsPipeline *psD = QRHI_RES(QGles2GraphicsPipeline, cmd.args.bindVertexBuffer.ps); if (psD) { + if (state.lastBindVertexBuffer.ps == psD + && state.lastBindVertexBuffer.buffer == cmd.args.bindVertexBuffer.buffer + && state.lastBindVertexBuffer.offset == cmd.args.bindVertexBuffer.offset + && state.lastBindVertexBuffer.binding == cmd.args.bindVertexBuffer.binding) + { + // The pipeline and so the vertex input layout is + // immutable, no point in issuing the exact same set of + // glVertexAttribPointer again and again for the same buffer. 
+ break; + } + state.lastBindVertexBuffer.ps = psD; + state.lastBindVertexBuffer.buffer = cmd.args.bindVertexBuffer.buffer; + state.lastBindVertexBuffer.offset = cmd.args.bindVertexBuffer.offset; + state.lastBindVertexBuffer.binding = cmd.args.bindVertexBuffer.binding; + + if (cmd.args.bindVertexBuffer.buffer != state.currentArrayBuffer) { + state.currentArrayBuffer = cmd.args.bindVertexBuffer.buffer; + // we do not support more than one vertex buffer + f->glBindBuffer(GL_ARRAY_BUFFER, state.currentArrayBuffer); + } for (auto it = psD->m_vertexInputLayout.cbeginAttributes(), itEnd = psD->m_vertexInputLayout.cendAttributes(); it != itEnd; ++it) { @@ -2194,12 +3055,6 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) if (bindingIdx != cmd.args.bindVertexBuffer.binding) continue; - if (cmd.args.bindVertexBuffer.buffer != currentArrayBuffer) { - currentArrayBuffer = cmd.args.bindVertexBuffer.buffer; - // we do not support more than one vertex buffer - f->glBindBuffer(GL_ARRAY_BUFFER, currentArrayBuffer); - } - const QRhiVertexInputBinding *inputBinding = psD->m_vertexInputLayout.bindingAt(bindingIdx); const int stride = int(inputBinding->stride()); int size = 1; @@ -2253,33 +3108,111 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) type = GL_UNSIGNED_INT; size = 1; break; + case QRhiVertexInputAttribute::SInt4: + type = GL_INT; + size = 4; + break; + case QRhiVertexInputAttribute::SInt3: + type = GL_INT; + size = 3; + break; + case QRhiVertexInputAttribute::SInt2: + type = GL_INT; + size = 2; + break; + case QRhiVertexInputAttribute::SInt: + type = GL_INT; + size = 1; + break; + case QRhiVertexInputAttribute::Half4: + type = GL_HALF_FLOAT; + size = 4; + break; + case QRhiVertexInputAttribute::Half3: + type = GL_HALF_FLOAT; + size = 3; + break; + case QRhiVertexInputAttribute::Half2: + type = GL_HALF_FLOAT; + size = 2; + break; + case QRhiVertexInputAttribute::Half: + type = GL_HALF_FLOAT; + size = 1; + break; + case 
QRhiVertexInputAttribute::UShort4: + type = GL_UNSIGNED_SHORT; + size = 4; + break; + case QRhiVertexInputAttribute::UShort3: + type = GL_UNSIGNED_SHORT; + size = 3; + break; + case QRhiVertexInputAttribute::UShort2: + type = GL_UNSIGNED_SHORT; + size = 2; + break; + case QRhiVertexInputAttribute::UShort: + type = GL_UNSIGNED_SHORT; + size = 1; + break; + case QRhiVertexInputAttribute::SShort4: + type = GL_SHORT; + size = 4; + break; + case QRhiVertexInputAttribute::SShort3: + type = GL_SHORT; + size = 3; + break; + case QRhiVertexInputAttribute::SShort2: + type = GL_SHORT; + size = 2; + break; + case QRhiVertexInputAttribute::SShort: + type = GL_SHORT; + size = 1; + break; default: break; } const int locationIdx = it->location(); quint32 ofs = it->offset() + cmd.args.bindVertexBuffer.offset; - if (type == GL_UNSIGNED_INT) { - if (caps.uintAttributes) { + if (type == GL_UNSIGNED_INT || type == GL_INT) { + if (caps.intAttributes) { f->glVertexAttribIPointer(GLuint(locationIdx), size, type, stride, reinterpret_cast<const GLvoid *>(quintptr(ofs))); } else { - qWarning("Current RHI backend does not support UIntAttributes. Check supported features."); + qWarning("Current RHI backend does not support IntAttributes. 
Check supported features."); // This is a trick to disable this attribute - if (locationIdx < TRACKED_ATTRIB_COUNT) - enabledAttribArrays[locationIdx] = true; + if (locationIdx < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT) + state.enabledAttribArrays[locationIdx] = true; } } else { f->glVertexAttribPointer(GLuint(locationIdx), size, type, normalize, stride, reinterpret_cast<const GLvoid *>(quintptr(ofs))); } - if (locationIdx >= TRACKED_ATTRIB_COUNT || !enabledAttribArrays[locationIdx]) { - if (locationIdx < TRACKED_ATTRIB_COUNT) - enabledAttribArrays[locationIdx] = true; + if (locationIdx >= CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT || !state.enabledAttribArrays[locationIdx]) { + if (locationIdx < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT) + state.enabledAttribArrays[locationIdx] = true; f->glEnableVertexAttribArray(GLuint(locationIdx)); } - if (inputBinding->classification() == QRhiVertexInputBinding::PerInstance && caps.instancing) - f->glVertexAttribDivisor(GLuint(locationIdx), GLuint(inputBinding->instanceStepRate())); + if (inputBinding->classification() == QRhiVertexInputBinding::PerInstance && caps.instancing) { + f->glVertexAttribDivisor(GLuint(locationIdx), inputBinding->instanceStepRate()); + if (Q_LIKELY(locationIdx < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT)) + state.nonzeroAttribDivisor[locationIdx] = true; + else + state.maxUntrackedInstancedAttribute = qMax(state.maxUntrackedInstancedAttribute, locationIdx); + state.instancedAttributesUsed = true; + } else if ((locationIdx < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT + && state.nonzeroAttribDivisor[locationIdx]) + || Q_UNLIKELY(locationIdx >= CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT + && locationIdx <= state.maxUntrackedInstancedAttribute)) + { + f->glVertexAttribDivisor(GLuint(locationIdx), 0); + if (locationIdx < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT) + state.nonzeroAttribDivisor[locationIdx] = false; + } } } else { 
qWarning("No graphics pipeline active for setVertexInput; ignored"); @@ -2287,10 +3220,13 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) } break; case QGles2CommandBuffer::Command::BindIndexBuffer: - indexType = cmd.args.bindIndexBuffer.type; - indexStride = indexType == GL_UNSIGNED_SHORT ? sizeof(quint16) : sizeof(quint32); - indexOffset = cmd.args.bindIndexBuffer.offset; - f->glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, cmd.args.bindIndexBuffer.buffer); + state.indexType = cmd.args.bindIndexBuffer.type; + state.indexStride = state.indexType == GL_UNSIGNED_SHORT ? sizeof(quint16) : sizeof(quint32); + state.indexOffset = cmd.args.bindIndexBuffer.offset; + if (state.currentElementArrayBuffer != cmd.args.bindIndexBuffer.buffer) { + state.currentElementArrayBuffer = cmd.args.bindIndexBuffer.buffer; + f->glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, state.currentElementArrayBuffer); + } break; case QGles2CommandBuffer::Command::Draw: { @@ -2312,32 +3248,32 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) QGles2GraphicsPipeline *psD = QRHI_RES(QGles2GraphicsPipeline, cmd.args.drawIndexed.ps); if (psD) { const GLvoid *ofs = reinterpret_cast<const GLvoid *>( - quintptr(cmd.args.drawIndexed.firstIndex * indexStride + indexOffset)); + quintptr(cmd.args.drawIndexed.firstIndex * state.indexStride + state.indexOffset)); if (cmd.args.drawIndexed.instanceCount == 1 || !caps.instancing) { if (cmd.args.drawIndexed.baseVertex != 0 && caps.baseVertex) { f->glDrawElementsBaseVertex(psD->drawMode, GLsizei(cmd.args.drawIndexed.indexCount), - indexType, + state.indexType, ofs, cmd.args.drawIndexed.baseVertex); } else { f->glDrawElements(psD->drawMode, GLsizei(cmd.args.drawIndexed.indexCount), - indexType, + state.indexType, ofs); } } else { if (cmd.args.drawIndexed.baseVertex != 0 && caps.baseVertex) { f->glDrawElementsInstancedBaseVertex(psD->drawMode, GLsizei(cmd.args.drawIndexed.indexCount), - indexType, + state.indexType, ofs, 
GLsizei(cmd.args.drawIndexed.instanceCount), cmd.args.drawIndexed.baseVertex); } else { f->glDrawElementsInstanced(psD->drawMode, GLsizei(cmd.args.drawIndexed.indexCount), - indexType, + state.indexType, ofs, GLsizei(cmd.args.drawIndexed.instanceCount)); } @@ -2351,35 +3287,40 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) executeBindGraphicsPipeline(cbD, QRHI_RES(QGles2GraphicsPipeline, cmd.args.bindGraphicsPipeline.ps)); break; case QGles2CommandBuffer::Command::BindShaderResources: - bindShaderResources(cmd.args.bindShaderResources.maybeGraphicsPs, + bindShaderResources(cbD, + cmd.args.bindShaderResources.maybeGraphicsPs, cmd.args.bindShaderResources.maybeComputePs, cmd.args.bindShaderResources.srb, cmd.args.bindShaderResources.dynamicOffsetPairs, cmd.args.bindShaderResources.dynamicOffsetCount); break; case QGles2CommandBuffer::Command::BindFramebuffer: + { + QVarLengthArray<GLenum, 8> bufs; if (cmd.args.bindFramebuffer.fbo) { f->glBindFramebuffer(GL_FRAMEBUFFER, cmd.args.bindFramebuffer.fbo); + const int colorAttCount = cmd.args.bindFramebuffer.colorAttCount; + bufs.append(colorAttCount > 0 ? GL_COLOR_ATTACHMENT0 : GL_NONE); if (caps.maxDrawBuffers > 1) { - const int colorAttCount = cmd.args.bindFramebuffer.colorAttCount; - QVarLengthArray<GLenum, 8> bufs; - for (int i = 0; i < colorAttCount; ++i) + for (int i = 1; i < colorAttCount; ++i) bufs.append(GL_COLOR_ATTACHMENT0 + uint(i)); - f->glDrawBuffers(colorAttCount, bufs.constData()); } } else { f->glBindFramebuffer(GL_FRAMEBUFFER, ctx->defaultFramebufferObject()); - if (caps.maxDrawBuffers > 1) { - GLenum bufs = GL_BACK; - f->glDrawBuffers(1, &bufs); - } + if (cmd.args.bindFramebuffer.stereo && cmd.args.bindFramebuffer.stereoTarget == QRhiSwapChain::RightBuffer) + bufs.append(GL_BACK_RIGHT); + else + bufs.append(caps.gles ? 
GL_BACK : GL_BACK_LEFT); } - if (caps.srgbCapableDefaultFramebuffer) { + if (caps.hasDrawBuffersFunc) + f->glDrawBuffers(bufs.count(), bufs.constData()); + if (caps.srgbWriteControl) { if (cmd.args.bindFramebuffer.srgb) f->glEnable(GL_FRAMEBUFFER_SRGB); else f->glDisable(GL_FRAMEBUFFER_SRGB); } + } break; case QGles2CommandBuffer::Command::Clear: f->glDisable(GL_SCISSOR_TEST); @@ -2391,20 +3332,22 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) f->glDepthMask(GL_TRUE); f->glClearDepthf(cmd.args.clear.d); } - if (cmd.args.clear.mask & GL_STENCIL_BUFFER_BIT) + if (cmd.args.clear.mask & GL_STENCIL_BUFFER_BIT) { + f->glStencilMask(0xFF); f->glClearStencil(GLint(cmd.args.clear.s)); + } f->glClear(cmd.args.clear.mask); cbD->graphicsPassState.reset(); // altered depth/color write, invalidate in order to avoid confusing the state tracking break; case QGles2CommandBuffer::Command::BufferSubData: - f->glBindBuffer(cmd.args.bufferSubData.target, cmd.args.bufferSubData.buffer); + bindVertexIndexBufferWithStateReset(&state, f, cmd.args.bufferSubData.target, cmd.args.bufferSubData.buffer); f->glBufferSubData(cmd.args.bufferSubData.target, cmd.args.bufferSubData.offset, cmd.args.bufferSubData.size, cmd.args.bufferSubData.data); break; case QGles2CommandBuffer::Command::GetBufferSubData: { - QRhiBufferReadbackResult *result = cmd.args.getBufferSubData.result; - f->glBindBuffer(cmd.args.getBufferSubData.target, cmd.args.getBufferSubData.buffer); + QRhiReadbackResult *result = cmd.args.getBufferSubData.result; + bindVertexIndexBufferWithStateReset(&state, f, cmd.args.getBufferSubData.target, cmd.args.getBufferSubData.buffer); if (caps.gles) { if (caps.properMapBuffer) { void *p = f->glMapBufferRange(cmd.args.getBufferSubData.target, @@ -2433,13 +3376,35 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) GLuint fbo; f->glGenFramebuffers(1, &fbo); f->glBindFramebuffer(GL_FRAMEBUFFER, fbo); - f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - 
cmd.args.copyTex.srcFaceTarget, cmd.args.copyTex.srcTexture, cmd.args.copyTex.srcLevel); + if (cmd.args.copyTex.srcTarget == GL_TEXTURE_3D + || cmd.args.copyTex.srcTarget == GL_TEXTURE_2D_ARRAY + || cmd.args.copyTex.srcTarget == GL_TEXTURE_1D_ARRAY) { + f->glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, cmd.args.copyTex.srcTexture, + cmd.args.copyTex.srcLevel, cmd.args.copyTex.srcZ); + } else if (cmd.args.copyTex.srcTarget == GL_TEXTURE_1D) { + glFramebufferTexture1D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.copyTex.srcTarget, cmd.args.copyTex.srcTexture, + cmd.args.copyTex.srcLevel); + } else { + f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.copyTex.srcFaceTarget, cmd.args.copyTex.srcTexture, cmd.args.copyTex.srcLevel); + } f->glBindTexture(cmd.args.copyTex.dstTarget, cmd.args.copyTex.dstTexture); - f->glCopyTexSubImage2D(cmd.args.copyTex.dstFaceTarget, cmd.args.copyTex.dstLevel, - cmd.args.copyTex.dstX, cmd.args.copyTex.dstY, - cmd.args.copyTex.srcX, cmd.args.copyTex.srcY, - cmd.args.copyTex.w, cmd.args.copyTex.h); + if (cmd.args.copyTex.dstTarget == GL_TEXTURE_3D || cmd.args.copyTex.dstTarget == GL_TEXTURE_2D_ARRAY) { + f->glCopyTexSubImage3D(cmd.args.copyTex.dstTarget, cmd.args.copyTex.dstLevel, + cmd.args.copyTex.dstX, cmd.args.copyTex.dstY, cmd.args.copyTex.dstZ, + cmd.args.copyTex.srcX, cmd.args.copyTex.srcY, + cmd.args.copyTex.w, cmd.args.copyTex.h); + } else if (cmd.args.copyTex.dstTarget == GL_TEXTURE_1D) { + glCopyTexSubImage1D(cmd.args.copyTex.dstTarget, cmd.args.copyTex.dstLevel, + cmd.args.copyTex.dstX, cmd.args.copyTex.srcX, + cmd.args.copyTex.srcY, cmd.args.copyTex.w); + } else { + f->glCopyTexSubImage2D(cmd.args.copyTex.dstFaceTarget, cmd.args.copyTex.dstLevel, + cmd.args.copyTex.dstX, cmd.args.copyTex.dstY, + cmd.args.copyTex.srcX, cmd.args.copyTex.srcY, + cmd.args.copyTex.w, cmd.args.copyTex.h); + } f->glBindFramebuffer(GL_FRAMEBUFFER, ctx->defaultFramebufferObject()); f->glDeleteFramebuffers(1, 
&fbo); } @@ -2457,8 +3422,16 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) if (mipLevel == 0 || caps.nonBaseLevelFramebufferTexture) { f->glGenFramebuffers(1, &fbo); f->glBindFramebuffer(GL_FRAMEBUFFER, fbo); - f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - cmd.args.readPixels.readTarget, cmd.args.readPixels.texture, mipLevel); + if (cmd.args.readPixels.slice3D >= 0) { + f->glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + tex, mipLevel, cmd.args.readPixels.slice3D); + } else if (cmd.args.readPixels.readTarget == GL_TEXTURE_1D) { + glFramebufferTexture1D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.readPixels.readTarget, tex, mipLevel); + } else { + f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.readPixels.readTarget, tex, mipLevel); + } } } else { result->pixelSize = currentSwapChain->pixelSize; @@ -2488,8 +3461,35 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) } } } else { - result->data.resize(w * h * 4); - f->glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, result->data.data()); + switch (result->format) { + // For floating point formats try it because this can be + // relevant for some use cases; if it works, then fine, if + // not, there's nothing we can do. 
+ case QRhiTexture::RGBA16F: + result->data.resize(w * h * 8); + f->glReadPixels(0, 0, w, h, GL_RGBA, GL_HALF_FLOAT, result->data.data()); + break; + case QRhiTexture::R16F: + result->data.resize(w * h * 2); + f->glReadPixels(0, 0, w, h, GL_RED, GL_HALF_FLOAT, result->data.data()); + break; + case QRhiTexture::R32F: + result->data.resize(w * h * 4); + f->glReadPixels(0, 0, w, h, GL_RED, GL_FLOAT, result->data.data()); + break; + case QRhiTexture::RGBA32F: + result->data.resize(w * h * 16); + f->glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, result->data.data()); + break; + case QRhiTexture::RGB10A2: + result->data.resize(w * h * 4); + f->glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, result->data.data()); + break; + default: + result->data.resize(w * h * 4); + f->glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, result->data.data()); + break; + } } } else { result->data.resize(w * h * 4); @@ -2507,44 +3507,196 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) f->glBindTexture(cmd.args.subImage.target, cmd.args.subImage.texture); if (cmd.args.subImage.rowStartAlign != 4) f->glPixelStorei(GL_UNPACK_ALIGNMENT, cmd.args.subImage.rowStartAlign); - f->glTexSubImage2D(cmd.args.subImage.faceTarget, cmd.args.subImage.level, - cmd.args.subImage.dx, cmd.args.subImage.dy, - cmd.args.subImage.w, cmd.args.subImage.h, - cmd.args.subImage.glformat, cmd.args.subImage.gltype, - cmd.args.subImage.data); + if (cmd.args.subImage.rowLength != 0) + f->glPixelStorei(GL_UNPACK_ROW_LENGTH, cmd.args.subImage.rowLength); + if (cmd.args.subImage.target == GL_TEXTURE_3D || cmd.args.subImage.target == GL_TEXTURE_2D_ARRAY) { + f->glTexSubImage3D(cmd.args.subImage.target, cmd.args.subImage.level, + cmd.args.subImage.dx, cmd.args.subImage.dy, cmd.args.subImage.dz, + cmd.args.subImage.w, cmd.args.subImage.h, 1, + cmd.args.subImage.glformat, cmd.args.subImage.gltype, + cmd.args.subImage.data); + } else if (cmd.args.subImage.target == GL_TEXTURE_1D) { + 
glTexSubImage1D(cmd.args.subImage.target, cmd.args.subImage.level, + cmd.args.subImage.dx, cmd.args.subImage.w, + cmd.args.subImage.glformat, cmd.args.subImage.gltype, + cmd.args.subImage.data); + } else { + f->glTexSubImage2D(cmd.args.subImage.faceTarget, cmd.args.subImage.level, + cmd.args.subImage.dx, cmd.args.subImage.dy, + cmd.args.subImage.w, cmd.args.subImage.h, + cmd.args.subImage.glformat, cmd.args.subImage.gltype, + cmd.args.subImage.data); + } if (cmd.args.subImage.rowStartAlign != 4) f->glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + if (cmd.args.subImage.rowLength != 0) + f->glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); break; case QGles2CommandBuffer::Command::CompressedImage: f->glBindTexture(cmd.args.compressedImage.target, cmd.args.compressedImage.texture); - f->glCompressedTexImage2D(cmd.args.compressedImage.faceTarget, cmd.args.compressedImage.level, - cmd.args.compressedImage.glintformat, - cmd.args.compressedImage.w, cmd.args.compressedImage.h, 0, - cmd.args.compressedImage.size, cmd.args.compressedImage.data); + if (cmd.args.compressedImage.target == GL_TEXTURE_3D || cmd.args.compressedImage.target == GL_TEXTURE_2D_ARRAY) { + f->glCompressedTexImage3D(cmd.args.compressedImage.target, cmd.args.compressedImage.level, + cmd.args.compressedImage.glintformat, + cmd.args.compressedImage.w, cmd.args.compressedImage.h, cmd.args.compressedImage.depth, + 0, cmd.args.compressedImage.size, cmd.args.compressedImage.data); + } else if (cmd.args.compressedImage.target == GL_TEXTURE_1D) { + glCompressedTexImage1D( + cmd.args.compressedImage.target, cmd.args.compressedImage.level, + cmd.args.compressedImage.glintformat, cmd.args.compressedImage.w, 0, + cmd.args.compressedImage.size, cmd.args.compressedImage.data); + } else { + f->glCompressedTexImage2D(cmd.args.compressedImage.faceTarget, cmd.args.compressedImage.level, + cmd.args.compressedImage.glintformat, + cmd.args.compressedImage.w, cmd.args.compressedImage.h, + 0, cmd.args.compressedImage.size, 
cmd.args.compressedImage.data); + } break; case QGles2CommandBuffer::Command::CompressedSubImage: f->glBindTexture(cmd.args.compressedSubImage.target, cmd.args.compressedSubImage.texture); - f->glCompressedTexSubImage2D(cmd.args.compressedSubImage.faceTarget, cmd.args.compressedSubImage.level, - cmd.args.compressedSubImage.dx, cmd.args.compressedSubImage.dy, - cmd.args.compressedSubImage.w, cmd.args.compressedSubImage.h, - cmd.args.compressedSubImage.glintformat, - cmd.args.compressedSubImage.size, cmd.args.compressedSubImage.data); + if (cmd.args.compressedSubImage.target == GL_TEXTURE_3D || cmd.args.compressedSubImage.target == GL_TEXTURE_2D_ARRAY) { + f->glCompressedTexSubImage3D(cmd.args.compressedSubImage.target, cmd.args.compressedSubImage.level, + cmd.args.compressedSubImage.dx, cmd.args.compressedSubImage.dy, cmd.args.compressedSubImage.dz, + cmd.args.compressedSubImage.w, cmd.args.compressedSubImage.h, 1, + cmd.args.compressedSubImage.glintformat, + cmd.args.compressedSubImage.size, cmd.args.compressedSubImage.data); + } else if (cmd.args.compressedSubImage.target == GL_TEXTURE_1D) { /* dispatch on this command's own args member */ + glCompressedTexSubImage1D( + cmd.args.compressedSubImage.target, cmd.args.compressedSubImage.level, + cmd.args.compressedSubImage.dx, cmd.args.compressedSubImage.w, + cmd.args.compressedSubImage.glintformat, cmd.args.compressedSubImage.size, + cmd.args.compressedSubImage.data); + } else { + f->glCompressedTexSubImage2D(cmd.args.compressedSubImage.faceTarget, cmd.args.compressedSubImage.level, + cmd.args.compressedSubImage.dx, cmd.args.compressedSubImage.dy, + cmd.args.compressedSubImage.w, cmd.args.compressedSubImage.h, + cmd.args.compressedSubImage.glintformat, + cmd.args.compressedSubImage.size, cmd.args.compressedSubImage.data); + } break; case QGles2CommandBuffer::Command::BlitFromRenderbuffer: { + // Altering the scissor state, so reset the stored state, although + // not strictly required as long as blit is done in endPass() only.
+ cbD->graphicsPassState.reset(); + f->glDisable(GL_SCISSOR_TEST); GLuint fbo[2]; f->glGenFramebuffers(2, fbo); f->glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo[0]); - f->glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_RENDERBUFFER, cmd.args.blitFromRb.renderbuffer); + const bool ds = cmd.args.blitFromRenderbuffer.isDepthStencil; + if (ds) { + f->glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, cmd.args.blitFromRenderbuffer.renderbuffer); + f->glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + GL_RENDERBUFFER, cmd.args.blitFromRenderbuffer.renderbuffer); + } else { + f->glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, cmd.args.blitFromRenderbuffer.renderbuffer); + } f->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo[1]); - - f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, cmd.args.blitFromRb.target, - cmd.args.blitFromRb.texture, cmd.args.blitFromRb.dstLevel); - f->glBlitFramebuffer(0, 0, cmd.args.blitFromRb.w, cmd.args.blitFromRb.h, - 0, 0, cmd.args.blitFromRb.w, cmd.args.blitFromRb.h, - GL_COLOR_BUFFER_BIT, - GL_LINEAR); + if (cmd.args.blitFromRenderbuffer.target == GL_TEXTURE_3D || cmd.args.blitFromRenderbuffer.target == GL_TEXTURE_2D_ARRAY) { + if (ds) { + f->glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + cmd.args.blitFromRenderbuffer.dstTexture, + cmd.args.blitFromRenderbuffer.dstLevel, + cmd.args.blitFromRenderbuffer.dstLayer); + f->glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + cmd.args.blitFromRenderbuffer.dstTexture, + cmd.args.blitFromRenderbuffer.dstLevel, + cmd.args.blitFromRenderbuffer.dstLayer); + } else { + f->glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.blitFromRenderbuffer.dstTexture, + cmd.args.blitFromRenderbuffer.dstLevel, + cmd.args.blitFromRenderbuffer.dstLayer); + } + } else { + if (ds) { + 
f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, cmd.args.blitFromRenderbuffer.target, + cmd.args.blitFromRenderbuffer.dstTexture, cmd.args.blitFromRenderbuffer.dstLevel); + f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, cmd.args.blitFromRenderbuffer.target, + cmd.args.blitFromRenderbuffer.dstTexture, cmd.args.blitFromRenderbuffer.dstLevel); + } else { + f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, cmd.args.blitFromRenderbuffer.target, + cmd.args.blitFromRenderbuffer.dstTexture, cmd.args.blitFromRenderbuffer.dstLevel); + } + } + f->glBlitFramebuffer(0, 0, cmd.args.blitFromRenderbuffer.w, cmd.args.blitFromRenderbuffer.h, + 0, 0, cmd.args.blitFromRenderbuffer.w, cmd.args.blitFromRenderbuffer.h, + ds ? GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT : GL_COLOR_BUFFER_BIT, + GL_NEAREST); // Qt 5 used Nearest when resolving samples, stick to that + f->glBindFramebuffer(GL_FRAMEBUFFER, ctx->defaultFramebufferObject()); + f->glDeleteFramebuffers(2, fbo); + } + break; + case QGles2CommandBuffer::Command::BlitFromTexture: + { + // Altering the scissor state, so reset the stored state, although + // not strictly required as long as blit is done in endPass() only. 
+ cbD->graphicsPassState.reset(); + f->glDisable(GL_SCISSOR_TEST); + GLuint fbo[2]; + f->glGenFramebuffers(2, fbo); + f->glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo[0]); + const bool ds = cmd.args.blitFromTexture.isDepthStencil; + if (cmd.args.blitFromTexture.srcTarget == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + if (ds) { + f->glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + cmd.args.blitFromTexture.srcTexture, + cmd.args.blitFromTexture.srcLevel, + cmd.args.blitFromTexture.srcLayer); + f->glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + cmd.args.blitFromTexture.srcTexture, + cmd.args.blitFromTexture.srcLevel, + cmd.args.blitFromTexture.srcLayer); + } else { + f->glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.blitFromTexture.srcTexture, + cmd.args.blitFromTexture.srcLevel, + cmd.args.blitFromTexture.srcLayer); + } + } else { + if (ds) { + f->glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, cmd.args.blitFromTexture.srcTarget, + cmd.args.blitFromTexture.srcTexture, cmd.args.blitFromTexture.srcLevel); + f->glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, cmd.args.blitFromTexture.srcTarget, + cmd.args.blitFromTexture.srcTexture, cmd.args.blitFromTexture.srcLevel); + } else { + f->glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, cmd.args.blitFromTexture.srcTarget, + cmd.args.blitFromTexture.srcTexture, cmd.args.blitFromTexture.srcLevel); + } + } + f->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo[1]); + if (cmd.args.blitFromTexture.dstTarget == GL_TEXTURE_3D || cmd.args.blitFromTexture.dstTarget == GL_TEXTURE_2D_ARRAY) { + if (ds) { + f->glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + cmd.args.blitFromTexture.dstTexture, + cmd.args.blitFromTexture.dstLevel, + cmd.args.blitFromTexture.dstLayer); + f->glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + cmd.args.blitFromTexture.dstTexture, + 
cmd.args.blitFromTexture.dstLevel, + cmd.args.blitFromTexture.dstLayer); + } else { + f->glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + cmd.args.blitFromTexture.dstTexture, + cmd.args.blitFromTexture.dstLevel, + cmd.args.blitFromTexture.dstLayer); + } + } else { + if (ds) { + f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, cmd.args.blitFromTexture.dstTarget, + cmd.args.blitFromTexture.dstTexture, cmd.args.blitFromTexture.dstLevel); + f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, cmd.args.blitFromTexture.dstTarget, + cmd.args.blitFromTexture.dstTexture, cmd.args.blitFromTexture.dstLevel); + } else { + f->glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, cmd.args.blitFromTexture.dstTarget, + cmd.args.blitFromTexture.dstTexture, cmd.args.blitFromTexture.dstLevel); + } + } + f->glBlitFramebuffer(0, 0, cmd.args.blitFromTexture.w, cmd.args.blitFromTexture.h, + 0, 0, cmd.args.blitFromTexture.w, cmd.args.blitFromTexture.h, + ds ? 
GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT : GL_COLOR_BUFFER_BIT, + GL_NEAREST); // Qt 5 used Nearest when resolving samples, stick to that f->glBindFramebuffer(GL_FRAMEBUFFER, ctx->defaultFramebufferObject()); f->glDeleteFramebuffers(2, fbo); } @@ -2564,6 +3716,8 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) break; case QGles2CommandBuffer::Command::BarriersForPass: { + if (!caps.compute) + break; GLbitfield barriers = 0; QRhiPassResourceTracker &tracker(cbD->passResTrackers[cmd.args.barriersForPass.trackerIndex]); // we only care about after-write, not any other accesses, and @@ -2576,14 +3730,14 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) for (auto it = tracker.cbeginBuffers(), itEnd = tracker.cendBuffers(); it != itEnd; ++it) { QGles2Buffer::Access accessBeforePass = QGles2Buffer::Access(it->stateAtPassBegin.access); if (bufferAccessIsWrite(accessBeforePass)) - barriers |= GL_ALL_BARRIER_BITS; + barriers |= barriersForBuffer(); } for (auto it = tracker.cbeginTextures(), itEnd = tracker.cendTextures(); it != itEnd; ++it) { QGles2Texture::Access accessBeforePass = QGles2Texture::Access(it->stateAtPassBegin.access); if (textureAccessIsWrite(accessBeforePass)) - barriers |= GL_ALL_BARRIER_BITS; + barriers |= barriersForTexture(); } - if (barriers && caps.compute) + if (barriers) f->glMemoryBarrier(barriers); } break; @@ -2591,10 +3745,25 @@ void QRhiGles2::executeCommandBuffer(QRhiCommandBuffer *cb) if (caps.compute) f->glMemoryBarrier(cmd.args.barrier.barriers); break; + case QGles2CommandBuffer::Command::InvalidateFramebuffer: + if (caps.gles && caps.ctxMajor >= 3) { + f->glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, + cmd.args.invalidateFramebuffer.attCount, + cmd.args.invalidateFramebuffer.att); + } + break; default: break; } } + if (state.instancedAttributesUsed) { + for (int i = 0; i < CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT; ++i) { + if (state.nonzeroAttribDivisor[i]) + f->glVertexAttribDivisor(GLuint(i), 0); + 
} + for (int i = CommandBufferExecTrackedState::TRACKED_ATTRIB_COUNT; i <= state.maxUntrackedInstancedAttribute; ++i) + f->glVertexAttribDivisor(GLuint(i), 0); + } } void QRhiGles2::executeBindGraphicsPipeline(QGles2CommandBuffer *cbD, QGles2GraphicsPipeline *psD) @@ -2631,6 +3800,12 @@ void QRhiGles2::executeBindGraphicsPipeline(QGles2CommandBuffer *cbD, QGles2Grap f->glFrontFace(frontFace); } + const GLenum polygonMode = toGlPolygonMode(psD->m_polygonMode); + if (glPolygonMode && (forceUpdate || polygonMode != state.polygonMode)) { + state.polygonMode = polygonMode; + glPolygonMode(GL_FRONT_AND_BACK, polygonMode); + } + if (!psD->m_targetBlends.isEmpty()) { // We do not have MRT support here, meaning all targets use the blend // params from the first one. This is technically incorrect, even if @@ -2771,12 +3946,21 @@ void QRhiGles2::executeBindGraphicsPipeline(QGles2CommandBuffer *cbD, QGles2Grap } } + if (psD->m_topology == QRhiGraphicsPipeline::Patches) { + const int cpCount = psD->m_patchControlPointCount; + if (forceUpdate || cpCount != state.cpCount) { + state.cpCount = cpCount; + f->glPatchParameteri(GL_PATCH_VERTICES, qMax(1, cpCount)); + } + } + f->glUseProgram(psD->program); } -static inline void qrhi_std140_to_packed(float *dst, int vecSize, int elemCount, const void *src) +template <typename T> +static inline void qrhi_std140_to_packed(T *dst, int vecSize, int elemCount, const void *src) { - const float *p = reinterpret_cast<const float *>(src); + const T *p = reinterpret_cast<const T *>(src); for (int i = 0; i < elemCount; ++i) { for (int j = 0; j < vecSize; ++j) dst[vecSize * i + j] = *p++; @@ -2784,53 +3968,129 @@ static inline void qrhi_std140_to_packed(float *dst, int vecSize, int elemCount, } } -void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiComputePipeline *maybeComputePs, +void QRhiGles2::bindCombinedSampler(QGles2CommandBuffer *cbD, QGles2Texture *texD, QGles2Sampler *samplerD, + void *ps, uint psGeneration, int 
glslLocation, + int *texUnit, bool *activeTexUnitAltered) +{ + const bool samplerStateValid = texD->samplerState == samplerD->d; + const bool cachedStateInRange = *texUnit < 16; + bool updateTextureBinding = true; + if (samplerStateValid && cachedStateInRange) { + // If we already encountered the same texture with + // the same pipeline for this texture unit in the + // current pass, then the shader program already + // has the uniform set. As in a 3D scene one model + // often has more than one associated texture map, + // the savings here can become significant, + // depending on the scene. + if (cbD->textureUnitState[*texUnit].ps == ps + && cbD->textureUnitState[*texUnit].psGeneration == psGeneration + && cbD->textureUnitState[*texUnit].texture == texD->texture) + { + updateTextureBinding = false; + } + } + if (updateTextureBinding) { + f->glActiveTexture(GL_TEXTURE0 + uint(*texUnit)); + *activeTexUnitAltered = true; + f->glBindTexture(texD->target, texD->texture); + f->glUniform1i(glslLocation, *texUnit); + if (cachedStateInRange) { + cbD->textureUnitState[*texUnit].ps = ps; + cbD->textureUnitState[*texUnit].psGeneration = psGeneration; + cbD->textureUnitState[*texUnit].texture = texD->texture; + } + } + ++(*texUnit); + if (!samplerStateValid) { + f->glTexParameteri(texD->target, GL_TEXTURE_MIN_FILTER, GLint(samplerD->d.glminfilter)); + f->glTexParameteri(texD->target, GL_TEXTURE_MAG_FILTER, GLint(samplerD->d.glmagfilter)); + f->glTexParameteri(texD->target, GL_TEXTURE_WRAP_S, GLint(samplerD->d.glwraps)); + f->glTexParameteri(texD->target, GL_TEXTURE_WRAP_T, GLint(samplerD->d.glwrapt)); + if (caps.texture3D) + f->glTexParameteri(texD->target, GL_TEXTURE_WRAP_R, GLint(samplerD->d.glwrapr)); + if (caps.textureCompareMode) { + if (samplerD->d.gltexcomparefunc != GL_NEVER) { + f->glTexParameteri(texD->target, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); + f->glTexParameteri(texD->target, GL_TEXTURE_COMPARE_FUNC, GLint(samplerD->d.gltexcomparefunc)); + } 
else { + f->glTexParameteri(texD->target, GL_TEXTURE_COMPARE_MODE, GL_NONE); + } + } + texD->samplerState = samplerD->d; + } +} + +void QRhiGles2::bindShaderResources(QGles2CommandBuffer *cbD, + QRhiGraphicsPipeline *maybeGraphicsPs, QRhiComputePipeline *maybeComputePs, QRhiShaderResourceBindings *srb, const uint *dynOfsPairs, int dynOfsCount) { QGles2ShaderResourceBindings *srbD = QRHI_RES(QGles2ShaderResourceBindings, srb); - int texUnit = 0; - QVarLengthArray<float, 256> packedFloatArray; + int texUnit = 1; // start from unit 1, keep 0 for resource mgmt stuff to avoid clashes + bool activeTexUnitAltered = false; + union data32_t { + float f; + qint32 i; + }; + QVarLengthArray<data32_t, 256> packedArray; + QGles2UniformDescriptionVector &uniforms(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniforms + : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniforms); + QGles2UniformState *uniformState = maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniformState + : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniformState; + struct SeparateTexture { + QGles2Texture *texture; + int binding; + int elem; + }; + QVarLengthArray<SeparateTexture, 8> separateTextureBindings; + struct SeparateSampler { + QGles2Sampler *sampler; + int binding; + }; + QVarLengthArray<SeparateSampler, 4> separateSamplerBindings; - for (int i = 0, ie = srbD->m_bindings.count(); i != ie; ++i) { - const QRhiShaderResourceBinding::Data *b = srbD->m_bindings.at(i).data(); + for (int i = 0, ie = srbD->m_bindings.size(); i != ie; ++i) { + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->m_bindings.at(i)); switch (b->type) { case QRhiShaderResourceBinding::UniformBuffer: { int viewOffset = b->u.ubuf.offset; - if (dynOfsCount) { - for (int j = 0; j < dynOfsCount; ++j) { - if (dynOfsPairs[2 * j] == uint(b->binding)) { - viewOffset = int(dynOfsPairs[2 * j + 1]); - break; - } + for (int j = 0; j < dynOfsCount; ++j) { + if 
(dynOfsPairs[2 * j] == uint(b->binding)) { + viewOffset = int(dynOfsPairs[2 * j + 1]); + break; } } QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, b->u.ubuf.buf); - const QByteArray bufView = QByteArray::fromRawData(bufD->ubuf.constData() + viewOffset, - b->u.ubuf.maybeSize ? b->u.ubuf.maybeSize : bufD->m_size); - QGles2UniformDescriptionVector &uniforms(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->uniforms - : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->uniforms); - for (QGles2UniformDescription &uniform : uniforms) { + const char *bufView = bufD->data.constData() + viewOffset; + for (const QGles2UniformDescription &uniform : std::as_const(uniforms)) { if (uniform.binding == b->binding) { // in a uniform buffer everything is at least 4 byte aligned // so this should not cause unaligned reads - const void *src = bufView.constData() + uniform.offset; + const void *src = bufView + uniform.offset; +#ifndef QT_NO_DEBUG if (uniform.arrayDim > 0 && uniform.type != QShaderDescription::Float && uniform.type != QShaderDescription::Vec2 && uniform.type != QShaderDescription::Vec3 && uniform.type != QShaderDescription::Vec4 + && uniform.type != QShaderDescription::Int + && uniform.type != QShaderDescription::Int2 + && uniform.type != QShaderDescription::Int3 + && uniform.type != QShaderDescription::Int4 && uniform.type != QShaderDescription::Mat3 && uniform.type != QShaderDescription::Mat4) { qWarning("Uniform with buffer binding %d, buffer offset %d, type %d is an array, " - "but arrays are only supported for float, vec2, vec3, vec4, mat3 and mat4. " + "but arrays are only supported for float, vec2, vec3, vec4, int, " + "ivec2, ivec3, ivec4, mat3 and mat4. " "Only the first element will be set.", uniform.binding, uniform.offset, uniform.type); } +#endif // Our input is an std140 layout uniform block. 
See // "Standard Uniform Block Layout" in section 7.6.2.2 of @@ -2843,12 +4103,22 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform1f(uniform.glslLocation, *reinterpret_cast<const float *>(src)); + const float v = *reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 1 || thisUniformState.v[0] != v) { + thisUniformState.componentCount = 1; + thisUniformState.v[0] = v; + f->glUniform1f(uniform.glslLocation, v); + } + } else { + f->glUniform1f(uniform.glslLocation, v); + } } else { // input is 16 bytes per element as per std140, have to convert to packed - packedFloatArray.resize(elemCount); - qrhi_std140_to_packed(packedFloatArray.data(), 1, elemCount, src); - f->glUniform1fv(uniform.glslLocation, elemCount, packedFloatArray.constData()); + packedArray.resize(elemCount); + qrhi_std140_to_packed(&packedArray.data()->f, 1, elemCount, src); + f->glUniform1fv(uniform.glslLocation, elemCount, &packedArray.constData()->f); } } break; @@ -2856,11 +4126,25 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform2fv(uniform.glslLocation, 1, reinterpret_cast<const float *>(src)); + const float *v = reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 2 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1]) + { + thisUniformState.componentCount = 2; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + f->glUniform2fv(uniform.glslLocation, 1, v); + } + } else { + 
f->glUniform2fv(uniform.glslLocation, 1, v); + } } else { - packedFloatArray.resize(elemCount * 2); - qrhi_std140_to_packed(packedFloatArray.data(), 2, elemCount, src); - f->glUniform2fv(uniform.glslLocation, elemCount, packedFloatArray.constData()); + packedArray.resize(elemCount * 2); + qrhi_std140_to_packed(&packedArray.data()->f, 2, elemCount, src); + f->glUniform2fv(uniform.glslLocation, elemCount, &packedArray.constData()->f); } } break; @@ -2868,16 +4152,57 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC { const int elemCount = uniform.arrayDim; if (elemCount < 1) { - f->glUniform3fv(uniform.glslLocation, 1, reinterpret_cast<const float *>(src)); + const float *v = reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 3 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1] + || thisUniformState.v[2] != v[2]) + { + thisUniformState.componentCount = 3; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + thisUniformState.v[2] = v[2]; + f->glUniform3fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform3fv(uniform.glslLocation, 1, v); + } } else { - packedFloatArray.resize(elemCount * 3); - qrhi_std140_to_packed(packedFloatArray.data(), 3, elemCount, src); - f->glUniform3fv(uniform.glslLocation, elemCount, packedFloatArray.constData()); + packedArray.resize(elemCount * 3); + qrhi_std140_to_packed(&packedArray.data()->f, 3, elemCount, src); + f->glUniform3fv(uniform.glslLocation, elemCount, &packedArray.constData()->f); } } break; case QShaderDescription::Vec4: - f->glUniform4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast<const float *>(src)); + { + const int elemCount = uniform.arrayDim; + if (elemCount < 1) { + const float *v = reinterpret_cast<const float *>(src); + if (uniform.glslLocation <= 
QGles2UniformState::MAX_TRACKED_LOCATION) { + QGles2UniformState &thisUniformState(uniformState[uniform.glslLocation]); + if (thisUniformState.componentCount != 4 + || thisUniformState.v[0] != v[0] + || thisUniformState.v[1] != v[1] + || thisUniformState.v[2] != v[2] + || thisUniformState.v[3] != v[3]) + { + thisUniformState.componentCount = 4; + thisUniformState.v[0] = v[0]; + thisUniformState.v[1] = v[1]; + thisUniformState.v[2] = v[2]; + thisUniformState.v[3] = v[3]; + f->glUniform4fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform4fv(uniform.glslLocation, 1, v); + } + } else { + f->glUniform4fv(uniform.glslLocation, elemCount, reinterpret_cast<const float *>(src)); + } + } break; case QShaderDescription::Mat2: f->glUniformMatrix2fv(uniform.glslLocation, 1, GL_FALSE, reinterpret_cast<const float *>(src)); @@ -2894,9 +4219,9 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC memcpy(mat + 6, srcMat + 8, 3 * sizeof(float)); f->glUniformMatrix3fv(uniform.glslLocation, 1, GL_FALSE, mat); } else { - packedFloatArray.resize(elemCount * 9); - qrhi_std140_to_packed(packedFloatArray.data(), 3, elemCount * 3, src); - f->glUniformMatrix3fv(uniform.glslLocation, elemCount, GL_FALSE, packedFloatArray.constData()); + packedArray.resize(elemCount * 9); + qrhi_std140_to_packed(&packedArray.data()->f, 3, elemCount * 3, src); + f->glUniformMatrix3fv(uniform.glslLocation, elemCount, GL_FALSE, &packedArray.constData()->f); } } break; @@ -2904,16 +4229,43 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC f->glUniformMatrix4fv(uniform.glslLocation, qMax(1, uniform.arrayDim), GL_FALSE, reinterpret_cast<const float *>(src)); break; case QShaderDescription::Int: - f->glUniform1i(uniform.glslLocation, *reinterpret_cast<const qint32 *>(src)); + { + const int elemCount = uniform.arrayDim; + if (elemCount < 1) { + f->glUniform1i(uniform.glslLocation, *reinterpret_cast<const qint32 *>(src)); + } else { + 
packedArray.resize(elemCount); + qrhi_std140_to_packed(&packedArray.data()->i, 1, elemCount, src); + f->glUniform1iv(uniform.glslLocation, elemCount, &packedArray.constData()->i); + } + } break; case QShaderDescription::Int2: - f->glUniform2iv(uniform.glslLocation, 1, reinterpret_cast<const qint32 *>(src)); + { + const int elemCount = uniform.arrayDim; + if (elemCount < 1) { + f->glUniform2iv(uniform.glslLocation, 1, reinterpret_cast<const qint32 *>(src)); + } else { + packedArray.resize(elemCount * 2); + qrhi_std140_to_packed(&packedArray.data()->i, 2, elemCount, src); + f->glUniform2iv(uniform.glslLocation, elemCount, &packedArray.constData()->i); + } + } break; case QShaderDescription::Int3: - f->glUniform3iv(uniform.glslLocation, 1, reinterpret_cast<const qint32 *>(src)); + { + const int elemCount = uniform.arrayDim; + if (elemCount < 1) { + f->glUniform3iv(uniform.glslLocation, 1, reinterpret_cast<const qint32 *>(src)); + } else { + packedArray.resize(elemCount * 3); + qrhi_std140_to_packed(&packedArray.data()->i, 3, elemCount, src); + f->glUniform3iv(uniform.glslLocation, elemCount, &packedArray.constData()->i); + } + } break; case QShaderDescription::Int4: - f->glUniform4iv(uniform.glslLocation, 1, reinterpret_cast<const qint32 *>(src)); + f->glUniform4iv(uniform.glslLocation, qMax(1, uniform.arrayDim), reinterpret_cast<const qint32 *>(src)); break; case QShaderDescription::Uint: f->glUniform1ui(uniform.glslLocation, *reinterpret_cast<const quint32 *>(src)); @@ -2950,46 +4302,48 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC break; case QRhiShaderResourceBinding::SampledTexture: { - QGles2SamplerDescriptionVector &samplers(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->samplers - : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->samplers); + const QGles2SamplerDescriptionVector &samplers(maybeGraphicsPs ? 
QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->samplers + : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->samplers); + void *ps; + uint psGeneration; + if (maybeGraphicsPs) { + ps = maybeGraphicsPs; + psGeneration = QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->generation; + } else { + ps = maybeComputePs; + psGeneration = QRHI_RES(QGles2ComputePipeline, maybeComputePs)->generation; + } for (int elem = 0; elem < b->u.stex.count; ++elem) { QGles2Texture *texD = QRHI_RES(QGles2Texture, b->u.stex.texSamplers[elem].tex); QGles2Sampler *samplerD = QRHI_RES(QGles2Sampler, b->u.stex.texSamplers[elem].sampler); - for (QGles2SamplerDescription &sampler : samplers) { - if (sampler.binding == b->binding) { - f->glActiveTexture(GL_TEXTURE0 + uint(texUnit)); - f->glBindTexture(texD->target, texD->texture); - - if (texD->samplerState != samplerD->d) { - f->glTexParameteri(texD->target, GL_TEXTURE_MIN_FILTER, GLint(samplerD->d.glminfilter)); - f->glTexParameteri(texD->target, GL_TEXTURE_MAG_FILTER, GLint(samplerD->d.glmagfilter)); - f->glTexParameteri(texD->target, GL_TEXTURE_WRAP_S, GLint(samplerD->d.glwraps)); - f->glTexParameteri(texD->target, GL_TEXTURE_WRAP_T, GLint(samplerD->d.glwrapt)); - // 3D textures not supported by GLES 2.0 or by us atm... 
- //f->glTexParameteri(texD->target, GL_TEXTURE_WRAP_R, samplerD->d.glwrapr); - if (caps.textureCompareMode) { - if (samplerD->d.gltexcomparefunc != GL_NEVER) { - f->glTexParameteri(texD->target, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); - f->glTexParameteri(texD->target, GL_TEXTURE_COMPARE_FUNC, GLint(samplerD->d.gltexcomparefunc)); - } else { - f->glTexParameteri(texD->target, GL_TEXTURE_COMPARE_MODE, GL_NONE); - } - } - texD->samplerState = samplerD->d; - } - - f->glUniform1i(sampler.glslLocation + elem, texUnit); - ++texUnit; + for (const QGles2SamplerDescription &shaderSampler : samplers) { + if (shaderSampler.combinedBinding == b->binding) { + const int loc = shaderSampler.glslLocation + elem; + bindCombinedSampler(cbD, texD, samplerD, ps, psGeneration, loc, &texUnit, &activeTexUnitAltered); + break; } } } } break; + case QRhiShaderResourceBinding::Texture: + for (int elem = 0; elem < b->u.stex.count; ++elem) { + QGles2Texture *texD = QRHI_RES(QGles2Texture, b->u.stex.texSamplers[elem].tex); + separateTextureBindings.append({ texD, b->binding, elem }); + } + break; + case QRhiShaderResourceBinding::Sampler: + { + QGles2Sampler *samplerD = QRHI_RES(QGles2Sampler, b->u.stex.texSamplers[0].sampler); + separateSamplerBindings.append({ samplerD, b->binding }); + } + break; case QRhiShaderResourceBinding::ImageLoad: case QRhiShaderResourceBinding::ImageStore: case QRhiShaderResourceBinding::ImageLoadStore: { QGles2Texture *texD = QRHI_RES(QGles2Texture, b->u.simage.tex); + Q_ASSERT(texD->m_flags.testFlag(QRhiTexture::UsedWithLoadStore)); const bool layered = texD->m_flags.testFlag(QRhiTexture::CubeMap); GLenum access = GL_READ_WRITE; if (b->type == QRhiShaderResourceBinding::ImageLoad) @@ -3006,6 +4360,7 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC case QRhiShaderResourceBinding::BufferLoadStore: { QGles2Buffer *bufD = QRHI_RES(QGles2Buffer, b->u.sbuf.buf); + 
Q_ASSERT(bufD->m_usage.testFlag(QRhiBuffer::StorageBuffer)); if (b->u.sbuf.offset == 0 && b->u.sbuf.maybeSize == 0) f->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, GLuint(b->binding), bufD->buffer); else @@ -3019,7 +4374,36 @@ void QRhiGles2::bindShaderResources(QRhiGraphicsPipeline *maybeGraphicsPs, QRhiC } } - if (texUnit > 1) + if (!separateTextureBindings.isEmpty() || !separateSamplerBindings.isEmpty()) { + const QGles2SamplerDescriptionVector &samplers(maybeGraphicsPs ? QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->samplers + : QRHI_RES(QGles2ComputePipeline, maybeComputePs)->samplers); + void *ps; + uint psGeneration; + if (maybeGraphicsPs) { + ps = maybeGraphicsPs; + psGeneration = QRHI_RES(QGles2GraphicsPipeline, maybeGraphicsPs)->generation; + } else { + ps = maybeComputePs; + psGeneration = QRHI_RES(QGles2ComputePipeline, maybeComputePs)->generation; + } + for (const QGles2SamplerDescription &shaderSampler : samplers) { + if (shaderSampler.combinedBinding >= 0) + continue; + for (const SeparateSampler &sepSampler : separateSamplerBindings) { + if (sepSampler.binding != shaderSampler.sbinding) + continue; + for (const SeparateTexture &sepTex : separateTextureBindings) { + if (sepTex.binding != shaderSampler.tbinding) + continue; + const int loc = shaderSampler.glslLocation + sepTex.elem; + bindCombinedSampler(cbD, sepTex.texture, sepSampler.sampler, ps, psGeneration, + loc, &texUnit, &activeTexUnitAltered); + } + } + } + } + + if (activeTexUnitAltered) f->glActiveTexture(GL_TEXTURE0); } @@ -3036,18 +4420,24 @@ QGles2RenderTargetData *QRhiGles2::enqueueBindFramebuffer(QRhiRenderTarget *rt, QGles2RenderTargetData *rtD = nullptr; QRhiPassResourceTracker &passResTracker(cbD->passResTrackers[cbD->currentPassResTrackerIndex]); - QGles2CommandBuffer::Command fbCmd; + QGles2CommandBuffer::Command &fbCmd(cbD->commands.get()); fbCmd.cmd = QGles2CommandBuffer::Command::BindFramebuffer; + static const bool doClearBuffers = 
qEnvironmentVariableIntValue("QT_GL_NO_CLEAR_BUFFERS") == 0; + static const bool doClearColorBuffer = qEnvironmentVariableIntValue("QT_GL_NO_CLEAR_COLOR_BUFFER") == 0; + switch (rt->resourceType()) { - case QRhiResource::RenderTarget: - rtD = &QRHI_RES(QGles2ReferenceRenderTarget, rt)->d; + case QRhiResource::SwapChainRenderTarget: + rtD = &QRHI_RES(QGles2SwapChainRenderTarget, rt)->d; if (wantsColorClear) - *wantsColorClear = true; + *wantsColorClear = doClearBuffers && doClearColorBuffer; if (wantsDsClear) - *wantsDsClear = true; + *wantsDsClear = doClearBuffers; fbCmd.args.bindFramebuffer.fbo = 0; fbCmd.args.bindFramebuffer.colorAttCount = 1; + fbCmd.args.bindFramebuffer.stereo = rtD->stereoTarget.has_value(); + if (fbCmd.args.bindFramebuffer.stereo) + fbCmd.args.bindFramebuffer.stereoTarget = rtD->stereoTarget.value(); break; case QRhiResource::TextureRenderTarget: { @@ -3059,6 +4449,7 @@ QGles2RenderTargetData *QRhiGles2::enqueueBindFramebuffer(QRhiRenderTarget *rt, *wantsDsClear = !rtTex->m_flags.testFlag(QRhiTextureRenderTarget::PreserveDepthStencilContents); fbCmd.args.bindFramebuffer.fbo = rtTex->framebuffer; fbCmd.args.bindFramebuffer.colorAttCount = rtD->colorAttCount; + fbCmd.args.bindFramebuffer.stereo = false; for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments(); it != itEnd; ++it) @@ -3066,12 +4457,12 @@ QGles2RenderTargetData *QRhiGles2::enqueueBindFramebuffer(QRhiRenderTarget *rt, const QRhiColorAttachment &colorAtt(*it); QGles2Texture *texD = QRHI_RES(QGles2Texture, colorAtt.texture()); QGles2Texture *resolveTexD = QRHI_RES(QGles2Texture, colorAtt.resolveTexture()); - if (texD) { + if (texD && cbD->passNeedsResourceTracking) { trackedRegisterTexture(&passResTracker, texD, QRhiPassResourceTracker::TexColorOutput, QRhiPassResourceTracker::TexColorOutputStage); } - if (resolveTexD) { + if (resolveTexD && cbD->passNeedsResourceTracking) { trackedRegisterTexture(&passResTracker, resolveTexD, 
QRhiPassResourceTracker::TexColorOutput, QRhiPassResourceTracker::TexColorOutputStage); @@ -3079,7 +4470,7 @@ QGles2RenderTargetData *QRhiGles2::enqueueBindFramebuffer(QRhiRenderTarget *rt, // renderbuffers cannot be written in shaders (no image store) so // they do not matter here } - if (rtTex->m_desc.depthTexture()) { + if (rtTex->m_desc.depthTexture() && cbD->passNeedsResourceTracking) { trackedRegisterTexture(&passResTracker, QRHI_RES(QGles2Texture, rtTex->m_desc.depthTexture()), QRhiPassResourceTracker::TexDepthOutput, QRhiPassResourceTracker::TexDepthOutputStage); @@ -3092,7 +4483,6 @@ QGles2RenderTargetData *QRhiGles2::enqueueBindFramebuffer(QRhiRenderTarget *rt, } fbCmd.args.bindFramebuffer.srgb = rtD->srgbUpdateAndBlend; - cbD->commands.append(fbCmd); return rtD; } @@ -3100,18 +4490,18 @@ QGles2RenderTargetData *QRhiGles2::enqueueBindFramebuffer(QRhiRenderTarget *rt, void QRhiGles2::enqueueBarriersForPass(QGles2CommandBuffer *cbD) { cbD->passResTrackers.append(QRhiPassResourceTracker()); - cbD->currentPassResTrackerIndex = cbD->passResTrackers.count() - 1; - QGles2CommandBuffer::Command cmd; + cbD->currentPassResTrackerIndex = cbD->passResTrackers.size() - 1; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BarriersForPass; cmd.args.barriersForPass.trackerIndex = cbD->currentPassResTrackerIndex; - cbD->commands.append(cmd); } void QRhiGles2::beginPass(QRhiCommandBuffer *cb, QRhiRenderTarget *rt, const QColor &colorClearValue, const QRhiDepthStencilClearValue &depthStencilClearValue, - QRhiResourceUpdateBatch *resourceUpdates) + QRhiResourceUpdateBatch *resourceUpdates, + QRhiCommandBuffer::BeginPassFlags flags) { QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); @@ -3123,10 +4513,16 @@ void QRhiGles2::beginPass(QRhiCommandBuffer *cb, // glMemoryBarrier() calls based on that tracker when submitted. 
enqueueBarriersForPass(cbD); + if (rt->resourceType() == QRhiRenderTarget::TextureRenderTarget) { + QGles2TextureRenderTarget *rtTex = QRHI_RES(QGles2TextureRenderTarget, rt); + if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QGles2Texture, QGles2RenderBuffer>(rtTex->description(), rtTex->d.currentResIdList)) + rtTex->create(); + } + bool wantsColorClear, wantsDsClear; QGles2RenderTargetData *rtD = enqueueBindFramebuffer(rt, cbD, &wantsColorClear, &wantsDsClear); - QGles2CommandBuffer::Command clearCmd; + QGles2CommandBuffer::Command &clearCmd(cbD->commands.get()); clearCmd.cmd = QGles2CommandBuffer::Command::Clear; clearCmd.args.clear.mask = 0; if (rtD->colorAttCount && wantsColorClear) @@ -3139,9 +4535,9 @@ void QRhiGles2::beginPass(QRhiCommandBuffer *cb, clearCmd.args.clear.c[3] = float(colorClearValue.alphaF()); clearCmd.args.clear.d = depthStencilClearValue.depthClearValue(); clearCmd.args.clear.s = depthStencilClearValue.stencilClearValue(); - cbD->commands.append(clearCmd); cbD->recordingPass = QGles2CommandBuffer::RenderPass; + cbD->passNeedsResourceTracking = !flags.testFlag(QRhiCommandBuffer::DoNotTrackResourcesForCompute); cbD->currentTarget = rt; cbD->resetCachedState(); @@ -3154,29 +4550,129 @@ void QRhiGles2::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resource if (cbD->currentTarget->resourceType() == QRhiResource::TextureRenderTarget) { QGles2TextureRenderTarget *rtTex = QRHI_RES(QGles2TextureRenderTarget, cbD->currentTarget); - if (rtTex->m_desc.cbeginColorAttachments() != rtTex->m_desc.cendColorAttachments()) { - // handle only 1 color attachment and only (msaa) renderbuffer - const QRhiColorAttachment &colorAtt(*rtTex->m_desc.cbeginColorAttachments()); - if (colorAtt.resolveTexture()) { - Q_ASSERT(colorAtt.renderBuffer()); + for (auto it = rtTex->m_desc.cbeginColorAttachments(), itEnd = rtTex->m_desc.cendColorAttachments(); + it != itEnd; ++it) + { + const QRhiColorAttachment &colorAtt(*it); + if (!colorAtt.resolveTexture()) + 
continue; + + QGles2Texture *resolveTexD = QRHI_RES(QGles2Texture, colorAtt.resolveTexture()); + const QSize size = resolveTexD->pixelSize(); + if (colorAtt.renderBuffer()) { QGles2RenderBuffer *rbD = QRHI_RES(QGles2RenderBuffer, colorAtt.renderBuffer()); - const QSize size = colorAtt.resolveTexture()->pixelSize(); if (rbD->pixelSize() != size) { qWarning("Resolve source (%dx%d) and target (%dx%d) size does not match", rbD->pixelSize().width(), rbD->pixelSize().height(), size.width(), size.height()); } - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BlitFromRenderbuffer; - cmd.args.blitFromRb.renderbuffer = rbD->renderbuffer; - cmd.args.blitFromRb.w = size.width(); - cmd.args.blitFromRb.h = size.height(); - QGles2Texture *colorTexD = QRHI_RES(QGles2Texture, colorAtt.resolveTexture()); - const GLenum faceTargetBase = colorTexD->m_flags.testFlag(QRhiTexture::CubeMap) ? GL_TEXTURE_CUBE_MAP_POSITIVE_X - : colorTexD->target; - cmd.args.blitFromRb.target = faceTargetBase + uint(colorAtt.resolveLayer()); - cmd.args.blitFromRb.texture = colorTexD->texture; - cmd.args.blitFromRb.dstLevel = colorAtt.resolveLevel(); - cbD->commands.append(cmd); + cmd.args.blitFromRenderbuffer.renderbuffer = rbD->renderbuffer; + cmd.args.blitFromRenderbuffer.w = size.width(); + cmd.args.blitFromRenderbuffer.h = size.height(); + if (resolveTexD->m_flags.testFlag(QRhiTexture::CubeMap)) + cmd.args.blitFromRenderbuffer.target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + uint(colorAtt.resolveLayer()); + else + cmd.args.blitFromRenderbuffer.target = resolveTexD->target; + cmd.args.blitFromRenderbuffer.dstTexture = resolveTexD->texture; + cmd.args.blitFromRenderbuffer.dstLevel = colorAtt.resolveLevel(); + const bool hasZ = resolveTexD->m_flags.testFlag(QRhiTexture::ThreeDimensional) + || resolveTexD->m_flags.testFlag(QRhiTexture::TextureArray); + cmd.args.blitFromRenderbuffer.dstLayer = hasZ ? 
colorAtt.resolveLayer() : 0; + cmd.args.blitFromRenderbuffer.isDepthStencil = false; + } else if (caps.glesMultisampleRenderToTexture) { + // Nothing to do, resolving into colorAtt.resolveTexture() is automatic, + // colorAtt.texture() is in fact not used for anything. + } else { + Q_ASSERT(colorAtt.texture()); + QGles2Texture *texD = QRHI_RES(QGles2Texture, colorAtt.texture()); + if (texD->pixelSize() != size) { + qWarning("Resolve source (%dx%d) and target (%dx%d) size does not match", + texD->pixelSize().width(), texD->pixelSize().height(), size.width(), size.height()); + } + const int resolveCount = colorAtt.multiViewCount() >= 2 ? colorAtt.multiViewCount() : 1; + for (int resolveIdx = 0; resolveIdx < resolveCount; ++resolveIdx) { + const int srcLayer = colorAtt.layer() + resolveIdx; + const int dstLayer = colorAtt.resolveLayer() + resolveIdx; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); + cmd.cmd = QGles2CommandBuffer::Command::BlitFromTexture; + if (texD->m_flags.testFlag(QRhiTexture::CubeMap)) + cmd.args.blitFromTexture.srcTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + uint(srcLayer); + else + cmd.args.blitFromTexture.srcTarget = texD->target; + cmd.args.blitFromTexture.srcTexture = texD->texture; + cmd.args.blitFromTexture.srcLevel = colorAtt.level(); + cmd.args.blitFromTexture.srcLayer = 0; + if (texD->m_flags.testFlag(QRhiTexture::ThreeDimensional) || texD->m_flags.testFlag(QRhiTexture::TextureArray)) + cmd.args.blitFromTexture.srcLayer = srcLayer; + cmd.args.blitFromTexture.w = size.width(); + cmd.args.blitFromTexture.h = size.height(); + if (resolveTexD->m_flags.testFlag(QRhiTexture::CubeMap)) + cmd.args.blitFromTexture.dstTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + uint(dstLayer); + else + cmd.args.blitFromTexture.dstTarget = resolveTexD->target; + cmd.args.blitFromTexture.dstTexture = resolveTexD->texture; + cmd.args.blitFromTexture.dstLevel = colorAtt.resolveLevel(); + cmd.args.blitFromTexture.dstLayer = 0; + if 
(resolveTexD->m_flags.testFlag(QRhiTexture::ThreeDimensional) || resolveTexD->m_flags.testFlag(QRhiTexture::TextureArray)) + cmd.args.blitFromTexture.dstLayer = dstLayer; + cmd.args.blitFromTexture.isDepthStencil = false; + } + } + } + + if (rtTex->m_desc.depthResolveTexture()) { + QGles2Texture *depthResolveTexD = QRHI_RES(QGles2Texture, rtTex->m_desc.depthResolveTexture()); + const QSize size = depthResolveTexD->pixelSize(); + if (rtTex->m_desc.depthStencilBuffer()) { + QGles2RenderBuffer *rbD = QRHI_RES(QGles2RenderBuffer, rtTex->m_desc.depthStencilBuffer()); + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); + cmd.cmd = QGles2CommandBuffer::Command::BlitFromRenderbuffer; + cmd.args.blitFromRenderbuffer.renderbuffer = rbD->renderbuffer; + cmd.args.blitFromRenderbuffer.w = size.width(); + cmd.args.blitFromRenderbuffer.h = size.height(); + cmd.args.blitFromRenderbuffer.target = depthResolveTexD->target; + cmd.args.blitFromRenderbuffer.dstTexture = depthResolveTexD->texture; + cmd.args.blitFromRenderbuffer.dstLevel = 0; + cmd.args.blitFromRenderbuffer.dstLayer = 0; + cmd.args.blitFromRenderbuffer.isDepthStencil = true; + } else if (caps.glesMultisampleRenderToTexture) { + // Nothing to do, resolving into depthResolveTexture() is automatic. + } else { + QGles2Texture *depthTexD = QRHI_RES(QGles2Texture, rtTex->m_desc.depthTexture()); + const int resolveCount = depthTexD->arraySize() >= 2 ? 
depthTexD->arraySize() : 1; + for (int resolveIdx = 0; resolveIdx < resolveCount; ++resolveIdx) { + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); + cmd.cmd = QGles2CommandBuffer::Command::BlitFromTexture; + cmd.args.blitFromTexture.srcTarget = depthTexD->target; + cmd.args.blitFromTexture.srcTexture = depthTexD->texture; + cmd.args.blitFromTexture.srcLevel = 0; + cmd.args.blitFromTexture.srcLayer = resolveIdx; + cmd.args.blitFromTexture.w = size.width(); + cmd.args.blitFromTexture.h = size.height(); + cmd.args.blitFromTexture.dstTarget = depthResolveTexD->target; + cmd.args.blitFromTexture.dstTexture = depthResolveTexD->texture; + cmd.args.blitFromTexture.dstLevel = 0; + cmd.args.blitFromTexture.dstLayer = resolveIdx; + cmd.args.blitFromTexture.isDepthStencil = true; + } + } + } + + const bool mayDiscardDepthStencil = + (rtTex->m_desc.depthStencilBuffer() + || (rtTex->m_desc.depthTexture() && rtTex->m_flags.testFlag(QRhiTextureRenderTarget::DoNotStoreDepthStencilContents))) + && !rtTex->m_desc.depthResolveTexture(); + if (mayDiscardDepthStencil) { + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); + cmd.cmd = QGles2CommandBuffer::Command::InvalidateFramebuffer; + if (caps.needsDepthStencilCombinedAttach) { + cmd.args.invalidateFramebuffer.attCount = 1; + cmd.args.invalidateFramebuffer.att[0] = GL_DEPTH_STENCIL_ATTACHMENT; + } else { + cmd.args.invalidateFramebuffer.attCount = 2; + cmd.args.invalidateFramebuffer.att[0] = GL_DEPTH_ATTACHMENT; + cmd.args.invalidateFramebuffer.att[1] = GL_STENCIL_ATTACHMENT; } } } @@ -3188,7 +4684,9 @@ void QRhiGles2::endPass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resource enqueueResourceUpdates(cb, resourceUpdates); } -void QRhiGles2::beginComputePass(QRhiCommandBuffer *cb, QRhiResourceUpdateBatch *resourceUpdates) +void QRhiGles2::beginComputePass(QRhiCommandBuffer *cb, + QRhiResourceUpdateBatch *resourceUpdates, + QRhiCommandBuffer::BeginPassFlags) { QGles2CommandBuffer *cbD = QRHI_RES(QGles2CommandBuffer, 
cb); Q_ASSERT(cbD->recordingPass == QGles2CommandBuffer::NoPass); @@ -3226,10 +4724,9 @@ void QRhiGles2::setComputePipeline(QRhiCommandBuffer *cb, QRhiComputePipeline *p cbD->currentComputePipeline = ps; cbD->currentPipelineGeneration = psD->generation; - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::BindComputePipeline; cmd.args.bindComputePipeline.ps = ps; - cbD->commands.append(cmd); } } @@ -3268,9 +4765,9 @@ void QRhiGles2::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) accessAndIsNewFlag = { 0, false }; QGles2ShaderResourceBindings *srbD = QRHI_RES(QGles2ShaderResourceBindings, cbD->currentComputeSrb); - const int bindingCount = srbD->m_bindings.count(); + const int bindingCount = srbD->m_bindings.size(); for (int i = 0; i < bindingCount; ++i) { - const QRhiShaderResourceBinding::Data *b = srbD->m_bindings.at(i).data(); + const QRhiShaderResourceBinding::Data *b = shaderResourceBindingData(srbD->m_bindings.at(i)); switch (b->type) { case QRhiShaderResourceBinding::ImageLoad: case QRhiShaderResourceBinding::ImageStore: @@ -3316,19 +4813,17 @@ void QRhiGles2::dispatch(QRhiCommandBuffer *cb, int x, int y, int z) } if (barriers) { - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::Barrier; cmd.args.barrier.barriers = barriers; - cbD->commands.append(cmd); } } - QGles2CommandBuffer::Command cmd; + QGles2CommandBuffer::Command &cmd(cbD->commands.get()); cmd.cmd = QGles2CommandBuffer::Command::Dispatch; cmd.args.dispatch.x = GLuint(x); cmd.args.dispatch.y = GLuint(y); cmd.args.dispatch.z = GLuint(z); - cbD->commands.append(cmd); } static inline GLenum toGlShaderType(QRhiShaderStage::Type type) @@ -3336,17 +4831,22 @@ static inline GLenum toGlShaderType(QRhiShaderStage::Type type) switch (type) { case QRhiShaderStage::Vertex: return GL_VERTEX_SHADER; + case QRhiShaderStage::TessellationControl: + 
return GL_TESS_CONTROL_SHADER; + case QRhiShaderStage::TessellationEvaluation: + return GL_TESS_EVALUATION_SHADER; + case QRhiShaderStage::Geometry: + return GL_GEOMETRY_SHADER; case QRhiShaderStage::Fragment: return GL_FRAGMENT_SHADER; case QRhiShaderStage::Compute: return GL_COMPUTE_SHADER; default: - Q_UNREACHABLE(); - return GL_VERTEX_SHADER; + Q_UNREACHABLE_RETURN(GL_VERTEX_SHADER); } } -QByteArray QRhiGles2::shaderSource(const QRhiShaderStage &shaderStage, int *glslVersion) +QByteArray QRhiGles2::shaderSource(const QRhiShaderStage &shaderStage, QShaderVersion *shaderVersion) { const QShader bakedShader = shaderStage.shader(); QList<int> versionsToTry; @@ -3365,8 +4865,8 @@ QByteArray QRhiGles2::shaderSource(const QRhiShaderStage &shaderStage, int *glsl QShaderVersion ver(v, QShaderVersion::GlslEs); source = bakedShader.shader({ QShader::GlslShader, ver, shaderStage.shaderVariant() }).shader(); if (!source.isEmpty()) { - if (glslVersion) - *glslVersion = v; + if (shaderVersion) + *shaderVersion = ver; break; } } @@ -3399,8 +4899,8 @@ QByteArray QRhiGles2::shaderSource(const QRhiShaderStage &shaderStage, int *glsl for (int v : versionsToTry) { source = bakedShader.shader({ QShader::GlslShader, v, shaderStage.shaderVariant() }).shader(); if (!source.isEmpty()) { - if (glslVersion) - *glslVersion = v; + if (shaderVersion) + *shaderVersion = v; break; } } @@ -3412,9 +4912,9 @@ QByteArray QRhiGles2::shaderSource(const QRhiShaderStage &shaderStage, int *glsl return source; } -bool QRhiGles2::compileShader(GLuint program, const QRhiShaderStage &shaderStage, int *glslVersion) +bool QRhiGles2::compileShader(GLuint program, const QRhiShaderStage &shaderStage, QShaderVersion *shaderVersion) { - const QByteArray source = shaderSource(shaderStage, glslVersion); + const QByteArray source = shaderSource(shaderStage, shaderVersion); if (source.isEmpty()) return false; @@ -3425,7 +4925,7 @@ bool QRhiGles2::compileShader(GLuint program, const QRhiShaderStage &shaderStage } else 
{ shader = f->glCreateShader(toGlShaderType(shaderStage.type())); const char *srcStr = source.constData(); - const GLint srcLength = source.count(); + const GLint srcLength = source.size(); f->glShaderSource(shader, 1, &srcStr, &srcLength); f->glCompileShader(shader); GLint compiled = 0; @@ -3442,7 +4942,7 @@ bool QRhiGles2::compileShader(GLuint program, const QRhiShaderStage &shaderStage qWarning("Failed to compile shader: %s\nSource was:\n%s", log.constData(), source.constData()); return false; } - if (m_shaderCache.count() >= MAX_SHADER_CACHE_ENTRIES) { + if (m_shaderCache.size() >= MAX_SHADER_CACHE_ENTRIES) { // Use the simplest strategy: too many cached shaders -> drop them all. for (uint shader : m_shaderCache) f->glDeleteShader(shader); // does not actually get released yet when attached to a not-yet-released program @@ -3481,7 +4981,7 @@ void QRhiGles2::registerUniformIfActive(const QShaderDescription::BlockVariable int binding, int baseOffset, GLuint program, - QSet<int> *activeUniformLocations, + QDuplicateTracker<int, 256> *activeUniformLocations, QGles2UniformDescriptionVector *dst) { if (var.type == QShaderDescription::Struct) { @@ -3498,9 +4998,8 @@ void QRhiGles2::registerUniformIfActive(const QShaderDescription::BlockVariable // that is not the case, it won't break anything, but we'll generate // unnecessary glUniform* calls then. uniform.glslLocation = f->glGetUniformLocation(program, name.constData()); - if (uniform.glslLocation >= 0 && !activeUniformLocations->contains(uniform.glslLocation)) { - activeUniformLocations->insert(uniform.glslLocation); - if (var.arrayDims.count() > 1) { + if (uniform.glslLocation >= 0 && !activeUniformLocations->hasSeen(uniform.glslLocation)) { + if (var.arrayDims.size() > 1) { qWarning("Array '%s' has more than one dimension. 
This is not supported.", var.name.constData()); return; @@ -3515,7 +5014,7 @@ void QRhiGles2::registerUniformIfActive(const QShaderDescription::BlockVariable void QRhiGles2::gatherUniforms(GLuint program, const QShaderDescription::UniformBlock &ub, - QSet<int> *activeUniformLocations, + QDuplicateTracker<int, 256> *activeUniformLocations, QGles2UniformDescriptionVector *dst) { QByteArray prefix = ub.structName + '.'; @@ -3529,7 +5028,7 @@ void QRhiGles2::gatherUniforms(GLuint program, registerUniformIfActive(structMember, structPrefix + ".", ub.binding, baseOffset, program, activeUniformLocations, dst); } else { - if (blockMember.arrayDims.count() > 1) { + if (blockMember.arrayDims.size() > 1) { qWarning("Array of struct '%s' has more than one dimension. Only the first " "dimension is used.", blockMember.name.constData()); @@ -3557,11 +5056,50 @@ void QRhiGles2::gatherSamplers(GLuint program, QGles2SamplerDescription sampler; sampler.glslLocation = f->glGetUniformLocation(program, v.name.constData()); if (sampler.glslLocation >= 0) { - sampler.binding = v.binding; + sampler.combinedBinding = v.binding; + sampler.tbinding = -1; + sampler.sbinding = -1; + dst->append(sampler); + } +} + +void QRhiGles2::gatherGeneratedSamplers(GLuint program, + const QShader::SeparateToCombinedImageSamplerMapping &mapping, + QGles2SamplerDescriptionVector *dst) +{ + QGles2SamplerDescription sampler; + sampler.glslLocation = f->glGetUniformLocation(program, mapping.combinedSamplerName.constData()); + if (sampler.glslLocation >= 0) { + sampler.combinedBinding = -1; + sampler.tbinding = mapping.textureBinding; + sampler.sbinding = mapping.samplerBinding; dst->append(sampler); } } +void QRhiGles2::sanityCheckVertexFragmentInterface(const QShaderDescription &vsDesc, const QShaderDescription &fsDesc) +{ + if (!vsDesc.isValid() || !fsDesc.isValid()) + return; + + // Print a warning if the fragment shader input for a given location uses a + // name that does not match the vertex shader output 
at the same location. + // This is not an error with any other API and not with GLSL >= 330 either, + // but matters for older GLSL code that has no location qualifiers. + for (const QShaderDescription::InOutVariable &outVar : vsDesc.outputVariables()) { + for (const QShaderDescription::InOutVariable &inVar : fsDesc.inputVariables()) { + if (inVar.location == outVar.location) { + if (inVar.name != outVar.name) { + qWarning("Vertex output name '%s' does not match fragment input '%s'. " + "This should be avoided because it causes problems with older GLSL versions.", + outVar.name.constData(), inVar.name.constData()); + } + break; + } + } + } +} + bool QRhiGles2::isProgramBinaryDiskCacheEnabled() const { static QOpenGLProgramBinarySupportCheckWrapper checker; @@ -3575,32 +5113,43 @@ static inline QShader::Stage toShaderStage(QRhiShaderStage::Type type) switch (type) { case QRhiShaderStage::Vertex: return QShader::VertexStage; + case QRhiShaderStage::TessellationControl: + return QShader::TessellationControlStage; + case QRhiShaderStage::TessellationEvaluation: + return QShader::TessellationEvaluationStage; + case QRhiShaderStage::Geometry: + return QShader::GeometryStage; case QRhiShaderStage::Fragment: return QShader::FragmentStage; case QRhiShaderStage::Compute: return QShader::ComputeStage; default: - Q_UNREACHABLE(); - return QShader::VertexStage; + Q_UNREACHABLE_RETURN(QShader::VertexStage); } } -QRhiGles2::DiskCacheResult QRhiGles2::tryLoadFromDiskCache(const QRhiShaderStage *stages, - int stageCount, - GLuint program, - const QVector<QShaderDescription::InOutVariable> &inputVars, - QByteArray *cacheKey) +QRhiGles2::ProgramCacheResult QRhiGles2::tryLoadFromDiskOrPipelineCache(const QRhiShaderStage *stages, + int stageCount, + GLuint program, + const QVector<QShaderDescription::InOutVariable> &inputVars, + QByteArray *cacheKey) { - QRhiGles2::DiskCacheResult result = QRhiGles2::DiskCacheMiss; - QByteArray diskCacheKey; + Q_ASSERT(cacheKey); - if 
(isProgramBinaryDiskCacheEnabled()) { + // the traditional QOpenGL disk cache since Qt 5.9 + const bool legacyDiskCacheEnabled = isProgramBinaryDiskCacheEnabled(); + + // QRhi's own (set)PipelineCacheData() + const bool pipelineCacheEnabled = caps.programBinary && !m_pipelineCache.isEmpty(); + + // calculating the cache key based on the source code is common for both types of caches + if (legacyDiskCacheEnabled || pipelineCacheEnabled) { QOpenGLProgramBinaryCache::ProgramDesc binaryProgram; for (int i = 0; i < stageCount; ++i) { const QRhiShaderStage &stage(stages[i]); QByteArray source = shaderSource(stage, nullptr); if (source.isEmpty()) - return QRhiGles2::DiskCacheError; + return QRhiGles2::ProgramCacheError; if (stage.type() == QRhiShaderStage::Vertex) { // Now add something to the key that indicates the vertex input locations. @@ -3633,31 +5182,72 @@ QRhiGles2::DiskCacheResult QRhiGles2::tryLoadFromDiskCache(const QRhiShaderStage binaryProgram.shaders.append(QOpenGLProgramBinaryCache::ShaderDesc(toShaderStage(stage.type()), source)); } - diskCacheKey = binaryProgram.cacheKey(); + *cacheKey = binaryProgram.cacheKey(); + + // Try our pipeline cache simulation first, if it got seeded with + // setPipelineCacheData and there's a hit, then no need to go to the + // filesystem at all. 
+ if (pipelineCacheEnabled) { + auto it = m_pipelineCache.constFind(*cacheKey); + if (it != m_pipelineCache.constEnd()) { + GLenum err; + for ( ; ; ) { + err = f->glGetError(); + if (err == GL_NO_ERROR || err == GL_CONTEXT_LOST) + break; + } + f->glProgramBinary(program, it->format, it->data.constData(), it->data.size()); + err = f->glGetError(); + if (err == GL_NO_ERROR) { + GLint linkStatus = 0; + f->glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); + if (linkStatus == GL_TRUE) + return QRhiGles2::ProgramCacheHit; + } + } + } - if (qrhi_programBinaryCache()->load(diskCacheKey, program)) { + if (legacyDiskCacheEnabled && qrhi_programBinaryCache()->load(*cacheKey, program)) { + // use the logging category QOpenGLShaderProgram would qCDebug(lcOpenGLProgramDiskCache, "Program binary received from cache, program %u, key %s", - program, diskCacheKey.constData()); - result = QRhiGles2::DiskCacheHit; + program, cacheKey->constData()); + return QRhiGles2::ProgramCacheHit; } } - if (cacheKey) - *cacheKey = diskCacheKey; - - return result; + return QRhiGles2::ProgramCacheMiss; } void QRhiGles2::trySaveToDiskCache(GLuint program, const QByteArray &cacheKey) { + // This is only for the traditional QOpenGL disk cache since Qt 5.9. + if (isProgramBinaryDiskCacheEnabled()) { + // use the logging category QOpenGLShaderProgram would qCDebug(lcOpenGLProgramDiskCache, "Saving program binary, program %u, key %s", program, cacheKey.constData()); qrhi_programBinaryCache()->save(cacheKey, program); } } -QGles2Buffer::QGles2Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, int size) +void QRhiGles2::trySaveToPipelineCache(GLuint program, const QByteArray &cacheKey, bool force) +{ + // This handles our own simulated "pipeline cache". 
(specific to QRhi, not + // shared with legacy QOpenGL* stuff) + + if (caps.programBinary && (force || !m_pipelineCache.contains(cacheKey))) { + GLint blobSize = 0; + f->glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &blobSize); + QByteArray blob(blobSize, Qt::Uninitialized); + GLint outSize = 0; + GLenum binaryFormat = 0; + f->glGetProgramBinary(program, blobSize, &outSize, &binaryFormat, blob.data()); + if (blobSize == outSize) + m_pipelineCache.insert(cacheKey, { binaryFormat, blob }); + } +} + +QGles2Buffer::QGles2Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size) : QRhiBuffer(rhi, type, usage, size) { } @@ -3669,6 +5259,7 @@ QGles2Buffer::~QGles2Buffer() void QGles2Buffer::destroy() { + data.clear(); if (!buffer) return; @@ -3676,14 +5267,13 @@ void QGles2Buffer::destroy() e.type = QRhiGles2::DeferredReleaseEntry::Buffer; e.buffer.buffer = buffer; - buffer = 0; QRHI_RES_RHI(QRhiGles2); - rhiD->releaseQueue.append(e); - QRHI_PROF; - QRHI_PROF_F(releaseBuffer(this)); - rhiD->unregisterResource(this); + if (rhiD) { + rhiD->releaseQueue.append(e); + rhiD->unregisterResource(this); + } } bool QGles2Buffer::create() @@ -3692,17 +5282,15 @@ bool QGles2Buffer::create() destroy(); QRHI_RES_RHI(QRhiGles2); - QRHI_PROF; - const int nonZeroSize = m_size <= 0 ? 256 : m_size; + nonZeroSize = m_size <= 0 ? 256 : m_size; if (m_usage.testFlag(QRhiBuffer::UniformBuffer)) { if (int(m_usage) != QRhiBuffer::UniformBuffer) { qWarning("Uniform buffer: multiple usages specified, this is not supported by the OpenGL backend"); return false; } - ubuf.resize(nonZeroSize); - QRHI_PROF_F(newBuffer(this, uint(nonZeroSize), 0, 1)); + data.resize(nonZeroSize); return true; } @@ -3719,9 +5307,11 @@ bool QGles2Buffer::create() rhiD->f->glBindBuffer(targetForDataOps, buffer); rhiD->f->glBufferData(targetForDataOps, nonZeroSize, nullptr, m_type == Dynamic ? 
GL_DYNAMIC_DRAW : GL_STATIC_DRAW); + if (rhiD->glObjectLabel) + rhiD->glObjectLabel(GL_BUFFER, buffer, -1, m_objectName.constData()); + usageState.access = AccessNone; - QRHI_PROF_F(newBuffer(this, uint(nonZeroSize), 1, 0)); rhiD->registerResource(this); return true; } @@ -3734,6 +5324,35 @@ QRhiBuffer::NativeBuffer QGles2Buffer::nativeBuffer() return { { &buffer }, 1 }; } +char *QGles2Buffer::beginFullDynamicBufferUpdateForCurrentFrame() +{ + Q_ASSERT(m_type == Dynamic); + if (!m_usage.testFlag(UniformBuffer)) { + QRHI_RES_RHI(QRhiGles2); + rhiD->f->glBindBuffer(targetForDataOps, buffer); + if (rhiD->caps.properMapBuffer) { + return static_cast<char *>(rhiD->f->glMapBufferRange(targetForDataOps, 0, nonZeroSize, + GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)); + } else { + // Need some storage for the data, use the otherwise unused 'data' member. + if (data.isEmpty()) + data.resize(nonZeroSize); + } + } + return data.data(); +} + +void QGles2Buffer::endFullDynamicBufferUpdateForCurrentFrame() +{ + if (!m_usage.testFlag(UniformBuffer)) { + QRHI_RES_RHI(QRhiGles2); + if (rhiD->caps.properMapBuffer) + rhiD->f->glUnmapBuffer(targetForDataOps); + else + rhiD->f->glBufferSubData(targetForDataOps, 0, nonZeroSize, data.data()); + } +} + QGles2RenderBuffer::QGles2RenderBuffer(QRhiImplementation *rhi, Type type, const QSize &pixelSize, int sampleCount, QRhiRenderBuffer::Flags flags, QRhiTexture::Format backingFormatHint) @@ -3761,10 +5380,11 @@ void QGles2RenderBuffer::destroy() stencilRenderbuffer = 0; QRHI_RES_RHI(QRhiGles2); - rhiD->releaseQueue.append(e); - QRHI_PROF; - QRHI_PROF_F(releaseRenderBuffer(this)); - rhiD->unregisterResource(this); + if (rhiD) { + if (owns) + rhiD->releaseQueue.append(e); + rhiD->unregisterResource(this); + } } bool QGles2RenderBuffer::create() @@ -3773,14 +5393,11 @@ bool QGles2RenderBuffer::create() destroy(); QRHI_RES_RHI(QRhiGles2); - QRHI_PROF; samples = rhiD->effectiveSampleCount(m_sampleCount); if (m_flags.testFlag(UsedWithSwapChainOnly)) { - if 
(m_type == DepthStencil) { - QRHI_PROF_F(newRenderBuffer(this, false, true, samples)); + if (m_type == DepthStencil) return true; - } qWarning("RenderBuffer: UsedWithSwapChainOnly is meaningless in combination with Color"); } @@ -3820,7 +5437,6 @@ bool QGles2RenderBuffer::create() rhiD->f->glRenderbufferStorage(GL_RENDERBUFFER, stencilStorage, size.width(), size.height()); } - QRHI_PROF_F(newRenderBuffer(this, false, false, samples)); break; case QRhiRenderBuffer::Color: { @@ -3841,7 +5457,6 @@ bool QGles2RenderBuffer::create() rhiD->f->glRenderbufferStorage(GL_RENDERBUFFER, internalFormat, size.width(), size.height()); } - QRHI_PROF_F(newRenderBuffer(this, false, false, samples)); } break; default: @@ -3849,6 +5464,36 @@ bool QGles2RenderBuffer::create() break; } + if (rhiD->glObjectLabel) + rhiD->glObjectLabel(GL_RENDERBUFFER, renderbuffer, -1, m_objectName.constData()); + + owns = true; + generation += 1; + rhiD->registerResource(this); + return true; +} + +bool QGles2RenderBuffer::createFrom(NativeRenderBuffer src) +{ + if (!src.object) + return false; + + if (renderbuffer) + destroy(); + + QRHI_RES_RHI(QRhiGles2); + samples = rhiD->effectiveSampleCount(m_sampleCount); + + if (m_flags.testFlag(UsedWithSwapChainOnly)) + qWarning("RenderBuffer: UsedWithSwapChainOnly is meaningless when importing an existing native object"); + + if (!rhiD->ensureContext()) + return false; + + renderbuffer = src.object; + + owns = false; + generation += 1; rhiD->registerResource(this); return true; } @@ -3861,9 +5506,9 @@ QRhiTexture::Format QGles2RenderBuffer::backingFormat() const return m_type == Color ? 
QRhiTexture::RGBA8 : QRhiTexture::UnknownFormat; } -QGles2Texture::QGles2Texture(QRhiImplementation *rhi, Format format, const QSize &pixelSize, - int sampleCount, Flags flags) - : QRhiTexture(rhi, format, pixelSize, sampleCount, flags) +QGles2Texture::QGles2Texture(QRhiImplementation *rhi, Format format, const QSize &pixelSize, int depth, + int arraySize, int sampleCount, Flags flags) + : QRhiTexture(rhi, format, pixelSize, depth, arraySize, sampleCount, flags) { } @@ -3884,14 +5529,14 @@ void QGles2Texture::destroy() texture = 0; specified = false; - compressedAtlasBuilt = false; + zeroInitialized = false; QRHI_RES_RHI(QRhiGles2); - if (owns) - rhiD->releaseQueue.append(e); - QRHI_PROF; - QRHI_PROF_F(releaseTexture(this)); - rhiD->unregisterResource(this); + if (rhiD) { + if (owns) + rhiD->releaseQueue.append(e); + rhiD->unregisterResource(this); + } } bool QGles2Texture::prepareCreate(QSize *adjustedSize) @@ -3903,14 +5548,65 @@ bool QGles2Texture::prepareCreate(QSize *adjustedSize) if (!rhiD->ensureContext()) return false; - const QSize size = m_pixelSize.isEmpty() ? QSize(1, 1) : m_pixelSize; - const bool isCube = m_flags.testFlag(CubeMap); + const bool isArray = m_flags.testFlag(QRhiTexture::TextureArray); + const bool is3D = m_flags.testFlag(ThreeDimensional); const bool hasMipMaps = m_flags.testFlag(MipMapped); const bool isCompressed = rhiD->isCompressedFormat(m_format); + const bool is1D = m_flags.testFlag(OneDimensional); + + const QSize size = is1D ? QSize(qMax(1, m_pixelSize.width()), 1) + : (m_pixelSize.isEmpty() ? 
QSize(1, 1) : m_pixelSize); + + if (is3D && !rhiD->caps.texture3D) { + qWarning("3D textures are not supported"); + return false; + } + if (isCube && is3D) { + qWarning("Texture cannot be both cube and 3D"); + return false; + } + if (isArray && is3D) { + qWarning("Texture cannot be both array and 3D"); + return false; + } + if (is1D && !rhiD->caps.texture1D) { + qWarning("1D textures are not supported"); + return false; + } + if (is1D && is3D) { + qWarning("Texture cannot be both 1D and 3D"); + return false; + } + if (is1D && isCube) { + qWarning("Texture cannot be both 1D and cube"); + return false; + } + + if (m_depth > 1 && !is3D) { + qWarning("Texture cannot have a depth of %d when it is not 3D", m_depth); + return false; + } + if (m_arraySize > 0 && !isArray) { + qWarning("Texture cannot have an array size of %d when it is not an array", m_arraySize); + return false; + } + if (m_arraySize < 1 && isArray) { + qWarning("Texture is an array but array size is %d", m_arraySize); + return false; + } + + target = isCube ? GL_TEXTURE_CUBE_MAP + : m_sampleCount > 1 ? (isArray ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_MULTISAMPLE) + : (is3D ? GL_TEXTURE_3D + : (is1D ? (isArray ? GL_TEXTURE_1D_ARRAY : GL_TEXTURE_1D) + : (isArray ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D))); + + if (m_flags.testFlag(ExternalOES)) + target = GL_TEXTURE_EXTERNAL_OES; + else if (m_flags.testFlag(TextureRectangleGL)) + target = GL_TEXTURE_RECTANGLE; - target = isCube ? GL_TEXTURE_CUBE_MAP - : m_sampleCount > 1 ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; mipLevelCount = hasMipMaps ? 
rhiD->q->mipLevelsForSize(size) : 1; gltype = GL_UNSIGNED_BYTE; @@ -3951,12 +5647,38 @@ bool QGles2Texture::create() rhiD->f->glGenTextures(1, &texture); const bool isCube = m_flags.testFlag(CubeMap); + const bool isArray = m_flags.testFlag(QRhiTexture::TextureArray); + const bool is3D = m_flags.testFlag(ThreeDimensional); const bool hasMipMaps = m_flags.testFlag(MipMapped); const bool isCompressed = rhiD->isCompressedFormat(m_format); + const bool is1D = m_flags.testFlag(OneDimensional); + if (!isCompressed) { rhiD->f->glBindTexture(target, texture); if (!m_flags.testFlag(UsedWithLoadStore)) { - if (hasMipMaps || isCube) { + if (is1D) { + for (int level = 0; level < mipLevelCount; ++level) { + const QSize mipSize = rhiD->q->sizeForMipLevel(level, size); + if (isArray) + rhiD->f->glTexImage2D(target, level, GLint(glintformat), mipSize.width(), + qMax(0, m_arraySize), 0, glformat, gltype, nullptr); + else + rhiD->glTexImage1D(target, level, GLint(glintformat), mipSize.width(), 0, + glformat, gltype, nullptr); + } + } else if (is3D || isArray) { + const int layerCount = is3D ? qMax(1, m_depth) : qMax(0, m_arraySize); + if (hasMipMaps) { + for (int level = 0; level != mipLevelCount; ++level) { + const QSize mipSize = rhiD->q->sizeForMipLevel(level, size); + rhiD->f->glTexImage3D(target, level, GLint(glintformat), mipSize.width(), mipSize.height(), layerCount, + 0, glformat, gltype, nullptr); + } + } else { + rhiD->f->glTexImage3D(target, 0, GLint(glintformat), size.width(), size.height(), layerCount, + 0, glformat, gltype, nullptr); + } + } else if (hasMipMaps || isCube) { const GLenum faceTargetBase = isCube ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : target; for (int layer = 0, layerCount = isCube ? 
6 : 1; layer != layerCount; ++layer) { for (int level = 0; level != mipLevelCount; ++level) { @@ -3967,14 +5689,32 @@ bool QGles2Texture::create() } } } else { - rhiD->f->glTexImage2D(target, 0, GLint(glintformat), size.width(), size.height(), - 0, glformat, gltype, nullptr); + // 2D texture. For multisample textures the GLES 3.1 + // glStorage2DMultisample must be used for portability. + if (m_sampleCount > 1 && rhiD->caps.multisampledTexture) { + // internal format must be sized + rhiD->f->glTexStorage2DMultisample(target, m_sampleCount, glsizedintformat, + size.width(), size.height(), GL_TRUE); + } else { + rhiD->f->glTexImage2D(target, 0, GLint(glintformat), size.width(), size.height(), + 0, glformat, gltype, nullptr); + } } } else { // Must be specified with immutable storage functions otherwise // bindImageTexture may fail. Also, the internal format must be a // sized format here. - rhiD->f->glTexStorage2D(target, mipLevelCount, glsizedintformat, size.width(), size.height()); + if (is1D && !isArray) + rhiD->glTexStorage1D(target, mipLevelCount, glsizedintformat, size.width()); + else if (!is1D && (is3D || isArray)) + rhiD->f->glTexStorage3D(target, mipLevelCount, glsizedintformat, size.width(), size.height(), + is3D ? qMax(1, m_depth) : qMax(0, m_arraySize)); + else if (m_sampleCount > 1) + rhiD->f->glTexStorage2DMultisample(target, m_sampleCount, glsizedintformat, + size.width(), size.height(), GL_TRUE); + else + rhiD->f->glTexStorage2D(target, mipLevelCount, glsizedintformat, size.width(), + is1D ? qMax(0, m_arraySize) : size.height()); } specified = true; } else { @@ -3984,8 +5724,8 @@ bool QGles2Texture::create() specified = false; } - QRHI_PROF; - QRHI_PROF_F(newTexture(this, true, mipLevelCount, isCube ? 
6 : 1, 1)); + if (rhiD->glObjectLabel) + rhiD->glObjectLabel(GL_TEXTURE, texture, -1, m_objectName.constData()); owns = true; @@ -4005,15 +5745,12 @@ bool QGles2Texture::createFrom(QRhiTexture::NativeTexture src) texture = textureId; specified = true; - compressedAtlasBuilt = true; - - QRHI_RES_RHI(QRhiGles2); - QRHI_PROF; - QRHI_PROF_F(newTexture(this, false, mipLevelCount, m_flags.testFlag(CubeMap) ? 6 : 1, 1)); + zeroInitialized = true; owns = false; generation += 1; + QRHI_RES_RHI(QRhiGles2); rhiD->registerResource(this); return true; } @@ -4036,7 +5773,9 @@ QGles2Sampler::~QGles2Sampler() void QGles2Sampler::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiGles2); + if (rhiD) + rhiD->unregisterResource(this); } bool QGles2Sampler::create() @@ -4049,6 +5788,8 @@ bool QGles2Sampler::create() d.gltexcomparefunc = toGlTextureCompareFunc(m_compareOp); generation += 1; + QRHI_RES_RHI(QRhiGles2); + rhiD->registerResource(this, false); return true; } @@ -4065,7 +5806,9 @@ QGles2RenderPassDescriptor::~QGles2RenderPassDescriptor() void QGles2RenderPassDescriptor::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiGles2); + if (rhiD) + rhiD->unregisterResource(this); } bool QGles2RenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *other) const @@ -4074,33 +5817,46 @@ bool QGles2RenderPassDescriptor::isCompatible(const QRhiRenderPassDescriptor *ot return true; } -QGles2ReferenceRenderTarget::QGles2ReferenceRenderTarget(QRhiImplementation *rhi) - : QRhiRenderTarget(rhi), +QRhiRenderPassDescriptor *QGles2RenderPassDescriptor::newCompatibleRenderPassDescriptor() const +{ + QGles2RenderPassDescriptor *rpD = new QGles2RenderPassDescriptor(m_rhi); + QRHI_RES_RHI(QRhiGles2); + rhiD->registerResource(rpD, false); + return rpD; +} + +QVector<quint32> QGles2RenderPassDescriptor::serializedFormat() const +{ + return {}; +} + +QGles2SwapChainRenderTarget::QGles2SwapChainRenderTarget(QRhiImplementation *rhi, QRhiSwapChain *swapchain) + : 
QRhiSwapChainRenderTarget(rhi, swapchain), d(rhi) { } -QGles2ReferenceRenderTarget::~QGles2ReferenceRenderTarget() +QGles2SwapChainRenderTarget::~QGles2SwapChainRenderTarget() { destroy(); } -void QGles2ReferenceRenderTarget::destroy() +void QGles2SwapChainRenderTarget::destroy() { // nothing to do here } -QSize QGles2ReferenceRenderTarget::pixelSize() const +QSize QGles2SwapChainRenderTarget::pixelSize() const { return d.pixelSize; } -float QGles2ReferenceRenderTarget::devicePixelRatio() const +float QGles2SwapChainRenderTarget::devicePixelRatio() const { return d.dpr; } -int QGles2ReferenceRenderTarget::sampleCount() const +int QGles2SwapChainRenderTarget::sampleCount() const { return d.sampleCount; } @@ -4127,18 +5883,24 @@ void QGles2TextureRenderTarget::destroy() e.type = QRhiGles2::DeferredReleaseEntry::TextureRenderTarget; e.textureRenderTarget.framebuffer = framebuffer; + e.textureRenderTarget.nonMsaaThrowawayDepthTexture = nonMsaaThrowawayDepthTexture; framebuffer = 0; + nonMsaaThrowawayDepthTexture = 0; QRHI_RES_RHI(QRhiGles2); - rhiD->releaseQueue.append(e); - - rhiD->unregisterResource(this); + if (rhiD) { + rhiD->releaseQueue.append(e); + rhiD->unregisterResource(this); + } } QRhiRenderPassDescriptor *QGles2TextureRenderTarget::newCompatibleRenderPassDescriptor() { - return new QGles2RenderPassDescriptor(m_rhi); + QGles2RenderPassDescriptor *rpD = new QGles2RenderPassDescriptor(m_rhi); + QRHI_RES_RHI(QRhiGles2); + rhiD->registerResource(rpD, false); + return rpD; } bool QGles2TextureRenderTarget::create() @@ -4148,13 +5910,13 @@ bool QGles2TextureRenderTarget::create() if (framebuffer) destroy(); - const bool hasColorAttachments = m_desc.cbeginColorAttachments() != m_desc.cendColorAttachments(); + const bool hasColorAttachments = m_desc.colorAttachmentCount() > 0; Q_ASSERT(hasColorAttachments || m_desc.depthTexture()); Q_ASSERT(!m_desc.depthStencilBuffer() || !m_desc.depthTexture()); const bool hasDepthStencil = m_desc.depthStencilBuffer() || 
m_desc.depthTexture(); if (hasColorAttachments) { - const int count = m_desc.cendColorAttachments() - m_desc.cbeginColorAttachments(); + const int count = int(m_desc.colorAttachmentCount()); if (count > rhiD->caps.maxDrawBuffers) { qWarning("QGles2TextureRenderTarget: Too many color attachments (%d, max is %d)", count, rhiD->caps.maxDrawBuffers); @@ -4171,6 +5933,7 @@ bool QGles2TextureRenderTarget::create() d.colorAttCount = 0; int attIndex = 0; + int multiViewCount = 0; for (auto it = m_desc.cbeginColorAttachments(), itEnd = m_desc.cendColorAttachments(); it != itEnd; ++it, ++attIndex) { d.colorAttCount += 1; const QRhiColorAttachment &colorAtt(*it); @@ -4180,12 +5943,57 @@ bool QGles2TextureRenderTarget::create() if (texture) { QGles2Texture *texD = QRHI_RES(QGles2Texture, texture); Q_ASSERT(texD->texture && texD->specified); - const GLenum faceTargetBase = texD->flags().testFlag(QRhiTexture::CubeMap) ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : texD->target; - rhiD->f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + uint(attIndex), faceTargetBase + uint(colorAtt.layer()), - texD->texture, colorAtt.level()); + if (texD->flags().testFlag(QRhiTexture::ThreeDimensional) || texD->flags().testFlag(QRhiTexture::TextureArray)) { + if (colorAtt.multiViewCount() < 2) { + rhiD->f->glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + uint(attIndex), texD->texture, + colorAtt.level(), colorAtt.layer()); + } else { + multiViewCount = colorAtt.multiViewCount(); + if (texD->sampleCount() > 1 && rhiD->caps.glesMultiviewMultisampleRenderToTexture && colorAtt.resolveTexture()) { + // Special path for GLES and GL_OVR_multiview_multisampled_render_to_texture: + // ignore the color attachment's (multisample) texture + // array and give the resolve texture array to GL. 
(no + // explicit resolving is needed by us later on) + QGles2Texture *resolveTexD = QRHI_RES(QGles2Texture, colorAtt.resolveTexture()); + rhiD->glFramebufferTextureMultisampleMultiviewOVR(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0 + uint(attIndex), + resolveTexD->texture, + colorAtt.resolveLevel(), + texD->sampleCount(), + colorAtt.resolveLayer(), + multiViewCount); + } else { + rhiD->glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0 + uint(attIndex), + texD->texture, + colorAtt.level(), + colorAtt.layer(), + multiViewCount); + } + } + } else if (texD->flags().testFlag(QRhiTexture::OneDimensional)) { + rhiD->glFramebufferTexture1D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + uint(attIndex), + texD->target + uint(colorAtt.layer()), texD->texture, + colorAtt.level()); + } else { + if (texD->sampleCount() > 1 && rhiD->caps.glesMultisampleRenderToTexture && colorAtt.resolveTexture()) { + // Special path for GLES and GL_EXT_multisampled_render_to_texture: + // ignore the color attachment's (multisample) texture and + // give the resolve texture to GL. (no explicit resolving is + // needed by us later on) + QGles2Texture *resolveTexD = QRHI_RES(QGles2Texture, colorAtt.resolveTexture()); + const GLenum faceTargetBase = resolveTexD->flags().testFlag(QRhiTexture::CubeMap) ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : resolveTexD->target; + rhiD->glFramebufferTexture2DMultisampleEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + uint(attIndex), faceTargetBase + uint(colorAtt.resolveLayer()), + resolveTexD->texture, colorAtt.level(), texD->sampleCount()); + } else { + const GLenum faceTargetBase = texD->flags().testFlag(QRhiTexture::CubeMap) ? 
GL_TEXTURE_CUBE_MAP_POSITIVE_X : texD->target; + rhiD->f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + uint(attIndex), faceTargetBase + uint(colorAtt.layer()), + texD->texture, colorAtt.level()); + } + } if (attIndex == 0) { d.pixelSize = rhiD->q->sizeForMipLevel(colorAtt.level(), texD->pixelSize()); - d.sampleCount = 1; + d.sampleCount = texD->sampleCount(); } } else if (renderBuffer) { QGles2RenderBuffer *rbD = QRHI_RES(QGles2RenderBuffer, renderBuffer); @@ -4206,12 +6014,14 @@ bool QGles2TextureRenderTarget::create() } else { rhiD->f->glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depthRbD->renderbuffer); - if (depthRbD->stencilRenderbuffer) + if (depthRbD->stencilRenderbuffer) { rhiD->f->glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthRbD->stencilRenderbuffer); - else // packed + } else { + // packed depth-stencil rhiD->f->glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, depthRbD->renderbuffer); + } } if (d.colorAttCount == 0) { d.pixelSize = depthRbD->pixelSize(); @@ -4219,11 +6029,105 @@ bool QGles2TextureRenderTarget::create() } } else { QGles2Texture *depthTexD = QRHI_RES(QGles2Texture, m_desc.depthTexture()); - rhiD->f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, depthTexD->target, - depthTexD->texture, 0); + if (multiViewCount < 2) { + if (depthTexD->sampleCount() > 1 && rhiD->caps.glesMultisampleRenderToTexture && m_desc.depthResolveTexture()) { + // Special path for GLES and + // GL_EXT_multisampled_render_to_texture, for depth-stencil. + // Relevant only when depthResolveTexture is set. 
+ QGles2Texture *depthResolveTexD = QRHI_RES(QGles2Texture, m_desc.depthResolveTexture()); + rhiD->glFramebufferTexture2DMultisampleEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, depthResolveTexD->target, + depthResolveTexD->texture, 0, depthTexD->sampleCount()); + if (rhiD->isStencilSupportingFormat(depthResolveTexD->format())) { + rhiD->glFramebufferTexture2DMultisampleEXT(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, depthResolveTexD->target, + depthResolveTexD->texture, 0, depthTexD->sampleCount()); + } + } else { + rhiD->f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, depthTexD->target, + depthTexD->texture, 0); + if (rhiD->isStencilSupportingFormat(depthTexD->format())) { + rhiD->f->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, depthTexD->target, + depthTexD->texture, 0); + } + } + } else { + if (depthTexD->sampleCount() > 1 && rhiD->caps.glesMultiviewMultisampleRenderToTexture) { + // And so it turns out + // https://registry.khronos.org/OpenGL/extensions/OVR/OVR_multiview.txt + // does not work with multisample 2D texture arrays. (at least + // that's what Issue 30 in the extension spec seems to imply) + // + // There is https://registry.khronos.org/OpenGL/extensions/EXT/EXT_multiview_texture_multisample.txt + // that seems to resolve that, but that does not seem to + // work (or not available) on GLES devices such as the Quest 3. + // + // So instead, on GLES we can use the + // multisample-multiview-auto-resolving version (which in + // turn is not supported on desktop GL e.g. by NVIDIA), too + // bad we have a multisample depth texture array here as + // every other API out there requires that. So, in absence + // of a depthResolveTexture, create a temporary one ignoring + // what the user has already created. + // + if (!m_flags.testFlag(DoNotStoreDepthStencilContents) && !m_desc.depthResolveTexture()) { + qWarning("Attempted to create a multiview+multisample QRhiTextureRenderTarget, but DoNotStoreDepthStencilContents was not set." 
+ " This path has no choice but to behave as if DoNotStoreDepthStencilContents was set, because QRhi is forced to create" + " a throwaway non-multisample depth texture here. Set the flag to silence this warning, or set a depthResolveTexture."); + } + if (m_desc.depthResolveTexture()) { + QGles2Texture *depthResolveTexD = QRHI_RES(QGles2Texture, m_desc.depthResolveTexture()); + rhiD->glFramebufferTextureMultisampleMultiviewOVR(GL_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + depthResolveTexD->texture, + 0, + depthTexD->sampleCount(), + 0, + multiViewCount); + if (rhiD->isStencilSupportingFormat(depthResolveTexD->format())) { + rhiD->glFramebufferTextureMultisampleMultiviewOVR(GL_FRAMEBUFFER, + GL_STENCIL_ATTACHMENT, + depthResolveTexD->texture, + 0, + depthTexD->sampleCount(), + 0, + multiViewCount); + } + } else { + if (!nonMsaaThrowawayDepthTexture) { + rhiD->f->glGenTextures(1, &nonMsaaThrowawayDepthTexture); + rhiD->f->glBindTexture(GL_TEXTURE_2D_ARRAY, nonMsaaThrowawayDepthTexture); + rhiD->f->glTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_DEPTH24_STENCIL8, + depthTexD->pixelSize().width(), depthTexD->pixelSize().height(), multiViewCount); + } + rhiD->glFramebufferTextureMultisampleMultiviewOVR(GL_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + nonMsaaThrowawayDepthTexture, + 0, + depthTexD->sampleCount(), + 0, + multiViewCount); + rhiD->glFramebufferTextureMultisampleMultiviewOVR(GL_FRAMEBUFFER, + GL_STENCIL_ATTACHMENT, + nonMsaaThrowawayDepthTexture, + 0, + depthTexD->sampleCount(), + 0, + multiViewCount); + } + } else { + // The depth texture here must be an array with at least + // multiViewCount elements, and the format should be D24 or D32F + // for depth only, or D24S8 for depth and stencil. 
+ rhiD->glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, depthTexD->texture, + 0, 0, multiViewCount); + if (rhiD->isStencilSupportingFormat(depthTexD->format())) { + rhiD->glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, depthTexD->texture, + 0, 0, multiViewCount); + } + } + } if (d.colorAttCount == 0) { d.pixelSize = depthTexD->pixelSize(); - d.sampleCount = 1; + d.sampleCount = depthTexD->sampleCount(); } } d.dsAttCount = 1; @@ -4240,12 +6144,20 @@ bool QGles2TextureRenderTarget::create() return false; } + if (rhiD->glObjectLabel) + rhiD->glObjectLabel(GL_FRAMEBUFFER, framebuffer, -1, m_objectName.constData()); + + QRhiRenderTargetAttachmentTracker::updateResIdList<QGles2Texture, QGles2RenderBuffer>(m_desc, &d.currentResIdList); + rhiD->registerResource(this); return true; } QSize QGles2TextureRenderTarget::pixelSize() const { + if (!QRhiRenderTargetAttachmentTracker::isUpToDate<QGles2Texture, QGles2RenderBuffer>(m_desc, d.currentResIdList)) + const_cast<QGles2TextureRenderTarget *>(this)->create(); + return d.pixelSize; } @@ -4271,7 +6183,9 @@ QGles2ShaderResourceBindings::~QGles2ShaderResourceBindings() void QGles2ShaderResourceBindings::destroy() { - // nothing to do here + QRHI_RES_RHI(QRhiGles2); + if (rhiD) + rhiD->unregisterResource(this); } bool QGles2ShaderResourceBindings::create() @@ -4280,10 +6194,30 @@ bool QGles2ShaderResourceBindings::create() if (!rhiD->sanityCheckShaderResourceBindings(this)) return false; + hasDynamicOffset = false; + for (int i = 0, ie = m_bindings.size(); i != ie; ++i) { + const QRhiShaderResourceBinding::Data *b = QRhiImplementation::shaderResourceBindingData(m_bindings.at(i)); + if (b->type == QRhiShaderResourceBinding::UniformBuffer) { + if (b->u.ubuf.hasDynamicOffset) { + hasDynamicOffset = true; + break; + } + } + } + + rhiD->updateLayoutDesc(this); + generation += 1; + rhiD->registerResource(this, false); return true; } +void 
QGles2ShaderResourceBindings::updateResources(UpdateFlags flags) +{ + Q_UNUSED(flags); + generation += 1; +} + QGles2GraphicsPipeline::QGles2GraphicsPipeline(QRhiImplementation *rhi) : QRhiGraphicsPipeline(rhi) { @@ -4309,9 +6243,20 @@ void QGles2GraphicsPipeline::destroy() samplers.clear(); QRHI_RES_RHI(QRhiGles2); - rhiD->releaseQueue.append(e); + if (rhiD) { + rhiD->releaseQueue.append(e); + rhiD->unregisterResource(this); + } +} - rhiD->unregisterResource(this); +static inline bool isGraphicsStage(const QRhiShaderStage &shaderStage) +{ + const QRhiShaderStage::Type t = shaderStage.type(); + return t == QRhiShaderStage::Vertex + || t == QRhiShaderStage::TessellationControl + || t == QRhiShaderStage::TessellationEvaluation + || t == QRhiShaderStage::Geometry + || t == QRhiShaderStage::Fragment; } bool QGles2GraphicsPipeline::create() @@ -4324,6 +6269,7 @@ bool QGles2GraphicsPipeline::create() if (!rhiD->ensureContext()) return false; + rhiD->pipelineCreationStart(); if (!rhiD->sanityCheckGraphicsPipeline(this)) return false; @@ -4331,62 +6277,126 @@ bool QGles2GraphicsPipeline::create() program = rhiD->f->glCreateProgram(); - QShaderDescription vsDesc; - QShaderDescription fsDesc; - for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { - if (shaderStage.type() == QRhiShaderStage::Vertex) - vsDesc = shaderStage.shader().description(); - else if (shaderStage.type() == QRhiShaderStage::Fragment) - fsDesc = shaderStage.shader().description(); - } - - QByteArray diskCacheKey; - QRhiGles2::DiskCacheResult diskCacheResult = rhiD->tryLoadFromDiskCache(m_shaderStages.constData(), - m_shaderStages.count(), - program, - vsDesc.inputVariables(), - &diskCacheKey); - if (diskCacheResult == QRhiGles2::DiskCacheError) + enum { + VtxIdx = 0, + TCIdx, + TEIdx, + GeomIdx, + FragIdx, + LastIdx + }; + const auto descIdxForStage = [](const QRhiShaderStage &shaderStage) { + switch (shaderStage.type()) { + case QRhiShaderStage::Vertex: + return VtxIdx; + case 
QRhiShaderStage::TessellationControl: + return TCIdx; + case QRhiShaderStage::TessellationEvaluation: + return TEIdx; + case QRhiShaderStage::Geometry: + return GeomIdx; + case QRhiShaderStage::Fragment: + return FragIdx; + default: + break; + } + Q_UNREACHABLE_RETURN(VtxIdx); + }; + QShaderDescription desc[LastIdx]; + QShader::SeparateToCombinedImageSamplerMappingList samplerMappingList[LastIdx]; + bool vertexFragmentOnly = true; + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { + if (isGraphicsStage(shaderStage)) { + const int idx = descIdxForStage(shaderStage); + if (idx != VtxIdx && idx != FragIdx) + vertexFragmentOnly = false; + QShader shader = shaderStage.shader(); + QShaderVersion shaderVersion; + desc[idx] = shader.description(); + if (!rhiD->shaderSource(shaderStage, &shaderVersion).isEmpty()) { + samplerMappingList[idx] = shader.separateToCombinedImageSamplerMappingList( + { QShader::GlslShader, shaderVersion, shaderStage.shaderVariant() }); + } + } + } + + QByteArray cacheKey; + QRhiGles2::ProgramCacheResult cacheResult = rhiD->tryLoadFromDiskOrPipelineCache(m_shaderStages.constData(), + m_shaderStages.size(), + program, + desc[VtxIdx].inputVariables(), + &cacheKey); + if (cacheResult == QRhiGles2::ProgramCacheError) return false; - if (diskCacheResult == QRhiGles2::DiskCacheMiss) { - for (const QRhiShaderStage &shaderStage : qAsConst(m_shaderStages)) { - if (shaderStage.type() == QRhiShaderStage::Vertex) { - if (!rhiD->compileShader(program, shaderStage, nullptr)) - return false; - } else if (shaderStage.type() == QRhiShaderStage::Fragment) { + if (cacheResult == QRhiGles2::ProgramCacheMiss) { + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { + if (isGraphicsStage(shaderStage)) { if (!rhiD->compileShader(program, shaderStage, nullptr)) return false; } } // important when GLSL <= 150 is used that does not have location qualifiers - for (const QShaderDescription::InOutVariable &inVar : 
vsDesc.inputVariables()) + for (const QShaderDescription::InOutVariable &inVar : desc[VtxIdx].inputVariables()) rhiD->f->glBindAttribLocation(program, GLuint(inVar.location), inVar.name); + if (vertexFragmentOnly) + rhiD->sanityCheckVertexFragmentInterface(desc[VtxIdx], desc[FragIdx]); + if (!rhiD->linkProgram(program)) return false; - rhiD->trySaveToDiskCache(program, diskCacheKey); + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) { + // force replacing existing cache entry (if there is one, then + // something is wrong with it, as there was no hit) + rhiD->trySaveToPipelineCache(program, cacheKey, true); + } else { + // legacy QOpenGLShaderProgram style behavior: the "pipeline cache" + // was not enabled, so instead store to the Qt 5 disk cache + rhiD->trySaveToDiskCache(program, cacheKey); + } + } else { + Q_ASSERT(cacheResult == QRhiGles2::ProgramCacheHit); + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) { + // just so that it ends up in the pipeline cache also when the hit was + // from the disk cache + rhiD->trySaveToPipelineCache(program, cacheKey); + } } // Use the same work area for the vertex & fragment stages, thus ensuring // that we will not do superfluous glUniform calls for uniforms that are // present in both shaders. 
- QSet<int> activeUniformLocations; + QDuplicateTracker<int, 256> activeUniformLocations; + + for (const QRhiShaderStage &shaderStage : std::as_const(m_shaderStages)) { + if (isGraphicsStage(shaderStage)) { + const int idx = descIdxForStage(shaderStage); + for (const QShaderDescription::UniformBlock &ub : desc[idx].uniformBlocks()) + rhiD->gatherUniforms(program, ub, &activeUniformLocations, &uniforms); + for (const QShaderDescription::InOutVariable &v : desc[idx].combinedImageSamplers()) + rhiD->gatherSamplers(program, v, &samplers); + for (const QShader::SeparateToCombinedImageSamplerMapping &mapping : samplerMappingList[idx]) + rhiD->gatherGeneratedSamplers(program, mapping, &samplers); + } + } - for (const QShaderDescription::UniformBlock &ub : vsDesc.uniformBlocks()) - rhiD->gatherUniforms(program, ub, &activeUniformLocations, &uniforms); + std::sort(uniforms.begin(), uniforms.end(), + [](const QGles2UniformDescription &a, const QGles2UniformDescription &b) + { + return a.offset < b.offset; + }); - for (const QShaderDescription::UniformBlock &ub : fsDesc.uniformBlocks()) - rhiD->gatherUniforms(program, ub, &activeUniformLocations, &uniforms); + memset(uniformState, 0, sizeof(uniformState)); - for (const QShaderDescription::InOutVariable &v : vsDesc.combinedImageSamplers()) - rhiD->gatherSamplers(program, v, &samplers); + currentSrb = nullptr; + currentSrbGeneration = 0; - for (const QShaderDescription::InOutVariable &v : fsDesc.combinedImageSamplers()) - rhiD->gatherSamplers(program, v, &samplers); + if (rhiD->glObjectLabel) + rhiD->glObjectLabel(GL_PROGRAM, program, -1, m_objectName.constData()); + rhiD->pipelineCreationEnd(); generation += 1; rhiD->registerResource(this); return true; @@ -4417,9 +6427,10 @@ void QGles2ComputePipeline::destroy() samplers.clear(); QRHI_RES_RHI(QRhiGles2); - rhiD->releaseQueue.append(e); - - rhiD->unregisterResource(this); + if (rhiD) { + rhiD->releaseQueue.append(e); + rhiD->unregisterResource(this); + } } bool 
QGles2ComputePipeline::create() @@ -4432,32 +6443,64 @@ bool QGles2ComputePipeline::create() if (!rhiD->ensureContext()) return false; + rhiD->pipelineCreationStart(); + const QShaderDescription csDesc = m_shaderStage.shader().description(); + QShader::SeparateToCombinedImageSamplerMappingList csSamplerMappingList; + QShaderVersion shaderVersion; + if (!rhiD->shaderSource(m_shaderStage, &shaderVersion).isEmpty()) { + csSamplerMappingList = m_shaderStage.shader().separateToCombinedImageSamplerMappingList( + { QShader::GlslShader, shaderVersion, m_shaderStage.shaderVariant() }); + } + program = rhiD->f->glCreateProgram(); - QByteArray diskCacheKey; - QRhiGles2::DiskCacheResult diskCacheResult = rhiD->tryLoadFromDiskCache(&m_shaderStage, 1, program, {}, &diskCacheKey); - if (diskCacheResult == QRhiGles2::DiskCacheError) + QByteArray cacheKey; + QRhiGles2::ProgramCacheResult cacheResult = rhiD->tryLoadFromDiskOrPipelineCache(&m_shaderStage, 1, program, {}, &cacheKey); + if (cacheResult == QRhiGles2::ProgramCacheError) return false; - if (diskCacheResult == QRhiGles2::DiskCacheMiss) { + if (cacheResult == QRhiGles2::ProgramCacheMiss) { if (!rhiD->compileShader(program, m_shaderStage, nullptr)) return false; if (!rhiD->linkProgram(program)) return false; - rhiD->trySaveToDiskCache(program, diskCacheKey); + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) { + // force replacing existing cache entry (if there is one, then + // something is wrong with it, as there was no hit) + rhiD->trySaveToPipelineCache(program, cacheKey, true); + } else { + // legacy QOpenGLShaderProgram style behavior: the "pipeline cache" + // was not enabled, so instead store to the Qt 5 disk cache + rhiD->trySaveToDiskCache(program, cacheKey); + } + } else { + Q_ASSERT(cacheResult == QRhiGles2::ProgramCacheHit); + if (rhiD->rhiFlags.testFlag(QRhi::EnablePipelineCacheDataSave)) { + // just so that it ends up in the pipeline cache also when the hit was + // from the disk cache + 
rhiD->trySaveToPipelineCache(program, cacheKey); + } } - QSet<int> activeUniformLocations; + QDuplicateTracker<int, 256> activeUniformLocations; for (const QShaderDescription::UniformBlock &ub : csDesc.uniformBlocks()) rhiD->gatherUniforms(program, ub, &activeUniformLocations, &uniforms); for (const QShaderDescription::InOutVariable &v : csDesc.combinedImageSamplers()) rhiD->gatherSamplers(program, v, &samplers); + for (const QShader::SeparateToCombinedImageSamplerMapping &mapping : csSamplerMappingList) + rhiD->gatherGeneratedSamplers(program, mapping, &samplers); // storage images and buffers need no special steps here + memset(uniformState, 0, sizeof(uniformState)); + + currentSrb = nullptr; + currentSrbGeneration = 0; + + rhiD->pipelineCreationEnd(); generation += 1; rhiD->registerResource(this); return true; @@ -4481,7 +6524,9 @@ void QGles2CommandBuffer::destroy() QGles2SwapChain::QGles2SwapChain(QRhiImplementation *rhi) : QRhiSwapChain(rhi), - rt(rhi), + rt(rhi, this), + rtLeft(rhi, this), + rtRight(rhi, this), cb(rhi) { } @@ -4493,8 +6538,9 @@ QGles2SwapChain::~QGles2SwapChain() void QGles2SwapChain::destroy() { - QRHI_PROF; - QRHI_PROF_F(releaseSwapChain(this)); + QRHI_RES_RHI(QRhiGles2); + if (rhiD) + rhiD->unregisterResource(this); } QRhiCommandBuffer *QGles2SwapChain::currentFrameCommandBuffer() @@ -4507,19 +6553,59 @@ QRhiRenderTarget *QGles2SwapChain::currentFrameRenderTarget() return &rt; } +QRhiRenderTarget *QGles2SwapChain::currentFrameRenderTarget(StereoTargetBuffer targetBuffer) +{ + if (targetBuffer == LeftBuffer) + return rtLeft.d.isValid() ? &rtLeft : &rt; + else if (targetBuffer == RightBuffer) + return rtRight.d.isValid() ? 
&rtRight : &rt; + else + Q_UNREACHABLE_RETURN(nullptr); +} + QSize QGles2SwapChain::surfacePixelSize() { Q_ASSERT(m_window); - return m_window->size() * m_window->devicePixelRatio(); + if (QPlatformWindow *platformWindow = m_window->handle()) + // Prefer using QPlatformWindow geometry and DPR in order to avoid + // errors due to rounded QWindow geometry. + return platformWindow->geometry().size() * platformWindow->devicePixelRatio(); + else + return m_window->size() * m_window->devicePixelRatio(); +} + +bool QGles2SwapChain::isFormatSupported(Format f) +{ + return f == SDR; } QRhiRenderPassDescriptor *QGles2SwapChain::newCompatibleRenderPassDescriptor() { - return new QGles2RenderPassDescriptor(m_rhi); + QGles2RenderPassDescriptor *rpD = new QGles2RenderPassDescriptor(m_rhi); + QRHI_RES_RHI(QRhiGles2); + rhiD->registerResource(rpD, false); + return rpD; +} + +void QGles2SwapChain::initSwapChainRenderTarget(QGles2SwapChainRenderTarget *rt) +{ + rt->setRenderPassDescriptor(m_renderPassDesc); // for the public getter in QRhiRenderTarget + rt->d.rp = QRHI_RES(QGles2RenderPassDescriptor, m_renderPassDesc); + rt->d.pixelSize = pixelSize; + rt->d.dpr = float(m_window->devicePixelRatio()); + rt->d.sampleCount = qBound(1, m_sampleCount, 64); + rt->d.colorAttCount = 1; + rt->d.dsAttCount = m_depthStencil ? 
1 : 0; + rt->d.srgbUpdateAndBlend = m_flags.testFlag(QRhiSwapChain::sRGB); } bool QGles2SwapChain::createOrResize() { + // can be called multiple times due to window resizes + const bool needsRegistration = !surface || surface != m_window; + if (surface && surface != m_window) + destroy(); + surface = m_window; m_currentPixelSize = surfacePixelSize(); pixelSize = m_currentPixelSize; @@ -4531,21 +6617,70 @@ bool QGles2SwapChain::createOrResize() m_depthStencil->create(); } - rt.d.rp = QRHI_RES(QGles2RenderPassDescriptor, m_renderPassDesc); - rt.d.pixelSize = pixelSize; - rt.d.dpr = float(m_window->devicePixelRatio()); - rt.d.sampleCount = qBound(1, m_sampleCount, 64); - rt.d.colorAttCount = 1; - rt.d.dsAttCount = m_depthStencil ? 1 : 0; - rt.d.srgbUpdateAndBlend = m_flags.testFlag(QRhiSwapChain::sRGB); + initSwapChainRenderTarget(&rt); + + if (m_window->format().stereo()) { + initSwapChainRenderTarget(&rtLeft); + rtLeft.d.stereoTarget = QRhiSwapChain::LeftBuffer; + initSwapChainRenderTarget(&rtRight); + rtRight.d.stereoTarget = QRhiSwapChain::RightBuffer; + } frameCount = 0; - QRHI_PROF; - // make something up - QRHI_PROF_F(resizeSwapChain(this, 2, m_sampleCount > 1 ? 2 : 0, m_sampleCount)); + QRHI_RES_RHI(QRhiGles2); + if (rhiD->rhiFlags.testFlag(QRhi::EnableTimestamps) && rhiD->caps.timestamps) + timestamps.prepare(rhiD); + + // The only reason to register this fairly fake gl swapchain + // object with no native resources underneath is to be able to + // implement a safe destroy(). 
+ if (needsRegistration) + rhiD->registerResource(this, false); return true; } +void QGles2SwapChainTimestamps::prepare(QRhiGles2 *rhiD) +{ + if (!query[0]) + rhiD->f->glGenQueries(TIMESTAMP_PAIRS * 2, query); +} + +void QGles2SwapChainTimestamps::destroy(QRhiGles2 *rhiD) +{ + rhiD->f->glDeleteQueries(TIMESTAMP_PAIRS * 2, query); + memset(active, 0, sizeof(active)); + memset(query, 0, sizeof(query)); +} + +bool QGles2SwapChainTimestamps::tryQueryTimestamps(int pairIndex, QRhiGles2 *rhiD, double *elapsedSec) +{ + if (!active[pairIndex]) + return false; + + GLuint tsStart = query[pairIndex * 2]; + GLuint tsEnd = query[pairIndex * 2 + 1]; + + GLuint ready = GL_FALSE; + rhiD->f->glGetQueryObjectuiv(tsEnd, GL_QUERY_RESULT_AVAILABLE, &ready); + + if (!ready) + return false; + + bool result = false; + quint64 timestamps[2]; + rhiD->glGetQueryObjectui64v(tsStart, GL_QUERY_RESULT, &timestamps[0]); + rhiD->glGetQueryObjectui64v(tsEnd, GL_QUERY_RESULT, &timestamps[1]); + + if (timestamps[1] >= timestamps[0]) { + const quint64 nanoseconds = timestamps[1] - timestamps[0]; + *elapsedSec = nanoseconds / 1000000000.0; + result = true; + } + + active[pairIndex] = false; + return result; +} + QT_END_NAMESPACE |