diff options
Diffstat (limited to 'src/3rdparty/SPIRV-Cross')
29 files changed, 41183 insertions, 0 deletions
diff --git a/src/3rdparty/SPIRV-Cross/GLSL.std.450.h b/src/3rdparty/SPIRV-Cross/GLSL.std.450.h new file mode 100644 index 0000000..54cc00e --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/src/3rdparty/SPIRV-Cross/LICENSE b/src/3rdparty/SPIRV-Cross/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/3rdparty/SPIRV-Cross/Makefile b/src/3rdparty/SPIRV-Cross/Makefile new file mode 100644 index 0000000..0564b65 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/Makefile @@ -0,0 +1,41 @@ +TARGET := spirv-cross + +SOURCES := $(wildcard spirv_*.cpp) +CLI_SOURCES := main.cpp + +OBJECTS := $(SOURCES:.cpp=.o) +CLI_OBJECTS := $(CLI_SOURCES:.cpp=.o) + +STATIC_LIB := lib$(TARGET).a + +DEPS := $(OBJECTS:.o=.d) $(CLI_OBJECTS:.o=.d) + +CXXFLAGS += -std=c++11 -Wall -Wextra -Wshadow -D__STDC_LIMIT_MACROS + +ifeq ($(DEBUG), 1) + CXXFLAGS += -O0 -g +else + CXXFLAGS += -O2 -DNDEBUG +endif + +ifeq ($(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS), 1) + CXXFLAGS += -DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS -fno-exceptions +endif + +all: $(TARGET) + +-include $(DEPS) + +$(TARGET): $(CLI_OBJECTS) $(STATIC_LIB) + $(CXX) -o $@ $(CLI_OBJECTS) $(STATIC_LIB) $(LDFLAGS) + +$(STATIC_LIB): $(OBJECTS) + $(AR) rcs $@ $(OBJECTS) + +%.o: %.cpp + $(CXX) -c -o $@ $< $(CXXFLAGS) -MMD + +clean: + rm -f $(TARGET) $(OBJECTS) $(CLI_OBJECTS) $(STATIC_LIB) $(DEPS) + +.PHONY: clean diff --git a/src/3rdparty/SPIRV-Cross/qt_attribution.json b/src/3rdparty/SPIRV-Cross/qt_attribution.json new file mode 100644 index 0000000..46ba2b9 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/qt_attribution.json @@ -0,0 +1,16 @@ +[ + { + "Id": "SpirvCross", + "Name": "SPIRV-Cross", + "QDocModule": "qtshadertools", + "Description": "A practical tool and library for performing reflection on SPIR-V and disassembling SPIR-V back to high level languages.", + "QtUsage": "Shader code generation.", + + "Homepage": "https://github.com/KhronosGroup/SPIRV-Cross", + "Version": "3fa09f5677c7a62c71a1c25fd09c1d1c4842d922", + "License": "Apache License 2.0", + "LicenseId": "Apache-2.0", + "LicenseFile": "LICENSE", + "Copyright": "Copyright 2016-2018 ARM Limited" + } +] diff --git a/src/3rdparty/SPIRV-Cross/spirv.h b/src/3rdparty/SPIRV-Cross/spirv.h new file mode 100644 index 0000000..8da27dd --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv.h @@ -0,0 +1,1213 @@ +/* +** Copyright (c) 2014-2019 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python, C#, D +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** - C# will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL +** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. +*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10300 +#define SPV_REVISION 6 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010300; +static const unsigned int SpvRevision = 6; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, + SpvSourceLanguageHLSL = 5, + SpvSourceLanguageMax = 0x7fffffff, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, + SpvExecutionModelTaskNV = 5267, + SpvExecutionModelMeshNV = 5268, + SpvExecutionModelRayGenerationNV = 5313, + SpvExecutionModelIntersectionNV = 5314, + SpvExecutionModelAnyHitNV = 5315, + SpvExecutionModelClosestHitNV = 5316, + SpvExecutionModelMissNV = 5317, + SpvExecutionModelCallableNV = 5318, + SpvExecutionModelMax = 0x7fffffff, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, + SpvAddressingModelMax = 0x7fffffff, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkanKHR = 3, + SpvMemoryModelMax = 0x7fffffff, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, + SpvExecutionModeInitializer = 33, + SpvExecutionModeFinalizer = 34, + SpvExecutionModeSubgroupSize = 35, + SpvExecutionModeSubgroupsPerWorkgroup = 36, + SpvExecutionModeSubgroupsPerWorkgroupId = 37, + SpvExecutionModeLocalSizeId = 38, + SpvExecutionModeLocalSizeHintId = 39, + SpvExecutionModePostDepthCoverage = 4446, + SpvExecutionModeDenormPreserve = 4459, + SpvExecutionModeDenormFlushToZero = 4460, + SpvExecutionModeSignedZeroInfNanPreserve = 4461, + SpvExecutionModeRoundingModeRTE = 4462, + SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeStencilRefReplacingEXT = 5027, + SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesNV = 5270, + SpvExecutionModeDerivativeGroupQuadsNV = 5289, + SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModeMax = 0x7fffffff, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, + SpvStorageClassStorageBuffer = 12, + SpvStorageClassCallableDataNV = 5328, + SpvStorageClassIncomingCallableDataNV = 5329, + SpvStorageClassRayPayloadNV = 5338, + SpvStorageClassHitAttributeNV = 5339, + SpvStorageClassIncomingRayPayloadNV = 5342, + SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBufferEXT = 5349, + SpvStorageClassMax = 0x7fffffff, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, + SpvDimMax = 0x7fffffff, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, + SpvSamplerAddressingModeMax = 0x7fffffff, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, + SpvSamplerFilterModeMax = 0x7fffffff, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, + SpvImageFormatMax = 0x7fffffff, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, + SpvImageChannelOrderABGR = 19, + SpvImageChannelOrderMax = 0x7fffffff, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, + SpvImageChannelDataTypeMax = 0x7fffffff, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsMax = 0x7fffffff, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelKHRMask = 0x00000800, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, + SpvFPFastMathModeMax = 0x7fffffff, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, + SpvFPRoundingModeMax = 0x7fffffff, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, + SpvLinkageTypeMax = 0x7fffffff, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, + SpvAccessQualifierMax = 0x7fffffff, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, + SpvFunctionParameterAttributeMax = 0x7fffffff, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, + SpvDecorationMaxByteOffset = 45, + SpvDecorationAlignmentId = 46, + SpvDecorationMaxByteOffsetId = 47, + SpvDecorationNoSignedWrap = 4469, + SpvDecorationNoUnsignedWrap = 4470, + SpvDecorationExplicitInterpAMD = 4999, + SpvDecorationOverrideCoverageNV = 5248, + SpvDecorationPassthroughNV = 5250, + SpvDecorationViewportRelativeNV = 5252, + SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveNV = 5271, + SpvDecorationPerViewNV = 5272, + SpvDecorationPerTaskNV = 5273, + SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointerEXT = 5355, + SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationHlslCounterBufferGOOGLE = 5634, + SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationMax = 0x7fffffff, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, + SpvBuiltInSubgroupEqMask = 4416, + SpvBuiltInSubgroupEqMaskKHR = 4416, + SpvBuiltInSubgroupGeMask = 4417, + SpvBuiltInSubgroupGeMaskKHR = 4417, + SpvBuiltInSubgroupGtMask = 4418, + SpvBuiltInSubgroupGtMaskKHR = 4418, + SpvBuiltInSubgroupLeMask = 4419, + SpvBuiltInSubgroupLeMaskKHR = 4419, + SpvBuiltInSubgroupLtMask = 4420, + SpvBuiltInSubgroupLtMaskKHR = 4420, + SpvBuiltInBaseVertex = 4424, + SpvBuiltInBaseInstance = 4425, + SpvBuiltInDrawIndex = 4426, + SpvBuiltInDeviceIndex = 4438, + SpvBuiltInViewIndex = 4440, + SpvBuiltInBaryCoordNoPerspAMD = 4992, + SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, + SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, + SpvBuiltInBaryCoordSmoothAMD = 4995, + SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, + SpvBuiltInBaryCoordSmoothSampleAMD = 4997, + SpvBuiltInBaryCoordPullModelAMD = 4998, + SpvBuiltInFragStencilRefEXT = 5014, + SpvBuiltInViewportMaskNV = 5253, + SpvBuiltInSecondaryPositionNV = 5257, + SpvBuiltInSecondaryViewportMaskNV = 5258, + SpvBuiltInPositionPerViewNV = 5261, + SpvBuiltInViewportMaskPerViewNV = 5262, + SpvBuiltInFullyCoveredEXT = 5264, + SpvBuiltInTaskCountNV = 5274, + SpvBuiltInPrimitiveCountNV = 5275, + SpvBuiltInPrimitiveIndicesNV = 5276, + SpvBuiltInClipDistancePerViewNV = 5277, + SpvBuiltInCullDistancePerViewNV = 5278, + SpvBuiltInLayerPerViewNV = 5279, + SpvBuiltInMeshViewCountNV = 5280, + SpvBuiltInMeshViewIndicesNV = 5281, + SpvBuiltInBaryCoordNV = 5286, + SpvBuiltInBaryCoordNoPerspNV = 5287, + SpvBuiltInFragSizeEXT = 5292, + SpvBuiltInFragmentSizeNV = 5292, + SpvBuiltInFragInvocationCountEXT = 5293, + SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInLaunchIdNV = 5319, + SpvBuiltInLaunchSizeNV = 5320, + SpvBuiltInWorldRayOriginNV = 5321, + SpvBuiltInWorldRayDirectionNV = 5322, + SpvBuiltInObjectRayOriginNV = 5323, + SpvBuiltInObjectRayDirectionNV = 5324, + SpvBuiltInRayTminNV = 5325, + SpvBuiltInRayTmaxNV = 5326, + SpvBuiltInInstanceCustomIndexNV = 5327, + SpvBuiltInObjectToWorldNV = 5330, + SpvBuiltInWorldToObjectNV = 5331, + SpvBuiltInHitTNV = 5332, + SpvBuiltInHitKindNV = 5333, + SpvBuiltInIncomingRayFlagsNV = 5351, + SpvBuiltInMax = 0x7fffffff, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, + SpvSelectionControlMax = 0x7fffffff, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, + SpvLoopControlDependencyInfiniteShift = 2, + SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMax = 0x7fffffff, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, + SpvLoopControlDependencyInfiniteMask = 0x00000004, + SpvLoopControlDependencyLengthMask = 0x00000008, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, + SpvFunctionControlMax = 0x7fffffff, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsMax = 0x7fffffff, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleKHRShift = 4, + SpvMemoryAccessNonPrivatePointerKHRShift = 5, + SpvMemoryAccessMax = 0x7fffffff, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, + SpvScopeQueueFamilyKHR = 5, + SpvScopeMax = 0x7fffffff, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, + SpvGroupOperationClusteredReduce = 3, + SpvGroupOperationPartitionedReduceNV = 6, + SpvGroupOperationPartitionedInclusiveScanNV = 7, + SpvGroupOperationPartitionedExclusiveScanNV = 8, + SpvGroupOperationMax = 0x7fffffff, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, + SpvKernelEnqueueFlagsMax = 0x7fffffff, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, + SpvKernelProfilingInfoMax = 0x7fffffff, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, + SpvCapabilitySubgroupDispatch = 58, + SpvCapabilityNamedBarrier = 59, + SpvCapabilityPipeStorage = 60, + SpvCapabilityGroupNonUniform = 61, + SpvCapabilityGroupNonUniformVote = 62, + SpvCapabilityGroupNonUniformArithmetic = 63, + SpvCapabilityGroupNonUniformBallot = 64, + SpvCapabilityGroupNonUniformShuffle = 65, + SpvCapabilityGroupNonUniformShuffleRelative = 66, + SpvCapabilityGroupNonUniformClustered = 67, + SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilitySubgroupBallotKHR = 4423, + SpvCapabilityDrawParameters = 4427, + SpvCapabilitySubgroupVoteKHR = 4431, + SpvCapabilityStorageBuffer16BitAccess = 4433, + SpvCapabilityStorageUniformBufferBlock16 = 4433, + SpvCapabilityStorageUniform16 = 4434, + SpvCapabilityUniformAndStorageBuffer16BitAccess = 4434, + SpvCapabilityStoragePushConstant16 = 4435, + SpvCapabilityStorageInputOutput16 = 4436, + SpvCapabilityDeviceGroup = 4437, + SpvCapabilityMultiView = 4439, + SpvCapabilityVariablePointersStorageBuffer = 4441, + SpvCapabilityVariablePointers = 4442, + SpvCapabilityAtomicStorageOps = 4445, + SpvCapabilitySampleMaskPostDepthCoverage = 4447, + SpvCapabilityStorageBuffer8BitAccess = 4448, + SpvCapabilityUniformAndStorageBuffer8BitAccess = 4449, + SpvCapabilityStoragePushConstant8 = 4450, + SpvCapabilityDenormPreserve = 4464, + SpvCapabilityDenormFlushToZero = 4465, + SpvCapabilitySignedZeroInfNanPreserve = 4466, + SpvCapabilityRoundingModeRTE = 4467, + SpvCapabilityRoundingModeRTZ = 4468, + SpvCapabilityFloat16ImageAMD = 5008, + SpvCapabilityImageGatherBiasLodAMD = 5009, + SpvCapabilityFragmentMaskAMD = 5010, + SpvCapabilityStencilExportEXT = 5013, + SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilitySampleMaskOverrideCoverageNV = 5249, + SpvCapabilityGeometryShaderPassthroughNV = 5251, + SpvCapabilityShaderViewportIndexLayerEXT = 5254, + SpvCapabilityShaderViewportIndexLayerNV = 5254, + SpvCapabilityShaderViewportMaskNV = 5255, + SpvCapabilityShaderStereoViewNV = 5259, + SpvCapabilityPerViewAttributesNV = 5260, + SpvCapabilityFragmentFullyCoveredEXT = 5265, + SpvCapabilityMeshShadingNV = 5266, + SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityFragmentBarycentricNV = 5284, + SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, + SpvCapabilityFragmentDensityEXT = 5291, + SpvCapabilityShadingRateNV = 5291, + SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + SpvCapabilityRayTracingNV = 5340, + SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, + SpvCapabilityComputeDerivativeGroupLinearNV = 5350, + SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilitySubgroupShuffleINTEL = 5568, + SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, + SpvCapabilitySubgroupImageBlockIOINTEL = 5570, + SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityMax = 0x7fffffff, +} SpvCapability; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, + SpvOpSizeOf = 321, + SpvOpTypePipeStorage = 322, + SpvOpConstantPipeStorage = 323, + SpvOpCreatePipeFromPipeStorage = 324, + SpvOpGetKernelLocalSizeForSubgroupCount = 325, + SpvOpGetKernelMaxNumSubgroups = 326, + SpvOpTypeNamedBarrier = 327, + SpvOpNamedBarrierInitialize = 328, + SpvOpMemoryNamedBarrier = 329, + SpvOpModuleProcessed = 330, + SpvOpExecutionModeId = 331, + SpvOpDecorateId = 332, + SpvOpGroupNonUniformElect = 333, + SpvOpGroupNonUniformAll = 334, + SpvOpGroupNonUniformAny = 335, + SpvOpGroupNonUniformAllEqual = 336, + SpvOpGroupNonUniformBroadcast = 337, + SpvOpGroupNonUniformBroadcastFirst = 338, + SpvOpGroupNonUniformBallot = 339, + SpvOpGroupNonUniformInverseBallot = 340, + SpvOpGroupNonUniformBallotBitExtract = 341, + SpvOpGroupNonUniformBallotBitCount = 342, + SpvOpGroupNonUniformBallotFindLSB = 343, + SpvOpGroupNonUniformBallotFindMSB = 344, + SpvOpGroupNonUniformShuffle = 345, + SpvOpGroupNonUniformShuffleXor = 346, + SpvOpGroupNonUniformShuffleUp = 347, + SpvOpGroupNonUniformShuffleDown = 348, + SpvOpGroupNonUniformIAdd = 349, + SpvOpGroupNonUniformFAdd = 350, + SpvOpGroupNonUniformIMul = 351, + SpvOpGroupNonUniformFMul = 352, + SpvOpGroupNonUniformSMin = 353, + SpvOpGroupNonUniformUMin = 354, + SpvOpGroupNonUniformFMin = 355, + SpvOpGroupNonUniformSMax = 356, + SpvOpGroupNonUniformUMax = 357, + SpvOpGroupNonUniformFMax = 358, + SpvOpGroupNonUniformBitwiseAnd = 359, + SpvOpGroupNonUniformBitwiseOr = 360, + SpvOpGroupNonUniformBitwiseXor = 361, + SpvOpGroupNonUniformLogicalAnd = 362, + SpvOpGroupNonUniformLogicalOr = 363, + SpvOpGroupNonUniformLogicalXor = 364, + SpvOpGroupNonUniformQuadBroadcast = 365, + SpvOpGroupNonUniformQuadSwap = 366, + SpvOpSubgroupBallotKHR = 4421, + SpvOpSubgroupFirstInvocationKHR = 4422, + SpvOpSubgroupAllKHR = 4428, + SpvOpSubgroupAnyKHR = 4429, + SpvOpSubgroupAllEqualKHR = 4430, + SpvOpSubgroupReadInvocationKHR = 4432, + SpvOpGroupIAddNonUniformAMD = 5000, + SpvOpGroupFAddNonUniformAMD = 5001, + SpvOpGroupFMinNonUniformAMD = 5002, + SpvOpGroupUMinNonUniformAMD = 5003, + SpvOpGroupSMinNonUniformAMD = 5004, + SpvOpGroupFMaxNonUniformAMD = 5005, + SpvOpGroupUMaxNonUniformAMD = 5006, + SpvOpGroupSMaxNonUniformAMD = 5007, + SpvOpFragmentMaskFetchAMD = 5011, + SpvOpFragmentFetchAMD = 5012, + SpvOpImageSampleFootprintNV = 5283, + SpvOpGroupNonUniformPartitionNV = 5296, + SpvOpWritePackedPrimitiveIndices4x8NV = 5299, + SpvOpReportIntersectionNV = 5334, + SpvOpIgnoreIntersectionNV = 5335, + SpvOpTerminateRayNV = 5336, + SpvOpTraceNV = 5337, + SpvOpTypeAccelerationStructureNV = 5341, + SpvOpExecuteCallableNV = 5344, + SpvOpTypeCooperativeMatrixNV = 5358, + SpvOpCooperativeMatrixLoadNV = 5359, + SpvOpCooperativeMatrixStoreNV = 5360, + SpvOpCooperativeMatrixMulAddNV = 5361, + SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpSubgroupShuffleINTEL = 5571, + SpvOpSubgroupShuffleDownINTEL = 5572, + SpvOpSubgroupShuffleUpINTEL = 5573, + SpvOpSubgroupShuffleXorINTEL = 5574, + SpvOpSubgroupBlockReadINTEL = 5575, + SpvOpSubgroupBlockWriteINTEL = 5576, + SpvOpSubgroupImageBlockReadINTEL = 5577, + SpvOpSubgroupImageBlockWriteINTEL = 5578, + SpvOpSubgroupImageMediaBlockReadINTEL = 5580, + SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpMax = 0x7fffffff, +} SpvOp; + +#endif + diff --git a/src/3rdparty/SPIRV-Cross/spirv.hpp b/src/3rdparty/SPIRV-Cross/spirv.hpp new file mode 100644 index 0000000..adc13de --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv.hpp @@ -0,0 +1,1216 @@ +// Copyright (c) 2014-2019 The Khronos Group Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and/or associated documentation files (the "Materials"), +// to deal in the Materials without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Materials, and to permit persons to whom the +// Materials are furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Materials. +// +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +// +// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +// IN THE MATERIALS. + +// This header is automatically generated by the same tool that creates +// the Binary Section of the SPIR-V specification. + +// Enumeration tokens for SPIR-V, in various styles: +// C, C++, C++11, JSON, Lua, Python, C#, D +// +// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +// - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +// - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +// - C# will use enum classes in the Specification class located in the "Spv" namespace, +// e.g.: Spv.Specification.SourceLanguage.GLSL +// - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +// +// Some tokens act like mask values, which can be OR'd together, +// while others are mutually exclusive. The mask-like ones have +// "Mask" in their name, and a parallel enum that has the shift +// amount (1 << x) for each corresponding enumerant. + +#ifndef spirv_HPP +#define spirv_HPP + +namespace spv { + +typedef unsigned int Id; + +#define SPV_VERSION 0x10300 +#define SPV_REVISION 6 + +static const unsigned int MagicNumber = 0x07230203; +static const unsigned int Version = 0x00010300; +static const unsigned int Revision = 6; +static const unsigned int OpCodeMask = 0xffff; +static const unsigned int WordCountShift = 16; + +enum SourceLanguage { + SourceLanguageUnknown = 0, + SourceLanguageESSL = 1, + SourceLanguageGLSL = 2, + SourceLanguageOpenCL_C = 3, + SourceLanguageOpenCL_CPP = 4, + SourceLanguageHLSL = 5, + SourceLanguageMax = 0x7fffffff, +}; + +enum ExecutionModel { + ExecutionModelVertex = 0, + ExecutionModelTessellationControl = 1, + ExecutionModelTessellationEvaluation = 2, + ExecutionModelGeometry = 3, + ExecutionModelFragment = 4, + ExecutionModelGLCompute = 5, + ExecutionModelKernel = 6, + ExecutionModelTaskNV = 5267, + ExecutionModelMeshNV = 5268, + ExecutionModelRayGenerationNV = 5313, + ExecutionModelIntersectionNV = 5314, + ExecutionModelAnyHitNV = 5315, + ExecutionModelClosestHitNV = 5316, + ExecutionModelMissNV = 5317, + ExecutionModelCallableNV = 5318, + ExecutionModelMax = 0x7fffffff, +}; + +enum AddressingModel { + AddressingModelLogical = 0, + AddressingModelPhysical32 = 1, + AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64EXT = 5348, + AddressingModelMax = 0x7fffffff, +}; + +enum MemoryModel { + MemoryModelSimple = 0, + MemoryModelGLSL450 = 1, + MemoryModelOpenCL = 2, + MemoryModelVulkanKHR = 3, + MemoryModelMax = 0x7fffffff, +}; + +enum ExecutionMode { + ExecutionModeInvocations = 0, + ExecutionModeSpacingEqual = 1, + ExecutionModeSpacingFractionalEven = 2, + ExecutionModeSpacingFractionalOdd = 3, + ExecutionModeVertexOrderCw = 4, + ExecutionModeVertexOrderCcw = 5, + ExecutionModePixelCenterInteger = 6, + ExecutionModeOriginUpperLeft = 7, + ExecutionModeOriginLowerLeft = 8, + ExecutionModeEarlyFragmentTests = 9, + ExecutionModePointMode = 10, + ExecutionModeXfb = 11, + ExecutionModeDepthReplacing = 12, + ExecutionModeDepthGreater = 14, + ExecutionModeDepthLess = 15, + ExecutionModeDepthUnchanged = 16, + ExecutionModeLocalSize = 17, + ExecutionModeLocalSizeHint = 18, + ExecutionModeInputPoints = 19, + ExecutionModeInputLines = 20, + ExecutionModeInputLinesAdjacency = 21, + ExecutionModeTriangles = 22, + ExecutionModeInputTrianglesAdjacency = 23, + ExecutionModeQuads = 24, + ExecutionModeIsolines = 25, + ExecutionModeOutputVertices = 26, + ExecutionModeOutputPoints = 27, + ExecutionModeOutputLineStrip = 28, + ExecutionModeOutputTriangleStrip = 29, + ExecutionModeVecTypeHint = 30, + ExecutionModeContractionOff = 31, + ExecutionModeInitializer = 33, + ExecutionModeFinalizer = 34, + ExecutionModeSubgroupSize = 35, + ExecutionModeSubgroupsPerWorkgroup = 36, + ExecutionModeSubgroupsPerWorkgroupId = 37, + ExecutionModeLocalSizeId = 38, + ExecutionModeLocalSizeHintId = 39, + ExecutionModePostDepthCoverage = 4446, + ExecutionModeDenormPreserve = 4459, + ExecutionModeDenormFlushToZero = 4460, + ExecutionModeSignedZeroInfNanPreserve = 4461, + ExecutionModeRoundingModeRTE = 4462, + ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesNV = 5298, + ExecutionModeMax = 0x7fffffff, +}; + +enum StorageClass { + StorageClassUniformConstant = 0, + StorageClassInput = 1, + StorageClassUniform = 2, + StorageClassOutput = 3, + StorageClassWorkgroup = 4, + StorageClassCrossWorkgroup = 5, + StorageClassPrivate = 6, + StorageClassFunction = 7, + StorageClassGeneric = 8, + StorageClassPushConstant = 9, + StorageClassAtomicCounter = 10, + StorageClassImage = 11, + StorageClassStorageBuffer = 12, + StorageClassCallableDataNV = 5328, + StorageClassIncomingCallableDataNV = 5329, + StorageClassRayPayloadNV = 5338, + StorageClassHitAttributeNV = 5339, + StorageClassIncomingRayPayloadNV = 5342, + StorageClassShaderRecordBufferNV = 5343, + StorageClassPhysicalStorageBufferEXT = 5349, + StorageClassMax = 0x7fffffff, +}; + +enum Dim { + Dim1D = 0, + Dim2D = 1, + Dim3D = 2, + DimCube = 3, + DimRect = 4, + DimBuffer = 5, + DimSubpassData = 6, + DimMax = 0x7fffffff, +}; + +enum SamplerAddressingMode { + SamplerAddressingModeNone = 0, + SamplerAddressingModeClampToEdge = 1, + SamplerAddressingModeClamp = 2, + SamplerAddressingModeRepeat = 3, + SamplerAddressingModeRepeatMirrored = 4, + SamplerAddressingModeMax = 0x7fffffff, +}; + +enum SamplerFilterMode { + SamplerFilterModeNearest = 0, + SamplerFilterModeLinear = 1, + SamplerFilterModeMax = 0x7fffffff, +}; + +enum ImageFormat { + ImageFormatUnknown = 0, + ImageFormatRgba32f = 1, + ImageFormatRgba16f = 2, + ImageFormatR32f = 3, + ImageFormatRgba8 = 4, + ImageFormatRgba8Snorm = 5, + ImageFormatRg32f = 6, + ImageFormatRg16f = 7, + ImageFormatR11fG11fB10f = 8, + ImageFormatR16f = 9, + ImageFormatRgba16 = 10, + ImageFormatRgb10A2 = 11, + ImageFormatRg16 = 12, + ImageFormatRg8 = 13, + ImageFormatR16 = 14, + ImageFormatR8 = 15, + ImageFormatRgba16Snorm = 16, + ImageFormatRg16Snorm = 17, + ImageFormatRg8Snorm = 18, + ImageFormatR16Snorm = 19, + ImageFormatR8Snorm = 20, + ImageFormatRgba32i = 21, + ImageFormatRgba16i = 22, + ImageFormatRgba8i = 23, + ImageFormatR32i = 24, + ImageFormatRg32i = 25, + ImageFormatRg16i = 26, + ImageFormatRg8i = 27, + ImageFormatR16i = 28, + ImageFormatR8i = 29, + ImageFormatRgba32ui = 30, + ImageFormatRgba16ui = 31, + ImageFormatRgba8ui = 32, + ImageFormatR32ui = 33, + ImageFormatRgb10a2ui = 34, + ImageFormatRg32ui = 35, + ImageFormatRg16ui = 36, + ImageFormatRg8ui = 37, + ImageFormatR16ui = 38, + ImageFormatR8ui = 39, + ImageFormatMax = 0x7fffffff, +}; + +enum ImageChannelOrder { + ImageChannelOrderR = 0, + ImageChannelOrderA = 1, + ImageChannelOrderRG = 2, + ImageChannelOrderRA = 3, + ImageChannelOrderRGB = 4, + ImageChannelOrderRGBA = 5, + ImageChannelOrderBGRA = 6, + ImageChannelOrderARGB = 7, + ImageChannelOrderIntensity = 8, + ImageChannelOrderLuminance = 9, + ImageChannelOrderRx = 10, + ImageChannelOrderRGx = 11, + ImageChannelOrderRGBx = 12, + ImageChannelOrderDepth = 13, + ImageChannelOrderDepthStencil = 14, + ImageChannelOrdersRGB = 15, + ImageChannelOrdersRGBx = 16, + ImageChannelOrdersRGBA = 17, + ImageChannelOrdersBGRA = 18, + ImageChannelOrderABGR = 19, + ImageChannelOrderMax = 0x7fffffff, +}; + +enum ImageChannelDataType { + ImageChannelDataTypeSnormInt8 = 0, + ImageChannelDataTypeSnormInt16 = 1, + ImageChannelDataTypeUnormInt8 = 2, + ImageChannelDataTypeUnormInt16 = 3, + ImageChannelDataTypeUnormShort565 = 4, + ImageChannelDataTypeUnormShort555 = 5, + ImageChannelDataTypeUnormInt101010 = 6, + ImageChannelDataTypeSignedInt8 = 7, + ImageChannelDataTypeSignedInt16 = 8, + ImageChannelDataTypeSignedInt32 = 9, + ImageChannelDataTypeUnsignedInt8 = 10, + ImageChannelDataTypeUnsignedInt16 = 11, + ImageChannelDataTypeUnsignedInt32 = 12, + ImageChannelDataTypeHalfFloat = 13, + ImageChannelDataTypeFloat = 14, + ImageChannelDataTypeUnormInt24 = 15, + ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeMax = 0x7fffffff, +}; + +enum ImageOperandsShift { + ImageOperandsBiasShift = 0, + ImageOperandsLodShift = 1, + ImageOperandsGradShift = 2, + ImageOperandsConstOffsetShift = 3, + ImageOperandsOffsetShift = 4, + ImageOperandsConstOffsetsShift = 5, + ImageOperandsSampleShift = 6, + ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelKHRShift = 11, + ImageOperandsMax = 0x7fffffff, +}; + +enum ImageOperandsMask { + ImageOperandsMaskNone = 0, + ImageOperandsBiasMask = 0x00000001, + ImageOperandsLodMask = 0x00000002, + ImageOperandsGradMask = 0x00000004, + ImageOperandsConstOffsetMask = 0x00000008, + ImageOperandsOffsetMask = 0x00000010, + ImageOperandsConstOffsetsMask = 0x00000020, + ImageOperandsSampleMask = 0x00000040, + ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelKHRMask = 0x00000800, +}; + +enum FPFastMathModeShift { + FPFastMathModeNotNaNShift = 0, + FPFastMathModeNotInfShift = 1, + FPFastMathModeNSZShift = 2, + FPFastMathModeAllowRecipShift = 3, + FPFastMathModeFastShift = 4, + FPFastMathModeMax = 0x7fffffff, +}; + +enum FPFastMathModeMask { + FPFastMathModeMaskNone = 0, + FPFastMathModeNotNaNMask = 0x00000001, + FPFastMathModeNotInfMask = 0x00000002, + FPFastMathModeNSZMask = 0x00000004, + FPFastMathModeAllowRecipMask = 0x00000008, + FPFastMathModeFastMask = 0x00000010, +}; + +enum FPRoundingMode { + FPRoundingModeRTE = 0, + FPRoundingModeRTZ = 1, + FPRoundingModeRTP = 2, + FPRoundingModeRTN = 3, + FPRoundingModeMax = 0x7fffffff, +}; + +enum LinkageType { + LinkageTypeExport = 0, + LinkageTypeImport = 1, + LinkageTypeMax = 0x7fffffff, +}; + +enum AccessQualifier { + AccessQualifierReadOnly = 0, + AccessQualifierWriteOnly = 1, + AccessQualifierReadWrite = 2, + AccessQualifierMax = 0x7fffffff, +}; + +enum FunctionParameterAttribute { + FunctionParameterAttributeZext = 0, + FunctionParameterAttributeSext = 1, + FunctionParameterAttributeByVal = 2, + FunctionParameterAttributeSret = 3, + FunctionParameterAttributeNoAlias = 4, + FunctionParameterAttributeNoCapture = 5, + FunctionParameterAttributeNoWrite = 6, + FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeMax = 0x7fffffff, +}; + +enum Decoration { + DecorationRelaxedPrecision = 0, + DecorationSpecId = 1, + DecorationBlock = 2, + DecorationBufferBlock = 3, + DecorationRowMajor = 4, + DecorationColMajor = 5, + DecorationArrayStride = 6, + DecorationMatrixStride = 7, + DecorationGLSLShared = 8, + DecorationGLSLPacked = 9, + DecorationCPacked = 10, + DecorationBuiltIn = 11, + DecorationNoPerspective = 13, + DecorationFlat = 14, + DecorationPatch = 15, + DecorationCentroid = 16, + DecorationSample = 17, + DecorationInvariant = 18, + DecorationRestrict = 19, + DecorationAliased = 20, + DecorationVolatile = 21, + DecorationConstant = 22, + DecorationCoherent = 23, + DecorationNonWritable = 24, + DecorationNonReadable = 25, + DecorationUniform = 26, + DecorationSaturatedConversion = 28, + DecorationStream = 29, + DecorationLocation = 30, + DecorationComponent = 31, + DecorationIndex = 32, + DecorationBinding = 33, + DecorationDescriptorSet = 34, + DecorationOffset = 35, + DecorationXfbBuffer = 36, + DecorationXfbStride = 37, + DecorationFuncParamAttr = 38, + DecorationFPRoundingMode = 39, + DecorationFPFastMathMode = 40, + DecorationLinkageAttributes = 41, + DecorationNoContraction = 42, + DecorationInputAttachmentIndex = 43, + DecorationAlignment = 44, + DecorationMaxByteOffset = 45, + DecorationAlignmentId = 46, + DecorationMaxByteOffsetId = 47, + DecorationNoSignedWrap = 4469, + DecorationNoUnsignedWrap = 4470, + DecorationExplicitInterpAMD = 4999, + DecorationOverrideCoverageNV = 5248, + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveNV = 5271, + DecorationPerViewNV = 5272, + DecorationPerTaskNV = 5273, + DecorationPerVertexNV = 5285, + DecorationNonUniformEXT = 5300, + DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointerEXT = 5356, + DecorationHlslCounterBufferGOOGLE = 5634, + DecorationHlslSemanticGOOGLE = 5635, + DecorationMax = 0x7fffffff, +}; + +enum BuiltIn { + BuiltInPosition = 0, + BuiltInPointSize = 1, + BuiltInClipDistance = 3, + BuiltInCullDistance = 4, + BuiltInVertexId = 5, + BuiltInInstanceId = 6, + BuiltInPrimitiveId = 7, + BuiltInInvocationId = 8, + BuiltInLayer = 9, + BuiltInViewportIndex = 10, + BuiltInTessLevelOuter = 11, + BuiltInTessLevelInner = 12, + BuiltInTessCoord = 13, + BuiltInPatchVertices = 14, + BuiltInFragCoord = 15, + BuiltInPointCoord = 16, + BuiltInFrontFacing = 17, + BuiltInSampleId = 18, + BuiltInSamplePosition = 19, + BuiltInSampleMask = 20, + BuiltInFragDepth = 22, + BuiltInHelperInvocation = 23, + BuiltInNumWorkgroups = 24, + BuiltInWorkgroupSize = 25, + BuiltInWorkgroupId = 26, + BuiltInLocalInvocationId = 27, + BuiltInGlobalInvocationId = 28, + BuiltInLocalInvocationIndex = 29, + BuiltInWorkDim = 30, + BuiltInGlobalSize = 31, + BuiltInEnqueuedWorkgroupSize = 32, + BuiltInGlobalOffset = 33, + BuiltInGlobalLinearId = 34, + BuiltInSubgroupSize = 36, + BuiltInSubgroupMaxSize = 37, + BuiltInNumSubgroups = 38, + BuiltInNumEnqueuedSubgroups = 39, + BuiltInSubgroupId = 40, + BuiltInSubgroupLocalInvocationId = 41, + BuiltInVertexIndex = 42, + BuiltInInstanceIndex = 43, + BuiltInSubgroupEqMask = 4416, + BuiltInSubgroupEqMaskKHR = 4416, + BuiltInSubgroupGeMask = 4417, + BuiltInSubgroupGeMaskKHR = 4417, + BuiltInSubgroupGtMask = 4418, + BuiltInSubgroupGtMaskKHR = 4418, + BuiltInSubgroupLeMask = 4419, + BuiltInSubgroupLeMaskKHR = 4419, + BuiltInSubgroupLtMask = 4420, + BuiltInSubgroupLtMaskKHR = 4420, + BuiltInBaseVertex = 4424, + BuiltInBaseInstance = 4425, + BuiltInDrawIndex = 4426, + BuiltInDeviceIndex = 4438, + BuiltInViewIndex = 4440, + BuiltInBaryCoordNoPerspAMD = 4992, + BuiltInBaryCoordNoPerspCentroidAMD = 4993, + BuiltInBaryCoordNoPerspSampleAMD = 4994, + BuiltInBaryCoordSmoothAMD = 4995, + BuiltInBaryCoordSmoothCentroidAMD = 4996, + BuiltInBaryCoordSmoothSampleAMD = 4997, + BuiltInBaryCoordPullModelAMD = 4998, + BuiltInFragStencilRefEXT = 5014, + BuiltInViewportMaskNV = 5253, + BuiltInSecondaryPositionNV = 5257, + BuiltInSecondaryViewportMaskNV = 5258, + BuiltInPositionPerViewNV = 5261, + BuiltInViewportMaskPerViewNV = 5262, + BuiltInFullyCoveredEXT = 5264, + BuiltInTaskCountNV = 5274, + BuiltInPrimitiveCountNV = 5275, + BuiltInPrimitiveIndicesNV = 5276, + BuiltInClipDistancePerViewNV = 5277, + BuiltInCullDistancePerViewNV = 5278, + BuiltInLayerPerViewNV = 5279, + BuiltInMeshViewCountNV = 5280, + BuiltInMeshViewIndicesNV = 5281, + BuiltInBaryCoordNV = 5286, + BuiltInBaryCoordNoPerspNV = 5287, + BuiltInFragSizeEXT = 5292, + BuiltInFragmentSizeNV = 5292, + BuiltInFragInvocationCountEXT = 5293, + BuiltInInvocationsPerPixelNV = 5293, + BuiltInLaunchIdNV = 5319, + BuiltInLaunchSizeNV = 5320, + BuiltInWorldRayOriginNV = 5321, + BuiltInWorldRayDirectionNV = 5322, + BuiltInObjectRayOriginNV = 5323, + BuiltInObjectRayDirectionNV = 5324, + BuiltInRayTminNV = 5325, + BuiltInRayTmaxNV = 5326, + BuiltInInstanceCustomIndexNV = 5327, + BuiltInObjectToWorldNV = 5330, + BuiltInWorldToObjectNV = 5331, + BuiltInHitTNV = 5332, + BuiltInHitKindNV = 5333, + BuiltInIncomingRayFlagsNV = 5351, + BuiltInMax = 0x7fffffff, +}; + +enum SelectionControlShift { + SelectionControlFlattenShift = 0, + SelectionControlDontFlattenShift = 1, + SelectionControlMax = 0x7fffffff, +}; + +enum SelectionControlMask { + SelectionControlMaskNone = 0, + SelectionControlFlattenMask = 0x00000001, + SelectionControlDontFlattenMask = 0x00000002, +}; + +enum LoopControlShift { + LoopControlUnrollShift = 0, + LoopControlDontUnrollShift = 1, + LoopControlDependencyInfiniteShift = 2, + LoopControlDependencyLengthShift = 3, + LoopControlMax = 0x7fffffff, +}; + +enum LoopControlMask { + LoopControlMaskNone = 0, + LoopControlUnrollMask = 0x00000001, + LoopControlDontUnrollMask = 0x00000002, + LoopControlDependencyInfiniteMask = 0x00000004, + LoopControlDependencyLengthMask = 0x00000008, +}; + +enum FunctionControlShift { + FunctionControlInlineShift = 0, + FunctionControlDontInlineShift = 1, + FunctionControlPureShift = 2, + FunctionControlConstShift = 3, + FunctionControlMax = 0x7fffffff, +}; + +enum FunctionControlMask { + FunctionControlMaskNone = 0, + FunctionControlInlineMask = 0x00000001, + FunctionControlDontInlineMask = 0x00000002, + FunctionControlPureMask = 0x00000004, + FunctionControlConstMask = 0x00000008, +}; + +enum MemorySemanticsShift { + MemorySemanticsAcquireShift = 1, + MemorySemanticsReleaseShift = 2, + MemorySemanticsAcquireReleaseShift = 3, + MemorySemanticsSequentiallyConsistentShift = 4, + MemorySemanticsUniformMemoryShift = 6, + MemorySemanticsSubgroupMemoryShift = 7, + MemorySemanticsWorkgroupMemoryShift = 8, + MemorySemanticsCrossWorkgroupMemoryShift = 9, + MemorySemanticsAtomicCounterMemoryShift = 10, + MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryKHRShift = 12, + MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleKHRShift = 14, + MemorySemanticsMax = 0x7fffffff, +}; + +enum MemorySemanticsMask { + MemorySemanticsMaskNone = 0, + MemorySemanticsAcquireMask = 0x00000002, + MemorySemanticsReleaseMask = 0x00000004, + MemorySemanticsAcquireReleaseMask = 0x00000008, + MemorySemanticsSequentiallyConsistentMask = 0x00000010, + MemorySemanticsUniformMemoryMask = 0x00000040, + MemorySemanticsSubgroupMemoryMask = 0x00000080, + MemorySemanticsWorkgroupMemoryMask = 0x00000100, + MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + MemorySemanticsAtomicCounterMemoryMask = 0x00000400, + MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleKHRMask = 0x00004000, +}; + +enum MemoryAccessShift { + MemoryAccessVolatileShift = 0, + MemoryAccessAlignedShift = 1, + MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerKHRShift = 5, + MemoryAccessMax = 0x7fffffff, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerKHRMask = 0x00000020, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + ScopeWorkgroup = 2, + ScopeSubgroup = 3, + ScopeInvocation = 4, + ScopeQueueFamilyKHR = 5, + ScopeMax = 0x7fffffff, +}; + +enum GroupOperation { + GroupOperationReduce = 0, + GroupOperationInclusiveScan = 1, + GroupOperationExclusiveScan = 2, + GroupOperationClusteredReduce = 3, + GroupOperationPartitionedReduceNV = 6, + GroupOperationPartitionedInclusiveScanNV = 7, + GroupOperationPartitionedExclusiveScanNV = 8, + GroupOperationMax = 0x7fffffff, +}; + +enum KernelEnqueueFlags { + KernelEnqueueFlagsNoWait = 0, + KernelEnqueueFlagsWaitKernel = 1, + KernelEnqueueFlagsWaitWorkGroup = 2, + KernelEnqueueFlagsMax = 0x7fffffff, +}; + +enum KernelProfilingInfoShift { + KernelProfilingInfoCmdExecTimeShift = 0, + KernelProfilingInfoMax = 0x7fffffff, +}; + +enum KernelProfilingInfoMask { + KernelProfilingInfoMaskNone = 0, + KernelProfilingInfoCmdExecTimeMask = 0x00000001, +}; + +enum Capability { + CapabilityMatrix = 0, + CapabilityShader = 1, + CapabilityGeometry = 2, + CapabilityTessellation = 3, + CapabilityAddresses = 4, + CapabilityLinkage = 5, + CapabilityKernel = 6, + CapabilityVector16 = 7, + CapabilityFloat16Buffer = 8, + CapabilityFloat16 = 9, + CapabilityFloat64 = 10, + CapabilityInt64 = 11, + CapabilityInt64Atomics = 12, + CapabilityImageBasic = 13, + CapabilityImageReadWrite = 14, + CapabilityImageMipmap = 15, + CapabilityPipes = 17, + CapabilityGroups = 18, + CapabilityDeviceEnqueue = 19, + CapabilityLiteralSampler = 20, + CapabilityAtomicStorage = 21, + CapabilityInt16 = 22, + CapabilityTessellationPointSize = 23, + CapabilityGeometryPointSize = 24, + CapabilityImageGatherExtended = 25, + CapabilityStorageImageMultisample = 27, + CapabilityUniformBufferArrayDynamicIndexing = 28, + CapabilitySampledImageArrayDynamicIndexing = 29, + CapabilityStorageBufferArrayDynamicIndexing = 30, + CapabilityStorageImageArrayDynamicIndexing = 31, + CapabilityClipDistance = 32, + CapabilityCullDistance = 33, + CapabilityImageCubeArray = 34, + CapabilitySampleRateShading = 35, + CapabilityImageRect = 36, + CapabilitySampledRect = 37, + CapabilityGenericPointer = 38, + CapabilityInt8 = 39, + CapabilityInputAttachment = 40, + CapabilitySparseResidency = 41, + CapabilityMinLod = 42, + CapabilitySampled1D = 43, + CapabilityImage1D = 44, + CapabilitySampledCubeArray = 45, + CapabilitySampledBuffer = 46, + CapabilityImageBuffer = 47, + CapabilityImageMSArray = 48, + CapabilityStorageImageExtendedFormats = 49, + CapabilityImageQuery = 50, + CapabilityDerivativeControl = 51, + CapabilityInterpolationFunction = 52, + CapabilityTransformFeedback = 53, + CapabilityGeometryStreams = 54, + CapabilityStorageImageReadWithoutFormat = 55, + CapabilityStorageImageWriteWithoutFormat = 56, + CapabilityMultiViewport = 57, + CapabilitySubgroupDispatch = 58, + CapabilityNamedBarrier = 59, + CapabilityPipeStorage = 60, + CapabilityGroupNonUniform = 61, + CapabilityGroupNonUniformVote = 62, + CapabilityGroupNonUniformArithmetic = 63, + CapabilityGroupNonUniformBallot = 64, + CapabilityGroupNonUniformShuffle = 65, + CapabilityGroupNonUniformShuffleRelative = 66, + CapabilityGroupNonUniformClustered = 67, + CapabilityGroupNonUniformQuad = 68, + CapabilitySubgroupBallotKHR = 4423, + CapabilityDrawParameters = 4427, + CapabilitySubgroupVoteKHR = 4431, + CapabilityStorageBuffer16BitAccess = 4433, + CapabilityStorageUniformBufferBlock16 = 4433, + CapabilityStorageUniform16 = 4434, + CapabilityUniformAndStorageBuffer16BitAccess = 4434, + CapabilityStoragePushConstant16 = 4435, + CapabilityStorageInputOutput16 = 4436, + CapabilityDeviceGroup = 4437, + CapabilityMultiView = 4439, + CapabilityVariablePointersStorageBuffer = 4441, + CapabilityVariablePointers = 4442, + CapabilityAtomicStorageOps = 4445, + CapabilitySampleMaskPostDepthCoverage = 4447, + CapabilityStorageBuffer8BitAccess = 4448, + CapabilityUniformAndStorageBuffer8BitAccess = 4449, + CapabilityStoragePushConstant8 = 4450, + CapabilityDenormPreserve = 4464, + CapabilityDenormFlushToZero = 4465, + CapabilitySignedZeroInfNanPreserve = 4466, + CapabilityRoundingModeRTE = 4467, + CapabilityRoundingModeRTZ = 4468, + CapabilityFloat16ImageAMD = 5008, + CapabilityImageGatherBiasLodAMD = 5009, + CapabilityFragmentMaskAMD = 5010, + CapabilityStencilExportEXT = 5013, + CapabilityImageReadWriteLodAMD = 5015, + CapabilitySampleMaskOverrideCoverageNV = 5249, + CapabilityGeometryShaderPassthroughNV = 5251, + CapabilityShaderViewportIndexLayerEXT = 5254, + CapabilityShaderViewportIndexLayerNV = 5254, + CapabilityShaderViewportMaskNV = 5255, + CapabilityShaderStereoViewNV = 5259, + CapabilityPerViewAttributesNV = 5260, + CapabilityFragmentFullyCoveredEXT = 5265, + CapabilityMeshShadingNV = 5266, + CapabilityImageFootprintNV = 5282, + CapabilityFragmentBarycentricNV = 5284, + CapabilityComputeDerivativeGroupQuadsNV = 5288, + CapabilityFragmentDensityEXT = 5291, + CapabilityShadingRateNV = 5291, + CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + CapabilityRayTracingNV = 5340, + CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddressesEXT = 5347, + CapabilityComputeDerivativeGroupLinearNV = 5350, + CapabilitySubgroupShuffleINTEL = 5568, + CapabilitySubgroupBufferBlockIOINTEL = 5569, + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityMax = 0x7fffffff, +}; + +enum Op { + OpNop = 0, + OpUndef = 1, + OpSourceContinued = 2, + OpSource = 3, + OpSourceExtension = 4, + OpName = 5, + OpMemberName = 6, + OpString = 7, + OpLine = 8, + OpExtension = 10, + OpExtInstImport = 11, + OpExtInst = 12, + OpMemoryModel = 14, + OpEntryPoint = 15, + OpExecutionMode = 16, + OpCapability = 17, + OpTypeVoid = 19, + OpTypeBool = 20, + OpTypeInt = 21, + OpTypeFloat = 22, + OpTypeVector = 23, + OpTypeMatrix = 24, + OpTypeImage = 25, + OpTypeSampler = 26, + OpTypeSampledImage = 27, + OpTypeArray = 28, + OpTypeRuntimeArray = 29, + OpTypeStruct = 30, + OpTypeOpaque = 31, + OpTypePointer = 32, + OpTypeFunction = 33, + OpTypeEvent = 34, + OpTypeDeviceEvent = 35, + OpTypeReserveId = 36, + OpTypeQueue = 37, + OpTypePipe = 38, + OpTypeForwardPointer = 39, + OpConstantTrue = 41, + OpConstantFalse = 42, + OpConstant = 43, + OpConstantComposite = 44, + OpConstantSampler = 45, + OpConstantNull = 46, + OpSpecConstantTrue = 48, + OpSpecConstantFalse = 49, + OpSpecConstant = 50, + OpSpecConstantComposite = 51, + OpSpecConstantOp = 52, + OpFunction = 54, + OpFunctionParameter = 55, + OpFunctionEnd = 56, + OpFunctionCall = 57, + OpVariable = 59, + OpImageTexelPointer = 60, + OpLoad = 61, + OpStore = 62, + OpCopyMemory = 63, + OpCopyMemorySized = 64, + OpAccessChain = 65, + OpInBoundsAccessChain = 66, + OpPtrAccessChain = 67, + OpArrayLength = 68, + OpGenericPtrMemSemantics = 69, + OpInBoundsPtrAccessChain = 70, + OpDecorate = 71, + OpMemberDecorate = 72, + OpDecorationGroup = 73, + OpGroupDecorate = 74, + OpGroupMemberDecorate = 75, + OpVectorExtractDynamic = 77, + OpVectorInsertDynamic = 78, + OpVectorShuffle = 79, + OpCompositeConstruct = 80, + OpCompositeExtract = 81, + OpCompositeInsert = 82, + OpCopyObject = 83, + OpTranspose = 84, + OpSampledImage = 86, + OpImageSampleImplicitLod = 87, + OpImageSampleExplicitLod = 88, + OpImageSampleDrefImplicitLod = 89, + OpImageSampleDrefExplicitLod = 90, + OpImageSampleProjImplicitLod = 91, + OpImageSampleProjExplicitLod = 92, + OpImageSampleProjDrefImplicitLod = 93, + OpImageSampleProjDrefExplicitLod = 94, + OpImageFetch = 95, + OpImageGather = 96, + OpImageDrefGather = 97, + OpImageRead = 98, + OpImageWrite = 99, + OpImage = 100, + OpImageQueryFormat = 101, + OpImageQueryOrder = 102, + OpImageQuerySizeLod = 103, + OpImageQuerySize = 104, + OpImageQueryLod = 105, + OpImageQueryLevels = 106, + OpImageQuerySamples = 107, + OpConvertFToU = 109, + OpConvertFToS = 110, + OpConvertSToF = 111, + OpConvertUToF = 112, + OpUConvert = 113, + OpSConvert = 114, + OpFConvert = 115, + OpQuantizeToF16 = 116, + OpConvertPtrToU = 117, + OpSatConvertSToU = 118, + OpSatConvertUToS = 119, + OpConvertUToPtr = 120, + OpPtrCastToGeneric = 121, + OpGenericCastToPtr = 122, + OpGenericCastToPtrExplicit = 123, + OpBitcast = 124, + OpSNegate = 126, + OpFNegate = 127, + OpIAdd = 128, + OpFAdd = 129, + OpISub = 130, + OpFSub = 131, + OpIMul = 132, + OpFMul = 133, + OpUDiv = 134, + OpSDiv = 135, + OpFDiv = 136, + OpUMod = 137, + OpSRem = 138, + OpSMod = 139, + OpFRem = 140, + OpFMod = 141, + OpVectorTimesScalar = 142, + OpMatrixTimesScalar = 143, + OpVectorTimesMatrix = 144, + OpMatrixTimesVector = 145, + OpMatrixTimesMatrix = 146, + OpOuterProduct = 147, + OpDot = 148, + OpIAddCarry = 149, + OpISubBorrow = 150, + OpUMulExtended = 151, + OpSMulExtended = 152, + OpAny = 154, + OpAll = 155, + OpIsNan = 156, + OpIsInf = 157, + OpIsFinite = 158, + OpIsNormal = 159, + OpSignBitSet = 160, + OpLessOrGreater = 161, + OpOrdered = 162, + OpUnordered = 163, + OpLogicalEqual = 164, + OpLogicalNotEqual = 165, + OpLogicalOr = 166, + OpLogicalAnd = 167, + OpLogicalNot = 168, + OpSelect = 169, + OpIEqual = 170, + OpINotEqual = 171, + OpUGreaterThan = 172, + OpSGreaterThan = 173, + OpUGreaterThanEqual = 174, + OpSGreaterThanEqual = 175, + OpULessThan = 176, + OpSLessThan = 177, + OpULessThanEqual = 178, + OpSLessThanEqual = 179, + OpFOrdEqual = 180, + OpFUnordEqual = 181, + OpFOrdNotEqual = 182, + OpFUnordNotEqual = 183, + OpFOrdLessThan = 184, + OpFUnordLessThan = 185, + OpFOrdGreaterThan = 186, + OpFUnordGreaterThan = 187, + OpFOrdLessThanEqual = 188, + OpFUnordLessThanEqual = 189, + OpFOrdGreaterThanEqual = 190, + OpFUnordGreaterThanEqual = 191, + OpShiftRightLogical = 194, + OpShiftRightArithmetic = 195, + OpShiftLeftLogical = 196, + OpBitwiseOr = 197, + OpBitwiseXor = 198, + OpBitwiseAnd = 199, + OpNot = 200, + OpBitFieldInsert = 201, + OpBitFieldSExtract = 202, + OpBitFieldUExtract = 203, + OpBitReverse = 204, + OpBitCount = 205, + OpDPdx = 207, + OpDPdy = 208, + OpFwidth = 209, + OpDPdxFine = 210, + OpDPdyFine = 211, + OpFwidthFine = 212, + OpDPdxCoarse = 213, + OpDPdyCoarse = 214, + OpFwidthCoarse = 215, + OpEmitVertex = 218, + OpEndPrimitive = 219, + OpEmitStreamVertex = 220, + OpEndStreamPrimitive = 221, + OpControlBarrier = 224, + OpMemoryBarrier = 225, + OpAtomicLoad = 227, + OpAtomicStore = 228, + OpAtomicExchange = 229, + OpAtomicCompareExchange = 230, + OpAtomicCompareExchangeWeak = 231, + OpAtomicIIncrement = 232, + OpAtomicIDecrement = 233, + OpAtomicIAdd = 234, + OpAtomicISub = 235, + OpAtomicSMin = 236, + OpAtomicUMin = 237, + OpAtomicSMax = 238, + OpAtomicUMax = 239, + OpAtomicAnd = 240, + OpAtomicOr = 241, + OpAtomicXor = 242, + OpPhi = 245, + OpLoopMerge = 246, + OpSelectionMerge = 247, + OpLabel = 248, + OpBranch = 249, + OpBranchConditional = 250, + OpSwitch = 251, + OpKill = 252, + OpReturn = 253, + OpReturnValue = 254, + OpUnreachable = 255, + OpLifetimeStart = 256, + OpLifetimeStop = 257, + OpGroupAsyncCopy = 259, + OpGroupWaitEvents = 260, + OpGroupAll = 261, + OpGroupAny = 262, + OpGroupBroadcast = 263, + OpGroupIAdd = 264, + OpGroupFAdd = 265, + OpGroupFMin = 266, + OpGroupUMin = 267, + OpGroupSMin = 268, + OpGroupFMax = 269, + OpGroupUMax = 270, + OpGroupSMax = 271, + OpReadPipe = 274, + OpWritePipe = 275, + OpReservedReadPipe = 276, + OpReservedWritePipe = 277, + OpReserveReadPipePackets = 278, + OpReserveWritePipePackets = 279, + OpCommitReadPipe = 280, + OpCommitWritePipe = 281, + OpIsValidReserveId = 282, + OpGetNumPipePackets = 283, + OpGetMaxPipePackets = 284, + OpGroupReserveReadPipePackets = 285, + OpGroupReserveWritePipePackets = 286, + OpGroupCommitReadPipe = 287, + OpGroupCommitWritePipe = 288, + OpEnqueueMarker = 291, + OpEnqueueKernel = 292, + OpGetKernelNDrangeSubGroupCount = 293, + OpGetKernelNDrangeMaxSubGroupSize = 294, + OpGetKernelWorkGroupSize = 295, + OpGetKernelPreferredWorkGroupSizeMultiple = 296, + OpRetainEvent = 297, + OpReleaseEvent = 298, + OpCreateUserEvent = 299, + OpIsValidEvent = 300, + OpSetUserEventStatus = 301, + OpCaptureEventProfilingInfo = 302, + OpGetDefaultQueue = 303, + OpBuildNDRange = 304, + OpImageSparseSampleImplicitLod = 305, + OpImageSparseSampleExplicitLod = 306, + OpImageSparseSampleDrefImplicitLod = 307, + OpImageSparseSampleDrefExplicitLod = 308, + OpImageSparseSampleProjImplicitLod = 309, + OpImageSparseSampleProjExplicitLod = 310, + OpImageSparseSampleProjDrefImplicitLod = 311, + OpImageSparseSampleProjDrefExplicitLod = 312, + OpImageSparseFetch = 313, + OpImageSparseGather = 314, + OpImageSparseDrefGather = 315, + OpImageSparseTexelsResident = 316, + OpNoLine = 317, + OpAtomicFlagTestAndSet = 318, + OpAtomicFlagClear = 319, + OpImageSparseRead = 320, + OpSizeOf = 321, + OpTypePipeStorage = 322, + OpConstantPipeStorage = 323, + OpCreatePipeFromPipeStorage = 324, + OpGetKernelLocalSizeForSubgroupCount = 325, + OpGetKernelMaxNumSubgroups = 326, + OpTypeNamedBarrier = 327, + OpNamedBarrierInitialize = 328, + OpMemoryNamedBarrier = 329, + OpModuleProcessed = 330, + OpExecutionModeId = 331, + OpDecorateId = 332, + OpGroupNonUniformElect = 333, + OpGroupNonUniformAll = 334, + OpGroupNonUniformAny = 335, + OpGroupNonUniformAllEqual = 336, + OpGroupNonUniformBroadcast = 337, + OpGroupNonUniformBroadcastFirst = 338, + OpGroupNonUniformBallot = 339, + OpGroupNonUniformInverseBallot = 340, + OpGroupNonUniformBallotBitExtract = 341, + OpGroupNonUniformBallotBitCount = 342, + OpGroupNonUniformBallotFindLSB = 343, + OpGroupNonUniformBallotFindMSB = 344, + OpGroupNonUniformShuffle = 345, + OpGroupNonUniformShuffleXor = 346, + OpGroupNonUniformShuffleUp = 347, + OpGroupNonUniformShuffleDown = 348, + OpGroupNonUniformIAdd = 349, + OpGroupNonUniformFAdd = 350, + OpGroupNonUniformIMul = 351, + OpGroupNonUniformFMul = 352, + OpGroupNonUniformSMin = 353, + OpGroupNonUniformUMin = 354, + OpGroupNonUniformFMin = 355, + OpGroupNonUniformSMax = 356, + OpGroupNonUniformUMax = 357, + OpGroupNonUniformFMax = 358, + OpGroupNonUniformBitwiseAnd = 359, + OpGroupNonUniformBitwiseOr = 360, + OpGroupNonUniformBitwiseXor = 361, + OpGroupNonUniformLogicalAnd = 362, + OpGroupNonUniformLogicalOr = 363, + OpGroupNonUniformLogicalXor = 364, + OpGroupNonUniformQuadBroadcast = 365, + OpGroupNonUniformQuadSwap = 366, + OpSubgroupBallotKHR = 4421, + OpSubgroupFirstInvocationKHR = 4422, + OpSubgroupAllKHR = 4428, + OpSubgroupAnyKHR = 4429, + OpSubgroupAllEqualKHR = 4430, + OpSubgroupReadInvocationKHR = 4432, + OpGroupIAddNonUniformAMD = 5000, + OpGroupFAddNonUniformAMD = 5001, + OpGroupFMinNonUniformAMD = 5002, + OpGroupUMinNonUniformAMD = 5003, + OpGroupSMinNonUniformAMD = 5004, + OpGroupFMaxNonUniformAMD = 5005, + OpGroupUMaxNonUniformAMD = 5006, + OpGroupSMaxNonUniformAMD = 5007, + OpFragmentMaskFetchAMD = 5011, + OpFragmentFetchAMD = 5012, + OpImageSampleFootprintNV = 5283, + OpGroupNonUniformPartitionNV = 5296, + OpWritePackedPrimitiveIndices4x8NV = 5299, + OpReportIntersectionNV = 5334, + OpIgnoreIntersectionNV = 5335, + OpTerminateRayNV = 5336, + OpTraceNV = 5337, + OpTypeAccelerationStructureNV = 5341, + OpExecuteCallableNV = 5344, + OpSubgroupShuffleINTEL = 5571, + OpSubgroupShuffleDownINTEL = 5572, + OpSubgroupShuffleUpINTEL = 5573, + OpSubgroupShuffleXorINTEL = 5574, + OpSubgroupBlockReadINTEL = 5575, + OpSubgroupBlockWriteINTEL = 5576, + OpSubgroupImageBlockReadINTEL = 5577, + OpSubgroupImageBlockWriteINTEL = 5578, + OpSubgroupImageMediaBlockReadINTEL = 5580, + OpSubgroupImageMediaBlockWriteINTEL = 5581, + OpDecorateStringGOOGLE = 5632, + OpMemberDecorateStringGOOGLE = 5633, + OpMax = 0x7fffffff, +}; + +// Overload operator| for mask bit combining + +inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } + +} // end namespace spv + +#endif // #ifndef spirv_HPP + diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp new file mode 100644 index 0000000..4ca9ef5 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.cpp @@ -0,0 +1,226 @@ +/* + * Copyright 2016-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cfg.hpp" +#include "spirv_cross.hpp" +#include <algorithm> +#include <assert.h> + +using namespace std; + +namespace spirv_cross +{ +CFG::CFG(Compiler &compiler_, const SPIRFunction &func_) + : compiler(compiler_) + , func(func_) +{ + build_post_order_visit_order(); + build_immediate_dominators(); +} + +uint32_t CFG::find_common_dominator(uint32_t a, uint32_t b) const +{ + while (a != b) + { + if (get_visit_order(a) < get_visit_order(b)) + a = get_immediate_dominator(a); + else + b = get_immediate_dominator(b); + } + return a; +} + +void CFG::build_immediate_dominators() +{ + // Traverse the post-order in reverse and build up the immediate dominator tree. + immediate_dominators.clear(); + immediate_dominators[func.entry_block] = func.entry_block; + + for (auto i = post_order.size(); i; i--) + { + uint32_t block = post_order[i - 1]; + auto &pred = preceding_edges[block]; + if (pred.empty()) // This is for the entry block, but we've already set up the dominators. + continue; + + for (auto &edge : pred) + { + if (immediate_dominators[block]) + { + assert(immediate_dominators[edge]); + immediate_dominators[block] = find_common_dominator(block, edge); + } + else + immediate_dominators[block] = edge; + } + } +} + +bool CFG::is_back_edge(uint32_t to) const +{ + // We have a back edge if the visit order is set with the temporary magic value 0. + // Crossing edges will have already been recorded with a visit order. + auto itr = visit_order.find(to); + assert(itr != end(visit_order)); + return itr->second.get() == 0; +} + +bool CFG::post_order_visit(uint32_t block_id) +{ + // If we have already branched to this block (back edge), stop recursion. + // If our branches are back-edges, we do not record them. + // We have to record crossing edges however. + if (visit_order[block_id].get() >= 0) + return !is_back_edge(block_id); + + // Block back-edges from recursively revisiting ourselves. + visit_order[block_id].get() = 0; + + // First visit our branch targets. + auto &block = compiler.get<SPIRBlock>(block_id); + switch (block.terminator) + { + case SPIRBlock::Direct: + if (post_order_visit(block.next_block)) + add_branch(block_id, block.next_block); + break; + + case SPIRBlock::Select: + if (post_order_visit(block.true_block)) + add_branch(block_id, block.true_block); + if (post_order_visit(block.false_block)) + add_branch(block_id, block.false_block); + break; + + case SPIRBlock::MultiSelect: + for (auto &target : block.cases) + { + if (post_order_visit(target.block)) + add_branch(block_id, target.block); + } + if (block.default_block && post_order_visit(block.default_block)) + add_branch(block_id, block.default_block); + break; + + default: + break; + } + + // If this is a loop header, add an implied branch to the merge target. + // This is needed to avoid annoying cases with do { ... } while(false) loops often generated by inliners. + // To the CFG, this is linear control flow, but we risk picking the do/while scope as our dominating block. + // This makes sure that if we are accessing a variable outside the do/while, we choose the loop header as dominator. + if (block.merge == SPIRBlock::MergeLoop) + add_branch(block_id, block.merge_block); + + // Then visit ourselves. Start counting at one, to let 0 be a magic value for testing back vs. crossing edges. + visit_order[block_id].get() = ++visit_count; + post_order.push_back(block_id); + return true; +} + +void CFG::build_post_order_visit_order() +{ + uint32_t block = func.entry_block; + visit_count = 0; + visit_order.clear(); + post_order.clear(); + post_order_visit(block); +} + +void CFG::add_branch(uint32_t from, uint32_t to) +{ + const auto add_unique = [](vector<uint32_t> &l, uint32_t value) { + auto itr = find(begin(l), end(l), value); + if (itr == end(l)) + l.push_back(value); + }; + add_unique(preceding_edges[to], from); + add_unique(succeeding_edges[from], to); +} + +DominatorBuilder::DominatorBuilder(const CFG &cfg_) + : cfg(cfg_) +{ +} + +void DominatorBuilder::add_block(uint32_t block) +{ + if (!cfg.get_immediate_dominator(block)) + { + // Unreachable block via the CFG, we will never emit this code anyways. + return; + } + + if (!dominator) + { + dominator = block; + return; + } + + if (block != dominator) + dominator = cfg.find_common_dominator(block, dominator); +} + +void DominatorBuilder::lift_continue_block_dominator() +{ + // It is possible for a continue block to be the dominator of a variable is only accessed inside the while block of a do-while loop. + // We cannot safely declare variables inside a continue block, so move any variable declared + // in a continue block to the entry block to simplify. + // It makes very little sense for a continue block to ever be a dominator, so fall back to the simplest + // solution. + + if (!dominator) + return; + + auto &block = cfg.get_compiler().get<SPIRBlock>(dominator); + auto post_order = cfg.get_visit_order(dominator); + + // If we are branching to a block with a higher post-order traversal index (continue blocks), we have a problem + // since we cannot create sensible GLSL code for this, fallback to entry block. + bool back_edge_dominator = false; + switch (block.terminator) + { + case SPIRBlock::Direct: + if (cfg.get_visit_order(block.next_block) > post_order) + back_edge_dominator = true; + break; + + case SPIRBlock::Select: + if (cfg.get_visit_order(block.true_block) > post_order) + back_edge_dominator = true; + if (cfg.get_visit_order(block.false_block) > post_order) + back_edge_dominator = true; + break; + + case SPIRBlock::MultiSelect: + for (auto &target : block.cases) + { + if (cfg.get_visit_order(target.block) > post_order) + back_edge_dominator = true; + } + if (block.default_block && cfg.get_visit_order(block.default_block) > post_order) + back_edge_dominator = true; + break; + + default: + break; + } + + if (back_edge_dominator) + dominator = cfg.get_function().entry_block; +} +} // namespace spirv_cross diff --git a/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp new file mode 100644 index 0000000..5e89320 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cfg.hpp @@ -0,0 +1,149 @@ +/* + * Copyright 2016-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_CFG_HPP +#define SPIRV_CROSS_CFG_HPP + +#include "spirv_common.hpp" +#include <assert.h> + +namespace spirv_cross +{ +class Compiler; +class CFG +{ +public: + CFG(Compiler &compiler, const SPIRFunction &function); + + Compiler &get_compiler() + { + return compiler; + } + + const Compiler &get_compiler() const + { + return compiler; + } + + const SPIRFunction &get_function() const + { + return func; + } + + uint32_t get_immediate_dominator(uint32_t block) const + { + auto itr = immediate_dominators.find(block); + if (itr != std::end(immediate_dominators)) + return itr->second; + else + return 0; + } + + uint32_t get_visit_order(uint32_t block) const + { + auto itr = visit_order.find(block); + assert(itr != std::end(visit_order)); + int v = itr->second.get(); + assert(v > 0); + return uint32_t(v); + } + + uint32_t find_common_dominator(uint32_t a, uint32_t b) const; + + const std::vector<uint32_t> &get_preceding_edges(uint32_t block) const + { + auto itr = preceding_edges.find(block); + if (itr != std::end(preceding_edges)) + return itr->second; + else + return empty_vector; + } + + const std::vector<uint32_t> &get_succeeding_edges(uint32_t block) const + { + auto itr = succeeding_edges.find(block); + if (itr != std::end(succeeding_edges)) + return itr->second; + else + return empty_vector; + } + + template <typename Op> + void walk_from(std::unordered_set<uint32_t> &seen_blocks, uint32_t block, const Op &op) const + { + if (seen_blocks.count(block)) + return; + seen_blocks.insert(block); + + op(block); + for (auto b : get_succeeding_edges(block)) + walk_from(seen_blocks, b, op); + } + +private: + struct VisitOrder + { + int &get() + { + return v; + } + + const int &get() const + { + return v; + } + + int v = -1; + }; + + Compiler &compiler; + const SPIRFunction &func; + std::unordered_map<uint32_t, std::vector<uint32_t>> preceding_edges; + std::unordered_map<uint32_t, std::vector<uint32_t>> succeeding_edges; + std::unordered_map<uint32_t, uint32_t> immediate_dominators; + std::unordered_map<uint32_t, VisitOrder> visit_order; + std::vector<uint32_t> post_order; + std::vector<uint32_t> empty_vector; + + void add_branch(uint32_t from, uint32_t to); + void build_post_order_visit_order(); + void build_immediate_dominators(); + bool post_order_visit(uint32_t block); + uint32_t visit_count = 0; + + bool is_back_edge(uint32_t to) const; +}; + +class DominatorBuilder +{ +public: + DominatorBuilder(const CFG &cfg); + + void add_block(uint32_t block); + uint32_t get_dominator() const + { + return dominator; + } + + void lift_continue_block_dominator(); + +private: + const CFG &cfg; + uint32_t dominator = 0; +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_common.hpp b/src/3rdparty/SPIRV-Cross/spirv_common.hpp new file mode 100644 index 0000000..dcd27af --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_common.hpp @@ -0,0 +1,1518 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_COMMON_HPP +#define SPIRV_CROSS_COMMON_HPP + +#include "spirv.hpp" + +#include <algorithm> +#include <cstdio> +#include <cstring> +#include <functional> +#include <memory> +#include <sstream> +#include <stack> +#include <stdexcept> +#include <stdint.h> +#include <string> +#include <type_traits> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +namespace spirv_cross +{ + +#ifdef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#ifndef _MSC_VER +[[noreturn]] +#endif +inline void +report_and_abort(const std::string &msg) +{ +#ifdef NDEBUG + (void)msg; +#else + fprintf(stderr, "There was a compiler error: %s\n", msg.c_str()); +#endif + fflush(stderr); + abort(); +} + +#define SPIRV_CROSS_THROW(x) report_and_abort(x) +#else +class CompilerError : public std::runtime_error +{ +public: + explicit CompilerError(const std::string &str) + : std::runtime_error(str) + { + } +}; + +#define SPIRV_CROSS_THROW(x) throw CompilerError(x) +#endif + +//#define SPIRV_CROSS_COPY_CONSTRUCTOR_SANITIZE + +// MSVC 2013 does not have noexcept. We need this for Variant to get move constructor to work correctly +// instead of copy constructor. +// MSVC 2013 ignores that move constructors cannot throw in std::vector, so just don't define it. +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define SPIRV_CROSS_NOEXCEPT +#else +#define SPIRV_CROSS_NOEXCEPT noexcept +#endif + +#if __cplusplus >= 201402l +#define SPIRV_CROSS_DEPRECATED(reason) [[deprecated(reason)]] +#elif defined(__GNUC__) +#define SPIRV_CROSS_DEPRECATED(reason) __attribute__((deprecated)) +#elif defined(_MSC_VER) +#define SPIRV_CROSS_DEPRECATED(reason) __declspec(deprecated(reason)) +#else +#define SPIRV_CROSS_DEPRECATED(reason) +#endif + +namespace inner +{ +template <typename T> +void join_helper(std::ostringstream &stream, T &&t) +{ + stream << std::forward<T>(t); +} + +template <typename T, typename... Ts> +void join_helper(std::ostringstream &stream, T &&t, Ts &&... ts) +{ + stream << std::forward<T>(t); + join_helper(stream, std::forward<Ts>(ts)...); +} +} // namespace inner + +class Bitset +{ +public: + Bitset() = default; + explicit inline Bitset(uint64_t lower_) + : lower(lower_) + { + } + + inline bool get(uint32_t bit) const + { + if (bit < 64) + return (lower & (1ull << bit)) != 0; + else + return higher.count(bit) != 0; + } + + inline void set(uint32_t bit) + { + if (bit < 64) + lower |= 1ull << bit; + else + higher.insert(bit); + } + + inline void clear(uint32_t bit) + { + if (bit < 64) + lower &= ~(1ull << bit); + else + higher.erase(bit); + } + + inline uint64_t get_lower() const + { + return lower; + } + + inline void reset() + { + lower = 0; + higher.clear(); + } + + inline void merge_and(const Bitset &other) + { + lower &= other.lower; + std::unordered_set<uint32_t> tmp_set; + for (auto &v : higher) + if (other.higher.count(v) != 0) + tmp_set.insert(v); + higher = std::move(tmp_set); + } + + inline void merge_or(const Bitset &other) + { + lower |= other.lower; + for (auto &v : other.higher) + higher.insert(v); + } + + inline bool operator==(const Bitset &other) const + { + if (lower != other.lower) + return false; + + if (higher.size() != other.higher.size()) + return false; + + for (auto &v : higher) + if (other.higher.count(v) == 0) + return false; + + return true; + } + + inline bool operator!=(const Bitset &other) const + { + return !(*this == other); + } + + template <typename Op> + void for_each_bit(const Op &op) const + { + // TODO: Add ctz-based iteration. + for (uint32_t i = 0; i < 64; i++) + { + if (lower & (1ull << i)) + op(i); + } + + if (higher.empty()) + return; + + // Need to enforce an order here for reproducible results, + // but hitting this path should happen extremely rarely, so having this slow path is fine. + std::vector<uint32_t> bits; + bits.reserve(higher.size()); + for (auto &v : higher) + bits.push_back(v); + std::sort(std::begin(bits), std::end(bits)); + + for (auto &v : bits) + op(v); + } + + inline bool empty() const + { + return lower == 0 && higher.empty(); + } + +private: + // The most common bits to set are all lower than 64, + // so optimize for this case. Bits spilling outside 64 go into a slower data structure. + // In almost all cases, higher data structure will not be used. + uint64_t lower = 0; + std::unordered_set<uint32_t> higher; +}; + +// Helper template to avoid lots of nasty string temporary munging. +template <typename... Ts> +std::string join(Ts &&... ts) +{ + std::ostringstream stream; + inner::join_helper(stream, std::forward<Ts>(ts)...); + return stream.str(); +} + +inline std::string merge(const std::vector<std::string> &list) +{ + std::string s; + for (auto &elem : list) + { + s += elem; + if (&elem != &list.back()) + s += ", "; + } + return s; +} + +// Make sure we don't accidentally call this with float or doubles with SFINAE. +// Have to use the radix-aware overload. +template <typename T, typename std::enable_if<!std::is_floating_point<T>::value, int>::type = 0> +inline std::string convert_to_string(const T &t) +{ + return std::to_string(t); +} + +// Allow implementations to set a convenient standard precision +#ifndef SPIRV_CROSS_FLT_FMT +#define SPIRV_CROSS_FLT_FMT "%.32g" +#endif + +#ifdef _MSC_VER +// sprintf warning. +// We cannot rely on snprintf existing because, ..., MSVC. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +static inline void fixup_radix_point(char *str, char radix_point) +{ + // Setting locales is a very risky business in multi-threaded program, + // so just fixup locales instead. We only need to care about the radix point. + if (radix_point != '.') + { + while (*str != '\0') + { + if (*str == radix_point) + *str = '.'; + str++; + } + } +} + +inline std::string convert_to_string(float t, char locale_radix_point) +{ + // std::to_string for floating point values is broken. + // Fallback to something more sane. + char buf[64]; + sprintf(buf, SPIRV_CROSS_FLT_FMT, t); + fixup_radix_point(buf, locale_radix_point); + + // Ensure that the literal is float. + if (!strchr(buf, '.') && !strchr(buf, 'e')) + strcat(buf, ".0"); + return buf; +} + +inline std::string convert_to_string(double t, char locale_radix_point) +{ + // std::to_string for floating point values is broken. + // Fallback to something more sane. + char buf[64]; + sprintf(buf, SPIRV_CROSS_FLT_FMT, t); + fixup_radix_point(buf, locale_radix_point); + + // Ensure that the literal is float. + if (!strchr(buf, '.') && !strchr(buf, 'e')) + strcat(buf, ".0"); + return buf; +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +struct Instruction +{ + uint16_t op = 0; + uint16_t count = 0; + uint32_t offset = 0; + uint32_t length = 0; +}; + +// Helper for Variant interface. +struct IVariant +{ + virtual ~IVariant() = default; + virtual std::unique_ptr<IVariant> clone() = 0; + + uint32_t self = 0; +}; + +#define SPIRV_CROSS_DECLARE_CLONE(T) \ + std::unique_ptr<IVariant> clone() override \ + { \ + return std::unique_ptr<IVariant>(new T(*this)); \ + } + +enum Types +{ + TypeNone, + TypeType, + TypeVariable, + TypeConstant, + TypeFunction, + TypeFunctionPrototype, + TypeBlock, + TypeExtension, + TypeExpression, + TypeConstantOp, + TypeCombinedImageSampler, + TypeAccessChain, + TypeUndef, + TypeCount +}; + +struct SPIRUndef : IVariant +{ + enum + { + type = TypeUndef + }; + + explicit SPIRUndef(uint32_t basetype_) + : basetype(basetype_) + { + } + uint32_t basetype; + + SPIRV_CROSS_DECLARE_CLONE(SPIRUndef) +}; + +// This type is only used by backends which need to access the combined image and sampler IDs separately after +// the OpSampledImage opcode. +struct SPIRCombinedImageSampler : IVariant +{ + enum + { + type = TypeCombinedImageSampler + }; + SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_) + : combined_type(type_) + , image(image_) + , sampler(sampler_) + { + } + uint32_t combined_type; + uint32_t image; + uint32_t sampler; + + SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler) +}; + +struct SPIRConstantOp : IVariant +{ + enum + { + type = TypeConstantOp + }; + + SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length) + : opcode(op) + , arguments(args, args + length) + , basetype(result_type) + { + } + + spv::Op opcode; + std::vector<uint32_t> arguments; + uint32_t basetype; + + SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp) +}; + +struct SPIRType : IVariant +{ + enum + { + type = TypeType + }; + + enum BaseType + { + Unknown, + Void, + Boolean, + SByte, + UByte, + Short, + UShort, + Int, + UInt, + Int64, + UInt64, + AtomicCounter, + Half, + Float, + Double, + Struct, + Image, + SampledImage, + Sampler, + AccelerationStructureNV, + + // Keep internal types at the end. + ControlPointArray, + Char + }; + + // Scalar/vector/matrix support. + BaseType basetype = Unknown; + uint32_t width = 0; + uint32_t vecsize = 1; + uint32_t columns = 1; + + // Arrays, support array of arrays by having a vector of array sizes. + std::vector<uint32_t> array; + + // Array elements can be either specialization constants or specialization ops. + // This array determines how to interpret the array size. + // If an element is true, the element is a literal, + // otherwise, it's an expression, which must be resolved on demand. + // The actual size is not really known until runtime. + std::vector<bool> array_size_literal; + + // Pointers + // Keep track of how many pointer layers we have. + uint32_t pointer_depth = 0; + bool pointer = false; + + spv::StorageClass storage = spv::StorageClassGeneric; + + std::vector<uint32_t> member_types; + + struct ImageType + { + uint32_t type; + spv::Dim dim; + bool depth; + bool arrayed; + bool ms; + uint32_t sampled; + spv::ImageFormat format; + spv::AccessQualifier access; + } image; + + // Structs can be declared multiple times if they are used as part of interface blocks. + // We want to detect this so that we only emit the struct definition once. + // Since we cannot rely on OpName to be equal, we need to figure out aliases. + uint32_t type_alias = 0; + + // Denotes the type which this type is based on. + // Allows the backend to traverse how a complex type is built up during access chains. + uint32_t parent_type = 0; + + // Used in backends to avoid emitting members with conflicting names. + std::unordered_set<std::string> member_name_cache; + + SPIRV_CROSS_DECLARE_CLONE(SPIRType) +}; + +struct SPIRExtension : IVariant +{ + enum + { + type = TypeExtension + }; + + enum Extension + { + Unsupported, + GLSL, + SPV_AMD_shader_ballot, + SPV_AMD_shader_explicit_vertex_parameter, + SPV_AMD_shader_trinary_minmax, + SPV_AMD_gcn_shader + }; + + explicit SPIRExtension(Extension ext_) + : ext(ext_) + { + } + + Extension ext; + SPIRV_CROSS_DECLARE_CLONE(SPIRExtension) +}; + +// SPIREntryPoint is not a variant since its IDs are used to decorate OpFunction, +// so in order to avoid conflicts, we can't stick them in the ids array. +struct SPIREntryPoint +{ + SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name) + : self(self_) + , name(entry_name) + , orig_name(entry_name) + , model(execution_model) + { + } + SPIREntryPoint() = default; + + uint32_t self = 0; + std::string name; + std::string orig_name; + std::vector<uint32_t> interface_variables; + + Bitset flags; + struct + { + uint32_t x = 0, y = 0, z = 0; + uint32_t constant = 0; // Workgroup size can be expressed as a constant/spec-constant instead. + } workgroup_size; + uint32_t invocations = 0; + uint32_t output_vertices = 0; + spv::ExecutionModel model = spv::ExecutionModelMax; +}; + +struct SPIRExpression : IVariant +{ + enum + { + type = TypeExpression + }; + + // Only created by the backend target to avoid creating tons of temporaries. + SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_) + : expression(move(expr)) + , expression_type(expression_type_) + , immutable(immutable_) + { + } + + // If non-zero, prepend expression with to_expression(base_expression). + // Used in amortizing multiple calls to to_expression() + // where in certain cases that would quickly force a temporary when not needed. + uint32_t base_expression = 0; + + std::string expression; + uint32_t expression_type = 0; + + // If this expression is a forwarded load, + // allow us to reference the original variable. + uint32_t loaded_from = 0; + + // If this expression will never change, we can avoid lots of temporaries + // in high level source. + // An expression being immutable can be speculative, + // it is assumed that this is true almost always. + bool immutable = false; + + // Before use, this expression must be transposed. + // This is needed for targets which don't support row_major layouts. + bool need_transpose = false; + + // Whether or not this is an access chain expression. + bool access_chain = false; + + // A list of expressions which this expression depends on. + std::vector<uint32_t> expression_dependencies; + + // By reading this expression, we implicitly read these expressions as well. + // Used by access chain Store and Load since we read multiple expressions in this case. + std::vector<uint32_t> implied_read_expressions; + + SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) +}; + +struct SPIRFunctionPrototype : IVariant +{ + enum + { + type = TypeFunctionPrototype + }; + + explicit SPIRFunctionPrototype(uint32_t return_type_) + : return_type(return_type_) + { + } + + uint32_t return_type; + std::vector<uint32_t> parameter_types; + + SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype) +}; + +struct SPIRBlock : IVariant +{ + enum + { + type = TypeBlock + }; + + enum Terminator + { + Unknown, + Direct, // Emit next block directly without a particular condition. + + Select, // Block ends with an if/else block. + MultiSelect, // Block ends with switch statement. + + Return, // Block ends with return. + Unreachable, // Noop + Kill // Discard + }; + + enum Merge + { + MergeNone, + MergeLoop, + MergeSelection + }; + + enum Hints + { + HintNone, + HintUnroll, + HintDontUnroll, + HintFlatten, + HintDontFlatten + }; + + enum Method + { + MergeToSelectForLoop, + MergeToDirectForLoop, + MergeToSelectContinueForLoop + }; + + enum ContinueBlockType + { + ContinueNone, + + // Continue block is branchless and has at least one instruction. + ForLoop, + + // Noop continue block. + WhileLoop, + + // Continue block is conditional. + DoWhileLoop, + + // Highly unlikely that anything will use this, + // since it is really awkward/impossible to express in GLSL. + ComplexLoop + }; + + enum + { + NoDominator = 0xffffffffu + }; + + Terminator terminator = Unknown; + Merge merge = MergeNone; + Hints hint = HintNone; + uint32_t next_block = 0; + uint32_t merge_block = 0; + uint32_t continue_block = 0; + + uint32_t return_value = 0; // If 0, return nothing (void). + uint32_t condition = 0; + uint32_t true_block = 0; + uint32_t false_block = 0; + uint32_t default_block = 0; + + std::vector<Instruction> ops; + + struct Phi + { + uint32_t local_variable; // flush local variable ... + uint32_t parent; // If we're in from_block and want to branch into this block ... + uint32_t function_variable; // to this function-global "phi" variable first. + }; + + // Before entering this block flush out local variables to magical "phi" variables. + std::vector<Phi> phi_variables; + + // Declare these temporaries before beginning the block. + // Used for handling complex continue blocks which have side effects. + std::vector<std::pair<uint32_t, uint32_t>> declare_temporary; + + // Declare these temporaries, but only conditionally if this block turns out to be + // a complex loop header. + std::vector<std::pair<uint32_t, uint32_t>> potential_declare_temporary; + + struct Case + { + uint32_t value; + uint32_t block; + }; + std::vector<Case> cases; + + // If we have tried to optimize code for this block but failed, + // keep track of this. + bool disable_block_optimization = false; + + // If the continue block is complex, fallback to "dumb" for loops. + bool complex_continue = false; + + // Do we need a ladder variable to defer breaking out of a loop construct after a switch block? + bool need_ladder_break = false; + + // The dominating block which this block might be within. + // Used in continue; blocks to determine if we really need to write continue. + uint32_t loop_dominator = 0; + + // All access to these variables are dominated by this block, + // so before branching anywhere we need to make sure that we declare these variables. + std::vector<uint32_t> dominated_variables; + + // These are variables which should be declared in a for loop header, if we + // fail to use a classic for-loop, + // we remove these variables, and fall back to regular variables outside the loop. + std::vector<uint32_t> loop_variables; + + // Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or + // sub-group-like operations. + // Make sure that we only use these expressions in the original block. + std::vector<uint32_t> invalidate_expressions; + + SPIRV_CROSS_DECLARE_CLONE(SPIRBlock) +}; + +struct SPIRFunction : IVariant +{ + enum + { + type = TypeFunction + }; + + SPIRFunction(uint32_t return_type_, uint32_t function_type_) + : return_type(return_type_) + , function_type(function_type_) + { + } + + struct Parameter + { + uint32_t type; + uint32_t id; + uint32_t read_count; + uint32_t write_count; + + // Set to true if this parameter aliases a global variable, + // used mostly in Metal where global variables + // have to be passed down to functions as regular arguments. + // However, for this kind of variable, we should not care about + // read and write counts as access to the function arguments + // is not local to the function in question. + bool alias_global_variable; + }; + + // When calling a function, and we're remapping separate image samplers, + // resolve these arguments into combined image samplers and pass them + // as additional arguments in this order. + // It gets more complicated as functions can pull in their own globals + // and combine them with parameters, + // so we need to distinguish if something is local parameter index + // or a global ID. + struct CombinedImageSamplerParameter + { + uint32_t id; + uint32_t image_id; + uint32_t sampler_id; + bool global_image; + bool global_sampler; + bool depth; + }; + + uint32_t return_type; + uint32_t function_type; + std::vector<Parameter> arguments; + + // Can be used by backends to add magic arguments. + // Currently used by combined image/sampler implementation. + + std::vector<Parameter> shadow_arguments; + std::vector<uint32_t> local_variables; + uint32_t entry_block = 0; + std::vector<uint32_t> blocks; + std::vector<CombinedImageSamplerParameter> combined_parameters; + + void add_local_variable(uint32_t id) + { + local_variables.push_back(id); + } + + void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false) + { + // Arguments are read-only until proven otherwise. + arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable }); + } + + // Hooks to be run when the function returns. + // Mostly used for lowering internal data structures onto flattened structures. + // Need to defer this, because they might rely on things which change during compilation. + std::vector<std::function<void()>> fixup_hooks_out; + + // Hooks to be run when the function begins. + // Mostly used for populating internal data structures from flattened structures. + // Need to defer this, because they might rely on things which change during compilation. + std::vector<std::function<void()>> fixup_hooks_in; + + // On function entry, make sure to copy a constant array into thread addr space to work around + // the case where we are passing a constant array by value to a function on backends which do not + // consider arrays value types. + std::vector<uint32_t> constant_arrays_needed_on_stack; + + bool active = false; + bool flush_undeclared = true; + bool do_combined_parameters = true; + + SPIRV_CROSS_DECLARE_CLONE(SPIRFunction) +}; + +struct SPIRAccessChain : IVariant +{ + enum + { + type = TypeAccessChain + }; + + SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, + int32_t static_index_) + : basetype(basetype_) + , storage(storage_) + , base(std::move(base_)) + , dynamic_index(std::move(dynamic_index_)) + , static_index(static_index_) + { + } + + // The access chain represents an offset into a buffer. + // Some backends need more complicated handling of access chains to be able to use buffers, like HLSL + // which has no usable buffer type ala GLSL SSBOs. + // StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses. + + uint32_t basetype; + spv::StorageClass storage; + std::string base; + std::string dynamic_index; + int32_t static_index; + + uint32_t loaded_from = 0; + uint32_t matrix_stride = 0; + bool row_major_matrix = false; + bool immutable = false; + + // By reading this expression, we implicitly read these expressions as well. + // Used by access chain Store and Load since we read multiple expressions in this case. + std::vector<uint32_t> implied_read_expressions; + + SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain) +}; + +struct SPIRVariable : IVariant +{ + enum + { + type = TypeVariable + }; + + SPIRVariable() = default; + SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0) + : basetype(basetype_) + , storage(storage_) + , initializer(initializer_) + , basevariable(basevariable_) + { + } + + uint32_t basetype = 0; + spv::StorageClass storage = spv::StorageClassGeneric; + uint32_t decoration = 0; + uint32_t initializer = 0; + uint32_t basevariable = 0; + + std::vector<uint32_t> dereference_chain; + bool compat_builtin = false; + + // If a variable is shadowed, we only statically assign to it + // and never actually emit a statement for it. + // When we read the variable as an expression, just forward + // shadowed_id as the expression. + bool statically_assigned = false; + uint32_t static_expression = 0; + + // Temporaries which can remain forwarded as long as this variable is not modified. + std::vector<uint32_t> dependees; + bool forwardable = true; + + bool deferred_declaration = false; + bool phi_variable = false; + + // Used to deal with Phi variable flushes. See flush_phi(). + bool allocate_temporary_copy = false; + + bool remapped_variable = false; + uint32_t remapped_components = 0; + + // The block which dominates all access to this variable. + uint32_t dominator = 0; + // If true, this variable is a loop variable, when accessing the variable + // outside a loop, + // we should statically forward it. + bool loop_variable = false; + // Set to true while we're inside the for loop. + bool loop_variable_enable = false; + + SPIRFunction::Parameter *parameter = nullptr; + + SPIRV_CROSS_DECLARE_CLONE(SPIRVariable) +}; + +struct SPIRConstant : IVariant +{ + enum + { + type = TypeConstant + }; + + union Constant { + uint32_t u32; + int32_t i32; + float f32; + + uint64_t u64; + int64_t i64; + double f64; + }; + + struct ConstantVector + { + Constant r[4]; + // If != 0, this element is a specialization constant, and we should keep track of it as such. + uint32_t id[4]; + uint32_t vecsize = 1; + + // Workaround for MSVC 2013, initializing an array breaks. + ConstantVector() + { + memset(r, 0, sizeof(r)); + for (unsigned i = 0; i < 4; i++) + id[i] = 0; + } + }; + + struct ConstantMatrix + { + ConstantVector c[4]; + // If != 0, this column is a specialization constant, and we should keep track of it as such. + uint32_t id[4]; + uint32_t columns = 1; + + // Workaround for MSVC 2013, initializing an array breaks. + ConstantMatrix() + { + for (unsigned i = 0; i < 4; i++) + id[i] = 0; + } + }; + + static inline float f16_to_f32(uint16_t u16_value) + { + // Based on the GLM implementation. + int s = (u16_value >> 15) & 0x1; + int e = (u16_value >> 10) & 0x1f; + int m = (u16_value >> 0) & 0x3ff; + + union { + float f32; + uint32_t u32; + } u; + + if (e == 0) + { + if (m == 0) + { + u.u32 = uint32_t(s) << 31; + return u.f32; + } + else + { + while ((m & 0x400) == 0) + { + m <<= 1; + e--; + } + + e++; + m &= ~0x400; + } + } + else if (e == 31) + { + if (m == 0) + { + u.u32 = (uint32_t(s) << 31) | 0x7f800000u; + return u.f32; + } + else + { + u.u32 = (uint32_t(s) << 31) | 0x7f800000u | (m << 13); + return u.f32; + } + } + + e += 127 - 15; + m <<= 13; + u.u32 = (uint32_t(s) << 31) | (e << 23) | m; + return u.f32; + } + + inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const + { + return m.c[col].id[row]; + } + + inline uint32_t specialization_constant_id(uint32_t col) const + { + return m.id[col]; + } + + inline uint32_t scalar(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].u32; + } + + inline int16_t scalar_i16(uint32_t col = 0, uint32_t row = 0) const + { + return int16_t(m.c[col].r[row].u32 & 0xffffu); + } + + inline uint16_t scalar_u16(uint32_t col = 0, uint32_t row = 0) const + { + return uint16_t(m.c[col].r[row].u32 & 0xffffu); + } + + inline int8_t scalar_i8(uint32_t col = 0, uint32_t row = 0) const + { + return int8_t(m.c[col].r[row].u32 & 0xffu); + } + + inline uint8_t scalar_u8(uint32_t col = 0, uint32_t row = 0) const + { + return uint8_t(m.c[col].r[row].u32 & 0xffu); + } + + inline float scalar_f16(uint32_t col = 0, uint32_t row = 0) const + { + return f16_to_f32(scalar_u16(col, row)); + } + + inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].f32; + } + + inline int32_t scalar_i32(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].i32; + } + + inline double scalar_f64(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].f64; + } + + inline int64_t scalar_i64(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].i64; + } + + inline uint64_t scalar_u64(uint32_t col = 0, uint32_t row = 0) const + { + return m.c[col].r[row].u64; + } + + inline const ConstantVector &vector() const + { + return m.c[0]; + } + + inline uint32_t vector_size() const + { + return m.c[0].vecsize; + } + + inline uint32_t columns() const + { + return m.columns; + } + + inline void make_null(const SPIRType &constant_type_) + { + m = {}; + m.columns = constant_type_.columns; + for (auto &c : m.c) + c.vecsize = constant_type_.vecsize; + } + + inline bool constant_is_null() const + { + if (specialization) + return false; + if (!subconstants.empty()) + return false; + + for (uint32_t col = 0; col < columns(); col++) + for (uint32_t row = 0; row < vector_size(); row++) + if (scalar_u64(col, row) != 0) + return false; + + return true; + } + + explicit SPIRConstant(uint32_t constant_type_) + : constant_type(constant_type_) + { + } + + SPIRConstant() = default; + + SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + subconstants.insert(end(subconstants), elements, elements + num_elements); + specialization = specialized; + } + + // Construct scalar (32-bit). + SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + m.c[0].r[0].u32 = v0; + m.c[0].vecsize = 1; + m.columns = 1; + } + + // Construct scalar (64-bit). + SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + m.c[0].r[0].u64 = v0; + m.c[0].vecsize = 1; + m.columns = 1; + } + + // Construct vectors and matrices. + SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, + bool specialized) + : constant_type(constant_type_) + , specialization(specialized) + { + bool matrix = vector_elements[0]->m.c[0].vecsize > 1; + + if (matrix) + { + m.columns = num_elements; + + for (uint32_t i = 0; i < num_elements; i++) + { + m.c[i] = vector_elements[i]->m.c[0]; + if (vector_elements[i]->specialization) + m.id[i] = vector_elements[i]->self; + } + } + else + { + m.c[0].vecsize = num_elements; + m.columns = 1; + + for (uint32_t i = 0; i < num_elements; i++) + { + m.c[0].r[i] = vector_elements[i]->m.c[0].r[0]; + if (vector_elements[i]->specialization) + m.c[0].id[i] = vector_elements[i]->self; + } + } + } + + uint32_t constant_type = 0; + ConstantMatrix m; + + // If this constant is a specialization constant (i.e. created with OpSpecConstant*). + bool specialization = false; + // If this constant is used as an array length which creates specialization restrictions on some backends. + bool is_used_as_array_length = false; + + // If true, this is a LUT, and should always be declared in the outer scope. + bool is_used_as_lut = false; + + // For composites which are constant arrays, etc. + std::vector<uint32_t> subconstants; + + // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, + // and uses them to initialize the constant. This allows the user + // to still be able to specialize the value by supplying corresponding + // preprocessor directives before compiling the shader. + std::string specialization_constant_macro_name; + + SPIRV_CROSS_DECLARE_CLONE(SPIRConstant) +}; + +class Variant +{ +public: + // MSVC 2013 workaround, we shouldn't need these constructors. + Variant() = default; + + // Marking custom move constructor as noexcept is important. + Variant(Variant &&other) SPIRV_CROSS_NOEXCEPT + { + *this = std::move(other); + } + + Variant(const Variant &variant) + { + *this = variant; + } + + // Marking custom move constructor as noexcept is important. + Variant &operator=(Variant &&other) SPIRV_CROSS_NOEXCEPT + { + if (this != &other) + { + holder = std::move(other.holder); + type = other.type; + allow_type_rewrite = other.allow_type_rewrite; + other.type = TypeNone; + } + return *this; + } + + // This copy/clone should only be called in the Compiler constructor. + // If this is called inside ::compile(), we invalidate any references we took higher in the stack. + // This should never happen. + Variant &operator=(const Variant &other) + { +#ifdef SPIRV_CROSS_COPY_CONSTRUCTOR_SANITIZE + abort(); +#endif + if (this != &other) + { + holder.reset(); + if (other.holder) + holder = other.holder->clone(); + type = other.type; + allow_type_rewrite = other.allow_type_rewrite; + } + return *this; + } + + void set(std::unique_ptr<IVariant> val, Types new_type) + { + holder = std::move(val); + if (!allow_type_rewrite && type != TypeNone && type != new_type) + SPIRV_CROSS_THROW("Overwriting a variant with new type."); + type = new_type; + allow_type_rewrite = false; + } + + template <typename T> + T &get() + { + if (!holder) + SPIRV_CROSS_THROW("nullptr"); + if (static_cast<Types>(T::type) != type) + SPIRV_CROSS_THROW("Bad cast"); + return *static_cast<T *>(holder.get()); + } + + template <typename T> + const T &get() const + { + if (!holder) + SPIRV_CROSS_THROW("nullptr"); + if (static_cast<Types>(T::type) != type) + SPIRV_CROSS_THROW("Bad cast"); + return *static_cast<const T *>(holder.get()); + } + + Types get_type() const + { + return type; + } + + uint32_t get_id() const + { + return holder ? holder->self : 0; + } + + bool empty() const + { + return !holder; + } + + void reset() + { + holder.reset(); + type = TypeNone; + } + + void set_allow_type_rewrite() + { + allow_type_rewrite = true; + } + +private: + std::unique_ptr<IVariant> holder; + Types type = TypeNone; + bool allow_type_rewrite = false; +}; + +template <typename T> +T &variant_get(Variant &var) +{ + return var.get<T>(); +} + +template <typename T> +const T &variant_get(const Variant &var) +{ + return var.get<T>(); +} + +template <typename T, typename... P> +T &variant_set(Variant &var, P &&... args) +{ + auto uptr = std::unique_ptr<T>(new T(std::forward<P>(args)...)); + auto ptr = uptr.get(); + var.set(std::move(uptr), static_cast<Types>(T::type)); + return *ptr; +} + +struct AccessChainMeta +{ + uint32_t storage_packed_type = 0; + bool need_transpose = false; + bool storage_is_packed = false; + bool storage_is_invariant = false; +}; + +struct Meta +{ + struct Decoration + { + std::string alias; + std::string qualified_alias; + std::string hlsl_semantic; + Bitset decoration_flags; + spv::BuiltIn builtin_type = spv::BuiltInMax; + uint32_t location = 0; + uint32_t component = 0; + uint32_t set = 0; + uint32_t binding = 0; + uint32_t offset = 0; + uint32_t array_stride = 0; + uint32_t matrix_stride = 0; + uint32_t input_attachment = 0; + uint32_t spec_id = 0; + uint32_t index = 0; + spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax; + bool builtin = false; + + struct + { + uint32_t packed_type = 0; + bool packed = false; + uint32_t ib_member_index = ~(0u); + uint32_t ib_orig_id = 0; + uint32_t argument_buffer_id = ~(0u); + } extended; + }; + + Decoration decoration; + std::vector<Decoration> members; + + std::unordered_map<uint32_t, uint32_t> decoration_word_offset; + + // For SPV_GOOGLE_hlsl_functionality1. + bool hlsl_is_magic_counter_buffer = false; + // ID for the sibling counter buffer. + uint32_t hlsl_magic_counter_buffer = 0; +}; + +// A user callback that remaps the type of any variable. +// var_name is the declared name of the variable. +// name_of_type is the textual name of the type which will be used in the code unless written to by the callback. +using VariableTypeRemapCallback = + std::function<void(const SPIRType &type, const std::string &var_name, std::string &name_of_type)>; + +class Hasher +{ +public: + inline void u32(uint32_t value) + { + h = (h * 0x100000001b3ull) ^ value; + } + + inline uint64_t get() const + { + return h; + } + +private: + uint64_t h = 0xcbf29ce484222325ull; +}; + +static inline bool type_is_floating_point(const SPIRType &type) +{ + return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double; +} + +static inline bool type_is_integral(const SPIRType &type) +{ + return type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte || type.basetype == SPIRType::Short || + type.basetype == SPIRType::UShort || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || + type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64; +} + +static inline SPIRType::BaseType to_signed_basetype(uint32_t width) +{ + switch (width) + { + case 8: + return SPIRType::SByte; + case 16: + return SPIRType::Short; + case 32: + return SPIRType::Int; + case 64: + return SPIRType::Int64; + default: + SPIRV_CROSS_THROW("Invalid bit width."); + } +} + +static inline SPIRType::BaseType to_unsigned_basetype(uint32_t width) +{ + switch (width) + { + case 8: + return SPIRType::UByte; + case 16: + return SPIRType::UShort; + case 32: + return SPIRType::UInt; + case 64: + return SPIRType::UInt64; + default: + SPIRV_CROSS_THROW("Invalid bit width."); + } +} + +// Returns true if an arithmetic operation does not change behavior depending on signedness. +static inline bool opcode_is_sign_invariant(spv::Op opcode) +{ + switch (opcode) + { + case spv::OpIEqual: + case spv::OpINotEqual: + case spv::OpISub: + case spv::OpIAdd: + case spv::OpIMul: + case spv::OpShiftLeftLogical: + case spv::OpBitwiseOr: + case spv::OpBitwiseXor: + case spv::OpBitwiseAnd: + return true; + + default: + return false; + } +} +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp new file mode 100644 index 0000000..1b791ee --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cpp.cpp @@ -0,0 +1,547 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cpp.hpp" + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +void CompilerCPP::emit_buffer_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get<SPIRType>(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t descriptor_set = ir.meta[var.self].decoration.set; + uint32_t binding = ir.meta[var.self].decoration.binding; + + emit_block_struct(type); + auto buffer_name = to_name(type.self); + + statement("internal::Resource<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); + statement(""); +} + +void CompilerCPP::emit_interface_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get<SPIRType>(var.basetype); + + const char *qual = var.storage == StorageClassInput ? "StageInput" : "StageOutput"; + const char *lowerqual = var.storage == StorageClassInput ? "stage_input" : "stage_output"; + auto instance_name = to_name(var.self); + uint32_t location = ir.meta[var.self].decoration.location; + + string buffer_name; + auto flags = ir.meta[type.self].decoration.decoration_flags; + if (flags.get(DecorationBlock)) + { + emit_block_struct(type); + buffer_name = to_name(type.self); + } + else + buffer_name = type_to_glsl(type); + + statement("internal::", qual, "<", buffer_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back(join("s.register_", lowerqual, "(", instance_name, "__", ", ", location, ");")); + statement(""); +} + +void CompilerCPP::emit_shared(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto instance_name = to_name(var.self); + statement(CompilerGLSL::variable_decl(var), ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name); +} + +void CompilerCPP::emit_uniform(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get<SPIRType>(var.basetype); + auto instance_name = to_name(var.self); + + uint32_t descriptor_set = ir.meta[var.self].decoration.set; + uint32_t binding = ir.meta[var.self].decoration.binding; + uint32_t location = ir.meta[var.self].decoration.location; + + string type_name = type_to_glsl(type); + remap_variable_type_name(type, instance_name, type_name); + + if (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::AtomicCounter) + { + statement("internal::Resource<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_resource(", instance_name, "__", ", ", descriptor_set, ", ", binding, ");")); + } + else + { + statement("internal::UniformConstant<", type_name, type_to_array_glsl(type), "> ", instance_name, "__;"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, "__.get()"); + resource_registrations.push_back( + join("s.register_uniform_constant(", instance_name, "__", ", ", location, ");")); + } + + statement(""); +} + +void CompilerCPP::emit_push_constant_block(const SPIRVariable &var) +{ + add_resource_name(var.self); + + auto &type = get<SPIRType>(var.basetype); + auto &flags = ir.meta[var.self].decoration.decoration_flags; + if (flags.get(DecorationBinding) || flags.get(DecorationDescriptorSet)) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); + + emit_block_struct(type); + auto buffer_name = to_name(type.self); + auto instance_name = to_name(var.self); + + statement("internal::PushConstant<", buffer_name, type_to_array_glsl(type), "> ", instance_name, ";"); + statement_no_indent("#define ", instance_name, " __res->", instance_name, ".get()"); + resource_registrations.push_back(join("s.register_push_constant(", instance_name, "__", ");")); + statement(""); +} + +void CompilerCPP::emit_block_struct(SPIRType &type) +{ + // C++ can't do interface blocks, so we fake it by emitting a separate struct. + // However, these structs are not allowed to alias anything, so remove it before + // emitting the struct. + // + // The type we have here needs to be resolved to the non-pointer type so we can remove aliases. + auto &self = get<SPIRType>(type.self); + self.type_alias = 0; + emit_struct(self); +} + +void CompilerCPP::emit_resources() +{ + for (auto &id : ir.ids) + { + if (id.get_type() == TypeConstant) + { + auto &c = id.get<SPIRConstant>(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) + { + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + } + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get<SPIRConstantOp>()); + } + } + + // Output all basic struct types which are not Block or BufferBlock as these are declared inplace + // when such variables are instantiated. + for (auto &id : ir.ids) + { + if (id.get_type() == TypeType) + { + auto &type = id.get<SPIRType>(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && + !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + emit_struct(type); + } + } + } + + statement("struct Resources : ", resource_type); + begin_scope(); + + // Output UBOs and SSBOs + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform && + !is_hidden_variable(var) && + (ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + emit_buffer_block(var); + } + } + } + + // Output push constant blocks + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + if (!is_hidden_variable(var) && var.storage != StorageClassFunction && type.pointer && + type.storage == StorageClassPushConstant) + { + emit_push_constant_block(var); + } + } + } + + // Output in/out interfaces. + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && !is_hidden_variable(var) && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self)) + { + emit_interface_block(var); + } + } + } + + // Output Uniform Constants (values, samplers, images, etc). + for (auto &id : ir.ids) + { + if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + auto &type = get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && !is_hidden_variable(var) && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + { + emit_uniform(var); + } + } + } + + // Global variables. + bool emitted = false; + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (var.storage == StorageClassWorkgroup) + { + emit_shared(var); + emitted = true; + } + } + + if (emitted) + statement(""); + + declare_undefined_values(); + + statement("inline void init(spirv_cross_shader& s)"); + begin_scope(); + statement(resource_type, "::init(s);"); + for (auto ® : resource_registrations) + statement(reg); + end_scope(); + resource_registrations.clear(); + + end_scope_decl(); + + statement(""); + statement("Resources* __res;"); + if (get_entry_point().model == ExecutionModelGLCompute) + statement("ComputePrivateResources __priv_res;"); + statement(""); + + // Emit regular globals which are allocated per invocation. + emitted = false; + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (var.storage == StorageClassPrivate) + { + if (var.storage == StorageClassWorkgroup) + emit_shared(var); + else + statement(CompilerGLSL::variable_decl(var), ";"); + emitted = true; + } + } + + if (emitted) + statement(""); +} + +string CompilerCPP::compile() +{ + // Do not deal with ES-isms like precision, older extensions and such. + options.es = false; + options.version = 450; + backend.float_literal_suffix = true; + backend.double_literal_suffix = false; + backend.long_long_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.basic_int_type = "int32_t"; + backend.basic_uint_type = "uint32_t"; + backend.swizzle_is_function = true; + backend.shared_is_implied = true; + backend.flexible_member_array_supported = false; + backend.explicit_struct_type = true; + backend.use_initializer_list = true; + + build_function_control_flow_graphs_and_analyze(); + update_active_builtins(); + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + resource_registrations.clear(); + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr<ostringstream>(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset()); + + pass_count++; + } while (force_recompile); + + // Match opening scope of emit_header(). + end_scope_decl(); + // namespace + end_scope(); + + // Emit C entry points + emit_c_linkage(); + + // Entry point in CPP is always main() for the time being. + get_entry_point().name = "main"; + + return buffer->str(); +} + +void CompilerCPP::emit_c_linkage() +{ + statement(""); + + statement("spirv_cross_shader_t *spirv_cross_construct(void)"); + begin_scope(); + statement("return new ", impl_type, "();"); + end_scope(); + + statement(""); + statement("void spirv_cross_destruct(spirv_cross_shader_t *shader)"); + begin_scope(); + statement("delete static_cast<", impl_type, "*>(shader);"); + end_scope(); + + statement(""); + statement("void spirv_cross_invoke(spirv_cross_shader_t *shader)"); + begin_scope(); + statement("static_cast<", impl_type, "*>(shader)->invoke();"); + end_scope(); + + statement(""); + statement("static const struct spirv_cross_interface vtable ="); + begin_scope(); + statement("spirv_cross_construct,"); + statement("spirv_cross_destruct,"); + statement("spirv_cross_invoke,"); + end_scope_decl(); + + statement(""); + statement("const struct spirv_cross_interface *", + interface_name.empty() ? string("spirv_cross_get_interface") : interface_name, "(void)"); + begin_scope(); + statement("return &vtable;"); + end_scope(); +} + +void CompilerCPP::emit_function_prototype(SPIRFunction &func, const Bitset &) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + local_variable_names = resource_names; + string decl; + + auto &type = get<SPIRType>(func.return_type); + decl += "inline "; + decl += type_to_glsl(type); + decl += " "; + + if (func.self == ir.default_entry_point) + { + decl += "main"; + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + for (auto &arg : func.arguments) + { + add_local_variable_name(arg.id); + + decl += argument_decl(arg); + if (&arg != &func.arguments.back()) + decl += ", "; + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + decl += ")"; + statement(decl); +} + +string CompilerCPP::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &type = expression_type(arg.id); + bool constref = !type.pointer || arg.write_count == 0; + + auto &var = get<SPIRVariable>(arg.id); + + string base = type_to_glsl(type); + string variable_name = to_name(var.self); + remap_variable_type_name(type, variable_name, base); + + for (uint32_t i = 0; i < type.array.size(); i++) + base = join("std::array<", base, ", ", to_array_size(type, i), ">"); + + return join(constref ? "const " : "", base, " &", variable_name); +} + +string CompilerCPP::variable_decl(const SPIRType &type, const string &name, uint32_t /* id */) +{ + string base = type_to_glsl(type); + remap_variable_type_name(type, name, base); + bool runtime = false; + + for (uint32_t i = 0; i < type.array.size(); i++) + { + auto &array = type.array[i]; + if (!array && type.array_size_literal[i]) + { + // Avoid using runtime arrays with std::array since this is undefined. + // Runtime arrays cannot be passed around as values, so this is fine. + runtime = true; + } + else + base = join("std::array<", base, ", ", to_array_size(type, i), ">"); + } + base += ' '; + return base + name + (runtime ? "[1]" : ""); +} + +void CompilerCPP::emit_header() +{ + auto &execution = get_entry_point(); + + statement("// This C++ shader is autogenerated by spirv-cross."); + statement("#include \"spirv_cross/internal_interface.hpp\""); + statement("#include \"spirv_cross/external_interface.h\""); + // Needed to properly implement GLSL-style arrays. + statement("#include <array>"); + statement("#include <stdint.h>"); + statement(""); + statement("using namespace spirv_cross;"); + statement("using namespace glm;"); + statement(""); + + statement("namespace Impl"); + begin_scope(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelGLCompute: + case ExecutionModelFragment: + case ExecutionModelVertex: + statement("struct Shader"); + begin_scope(); + break; + + default: + SPIRV_CROSS_THROW("Unsupported execution model."); + } + + switch (execution.model) + { + case ExecutionModelGeometry: + impl_type = "GeometryShader<Impl::Shader, Impl::Shader::Resources>"; + resource_type = "GeometryResources"; + break; + + case ExecutionModelVertex: + impl_type = "VertexShader<Impl::Shader, Impl::Shader::Resources>"; + resource_type = "VertexResources"; + break; + + case ExecutionModelFragment: + impl_type = "FragmentShader<Impl::Shader, Impl::Shader::Resources>"; + resource_type = "FragmentResources"; + break; + + case ExecutionModelGLCompute: + impl_type = join("ComputeShader<Impl::Shader, Impl::Shader::Resources, ", execution.workgroup_size.x, ", ", + execution.workgroup_size.y, ", ", execution.workgroup_size.z, ">"); + resource_type = "ComputeResources"; + break; + + case ExecutionModelTessellationControl: + impl_type = "TessControlShader<Impl::Shader, Impl::Shader::Resources>"; + resource_type = "TessControlResources"; + break; + + case ExecutionModelTessellationEvaluation: + impl_type = "TessEvaluationShader<Impl::Shader, Impl::Shader::Resources>"; + resource_type = "TessEvaluationResources"; + break; + + default: + SPIRV_CROSS_THROW("Unsupported execution model."); + } +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_cpp.hpp b/src/3rdparty/SPIRV-Cross/spirv_cpp.hpp new file mode 100644 index 0000000..bcdb669 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cpp.hpp @@ -0,0 +1,87 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_CPP_HPP +#define SPIRV_CROSS_CPP_HPP + +#include "spirv_glsl.hpp" +#include <utility> +#include <vector> + +namespace spirv_cross +{ +class CompilerCPP : public CompilerGLSL +{ +public: + explicit CompilerCPP(std::vector<uint32_t> spirv_) + : CompilerGLSL(move(spirv_)) + { + } + + CompilerCPP(const uint32_t *ir_, size_t word_count) + : CompilerGLSL(ir_, word_count) + { + } + + explicit CompilerCPP(const ParsedIR &ir_) + : CompilerGLSL(ir_) + { + } + + explicit CompilerCPP(ParsedIR &&ir_) + : CompilerGLSL(std::move(ir_)) + { + } + + std::string compile() override; + + // Sets a custom symbol name that can override + // spirv_cross_get_interface. + // + // Useful when several shader interfaces are linked + // statically into the same binary. + void set_interface_name(std::string name) + { + interface_name = std::move(name); + } + +private: + void emit_header() override; + void emit_c_linkage(); + void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; + + void emit_resources(); + void emit_buffer_block(const SPIRVariable &type) override; + void emit_push_constant_block(const SPIRVariable &var) override; + void emit_interface_block(const SPIRVariable &type); + void emit_block_chain(SPIRBlock &block); + void emit_uniform(const SPIRVariable &var) override; + void emit_shared(const SPIRVariable &var); + void emit_block_struct(SPIRType &type); + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id) override; + + std::string argument_decl(const SPIRFunction::Parameter &arg); + + std::vector<std::string> resource_registrations; + std::string impl_type; + std::string resource_type; + uint32_t shared_counter = 0; + + std::string interface_name; +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp new file mode 100644 index 0000000..7ca2fe1 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp @@ -0,0 +1,4121 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross.hpp" +#include "GLSL.std.450.h" +#include "spirv_cfg.hpp" +#include "spirv_parser.hpp" +#include <algorithm> +#include <cstring> +#include <utility> + +using namespace std; +using namespace spv; +using namespace spirv_cross; + +Compiler::Compiler(vector<uint32_t> ir_) +{ + Parser parser(move(ir_)); + parser.parse(); + set_ir(move(parser.get_parsed_ir())); +} + +Compiler::Compiler(const uint32_t *ir_, size_t word_count) +{ + Parser parser(ir_, word_count); + parser.parse(); + set_ir(move(parser.get_parsed_ir())); +} + +Compiler::Compiler(const ParsedIR &ir_) +{ + set_ir(ir_); +} + +Compiler::Compiler(ParsedIR &&ir_) +{ + set_ir(move(ir_)); +} + +void Compiler::set_ir(ParsedIR &&ir_) +{ + ir = move(ir_); + parse_fixup(); +} + +void Compiler::set_ir(const ParsedIR &ir_) +{ + ir = ir_; + parse_fixup(); +} + +string Compiler::compile() +{ + return ""; +} + +bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) +{ + auto &type = get<SPIRType>(v.basetype); + bool ssbo = v.storage == StorageClassStorageBuffer || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool image = type.basetype == SPIRType::Image; + bool counter = type.basetype == SPIRType::AtomicCounter; + + bool is_restrict; + if (ssbo) + is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict); + else + is_restrict = has_decoration(v.self, DecorationRestrict); + + return !is_restrict && (ssbo || image || counter); +} + +bool Compiler::block_is_pure(const SPIRBlock &block) +{ + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + if (!function_is_pure(get<SPIRFunction>(func))) + return false; + break; + } + + case OpCopyMemory: + case OpStore: + { + auto &type = expression_type(ops[0]); + if (type.storage != StorageClassFunction) + return false; + break; + } + + case OpImageWrite: + return false; + + // Atomics are impure. + case OpAtomicLoad: + case OpAtomicStore: + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + return false; + + // Geometry shader builtins modify global state. + case OpEndPrimitive: + case OpEmitStreamVertex: + case OpEndStreamPrimitive: + case OpEmitVertex: + return false; + + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. + case OpControlBarrier: + case OpMemoryBarrier: + return false; + + // Ray tracing builtins are impure. + case OpReportIntersectionNV: + case OpIgnoreIntersectionNV: + case OpTerminateRayNV: + case OpTraceNV: + case OpExecuteCallableNV: + return false; + + // OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure. + + default: + break; + } + } + + return true; +} + +string Compiler::to_name(uint32_t id, bool allow_alias) const +{ + if (allow_alias && ir.ids[id].get_type() == TypeType) + { + // If this type is a simple alias, emit the + // name of the original type instead. + // We don't want to override the meta alias + // as that can be overridden by the reflection APIs after parse. + auto &type = get<SPIRType>(id); + if (type.type_alias) + { + // If the alias master has been specially packed, we will have emitted a clean variant as well, + // so skip the name aliasing here. + if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + return to_name(type.type_alias); + } + } + + auto &alias = ir.get_name(id); + if (alias.empty()) + return join("_", id); + else + return alias; +} + +bool Compiler::function_is_pure(const SPIRFunction &func) +{ + for (auto block : func.blocks) + { + if (!block_is_pure(get<SPIRBlock>(block))) + { + //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); + return false; + } + } + + //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); + return true; +} + +void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id) +{ + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + switch (op) + { + case OpFunctionCall: + { + uint32_t func = ops[2]; + register_global_read_dependencies(get<SPIRFunction>(func), id); + break; + } + + case OpLoad: + case OpImageRead: + { + // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && var->storage != StorageClassFunction) + { + auto &type = get<SPIRType>(var->basetype); + + // InputTargets are immutable. + if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData) + var->dependees.push_back(id); + } + break; + } + + default: + break; + } + } +} + +void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id) +{ + for (auto block : func.blocks) + register_global_read_dependencies(get<SPIRBlock>(block), id); +} + +SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) +{ + auto *var = maybe_get<SPIRVariable>(chain); + if (!var) + { + auto *cexpr = maybe_get<SPIRExpression>(chain); + if (cexpr) + var = maybe_get<SPIRVariable>(cexpr->loaded_from); + + auto *access_chain = maybe_get<SPIRAccessChain>(chain); + if (access_chain) + var = maybe_get<SPIRVariable>(access_chain->loaded_from); + } + + return var; +} + +void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) +{ + auto &e = get<SPIRExpression>(expr); + auto *var = maybe_get_backing_variable(chain); + + if (var) + { + e.loaded_from = var->self; + + // If the backing variable is immutable, we do not need to depend on the variable. + if (forwarded && !is_immutable(var->self)) + var->dependees.push_back(e.self); + + // If we load from a parameter, make sure we create "inout" if we also write to the parameter. + // The default is "in" however, so we never invalidate our compilation by reading. + if (var && var->parameter) + var->parameter->read_count++; + } +} + +void Compiler::register_write(uint32_t chain) +{ + auto *var = maybe_get<SPIRVariable>(chain); + if (!var) + { + // If we're storing through an access chain, invalidate the backing variable instead. + auto *expr = maybe_get<SPIRExpression>(chain); + if (expr && expr->loaded_from) + var = maybe_get<SPIRVariable>(expr->loaded_from); + + auto *access_chain = maybe_get<SPIRAccessChain>(chain); + if (access_chain && access_chain->loaded_from) + var = maybe_get<SPIRVariable>(access_chain->loaded_from); + } + + if (var) + { + // If our variable is in a storage class which can alias with other buffers, + // invalidate all variables which depend on aliased variables. And if this is a + // variable pointer, then invalidate all variables regardless. + if (get_variable_data_type(*var).pointer) + flush_all_active_variables(); + if (variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + else if (var) + flush_dependees(*var); + + // We tried to write to a parameter which is not marked with out qualifier, force a recompile. + if (var->parameter && var->parameter->write_count == 0) + { + var->parameter->write_count++; + force_recompile = true; + } + } + else + { + // If we stored through a variable pointer, then we don't know which + // variable we stored to. So *all* expressions after this point need to + // be invalidated. + // FIXME: If we can prove that the variable pointer will point to + // only certain variables, we can invalidate only those. + flush_all_active_variables(); + } +} + +void Compiler::flush_dependees(SPIRVariable &var) +{ + for (auto expr : var.dependees) + invalid_expressions.insert(expr); + var.dependees.clear(); +} + +void Compiler::flush_all_aliased_variables() +{ + for (auto aliased : aliased_variables) + flush_dependees(get<SPIRVariable>(aliased)); +} + +void Compiler::flush_all_atomic_capable_variables() +{ + for (auto global : global_variables) + flush_dependees(get<SPIRVariable>(global)); + flush_all_aliased_variables(); +} + +void Compiler::flush_control_dependent_expressions(uint32_t block_id) +{ + auto &block = get<SPIRBlock>(block_id); + for (auto &expr : block.invalidate_expressions) + invalid_expressions.insert(expr); + block.invalidate_expressions.clear(); +} + +void Compiler::flush_all_active_variables() +{ + // Invalidate all temporaries we read from variables in this block since they were forwarded. + // Invalidate all temporaries we read from globals. + for (auto &v : current_function->local_variables) + flush_dependees(get<SPIRVariable>(v)); + for (auto &arg : current_function->arguments) + flush_dependees(get<SPIRVariable>(arg.id)); + for (auto global : global_variables) + flush_dependees(get<SPIRVariable>(global)); + + flush_all_aliased_variables(); +} + +uint32_t Compiler::expression_type_id(uint32_t id) const +{ + switch (ir.ids[id].get_type()) + { + case TypeVariable: + return get<SPIRVariable>(id).basetype; + + case TypeExpression: + return get<SPIRExpression>(id).expression_type; + + case TypeConstant: + return get<SPIRConstant>(id).constant_type; + + case TypeConstantOp: + return get<SPIRConstantOp>(id).basetype; + + case TypeUndef: + return get<SPIRUndef>(id).basetype; + + case TypeCombinedImageSampler: + return get<SPIRCombinedImageSampler>(id).combined_type; + + case TypeAccessChain: + return get<SPIRAccessChain>(id).basetype; + + default: + SPIRV_CROSS_THROW("Cannot resolve expression type."); + } +} + +const SPIRType &Compiler::expression_type(uint32_t id) const +{ + return get<SPIRType>(expression_type_id(id)); +} + +bool Compiler::expression_is_lvalue(uint32_t id) const +{ + auto &type = expression_type(id); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + case SPIRType::Sampler: + return false; + + default: + return true; + } +} + +bool Compiler::is_immutable(uint32_t id) const +{ + if (ir.ids[id].get_type() == TypeVariable) + { + auto &var = get<SPIRVariable>(id); + + // Anything we load from the UniformConstant address space is guaranteed to be immutable. + bool pointer_to_const = var.storage == StorageClassUniformConstant; + return pointer_to_const || var.phi_variable || !expression_is_lvalue(id); + } + else if (ir.ids[id].get_type() == TypeAccessChain) + return get<SPIRAccessChain>(id).immutable; + else if (ir.ids[id].get_type() == TypeExpression) + return get<SPIRExpression>(id).immutable; + else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp || + ir.ids[id].get_type() == TypeUndef) + return true; + else + return false; +} + +static inline bool storage_class_is_interface(spv::StorageClass storage) +{ + switch (storage) + { + case StorageClassInput: + case StorageClassOutput: + case StorageClassUniform: + case StorageClassUniformConstant: + case StorageClassAtomicCounter: + case StorageClassPushConstant: + case StorageClassStorageBuffer: + return true; + + default: + return false; + } +} + +bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const +{ + if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable) + return true; + + // Combined image samplers are always considered active as they are "magic" variables. + if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) { + return samp.combined_id == var.self; + }) != end(combined_image_samplers)) + { + return false; + } + + bool hidden = false; + if (check_active_interface_variables && storage_class_is_interface(var.storage)) + hidden = active_interface_variables.find(var.self) == end(active_interface_variables); + return hidden; +} + +bool Compiler::is_builtin_type(const SPIRType &type) const +{ + auto *type_meta = ir.find_meta(type.self); + + // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin. + if (type_meta) + for (auto &m : type_meta->members) + if (m.builtin) + return true; + + return false; +} + +bool Compiler::is_builtin_variable(const SPIRVariable &var) const +{ + auto *m = ir.find_meta(var.self); + + if (var.compat_builtin || (m && m->decoration.builtin)) + return true; + else + return is_builtin_type(get<SPIRType>(var.basetype)); +} + +bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const +{ + auto *type_meta = ir.find_meta(type.self); + + if (type_meta) + { + auto &memb = type_meta->members; + if (index < memb.size() && memb[index].builtin) + { + if (builtin) + *builtin = memb[index].builtin_type; + return true; + } + } + + return false; +} + +bool Compiler::is_scalar(const SPIRType &type) const +{ + return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1; +} + +bool Compiler::is_vector(const SPIRType &type) const +{ + return type.vecsize > 1 && type.columns == 1; +} + +bool Compiler::is_matrix(const SPIRType &type) const +{ + return type.vecsize > 1 && type.columns > 1; +} + +bool Compiler::is_array(const SPIRType &type) const +{ + return !type.array.empty(); +} + +ShaderResources Compiler::get_shader_resources() const +{ + return get_shader_resources(nullptr); +} + +ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> &active_variables) const +{ + return get_shader_resources(&active_variables); +} + +bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + uint32_t variable = 0; + switch (opcode) + { + // Need this first, otherwise, GCC complains about unhandled switch statements. + default: + break; + + case OpFunctionCall: + { + // Invalid SPIR-V. + if (length < 3) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + auto *var = compiler.maybe_get<SPIRVariable>(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpSelect: + { + // Invalid SPIR-V. + if (length < 5) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + auto *var = compiler.maybe_get<SPIRVariable>(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpPhi: + { + // Invalid SPIR-V. + if (length < 2) + return false; + + uint32_t count = length - 2; + args += 2; + for (uint32_t i = 0; i < count; i += 2) + { + auto *var = compiler.maybe_get<SPIRVariable>(args[i]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[i]); + } + break; + } + + case OpAtomicStore: + case OpStore: + // Invalid SPIR-V. + if (length < 1) + return false; + variable = args[0]; + break; + + case OpCopyMemory: + { + if (length < 2) + return false; + + auto *var = compiler.maybe_get<SPIRVariable>(args[0]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + + var = compiler.maybe_get<SPIRVariable>(args[1]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + break; + } + + case OpExtInst: + { + if (length < 5) + return false; + uint32_t extension_set = args[2]; + if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + { + enum AMDShaderExplicitVertexParameter + { + InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]); + + switch (op) + { + case InterpolateAtVertexAMD: + { + auto *var = compiler.maybe_get<SPIRVariable>(args[4]); + if (var && storage_class_is_interface(var->storage)) + variables.insert(args[4]); + break; + } + + default: + break; + } + } + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + case OpLoad: + case OpCopyObject: + case OpImageTexelPointer: + case OpAtomicLoad: + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + // Invalid SPIR-V. + if (length < 3) + return false; + variable = args[2]; + break; + } + + if (variable) + { + auto *var = compiler.maybe_get<SPIRVariable>(variable); + if (var && storage_class_is_interface(var->storage)) + variables.insert(variable); + } + return true; +} + +unordered_set<uint32_t> Compiler::get_active_interface_variables() const +{ + // Traverse the call graph and find all interface variables which are in use. + unordered_set<uint32_t> variables; + InterfaceVariableAccessHandler handler(*this, variables); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + + // Make sure we preserve output variables which are only initialized, but never accessed by any code. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + if (var.storage == StorageClassOutput && var.initializer != 0) + variables.insert(var.self); + }); + + // If we needed to create one, we'll need it. + if (dummy_sampler_id) + variables.insert(dummy_sampler_id); + + return variables; +} + +void Compiler::set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables) +{ + active_interface_variables = move(active_variables); + check_active_interface_variables = true; +} + +ShaderResources Compiler::get_shader_resources(const unordered_set<uint32_t> *active_variables) const +{ + ShaderResources res; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + // It is possible for uniform storage classes to be passed as function parameters, so detect + // that. To detect function parameters, check of StorageClass of variable is function scope. + if (var.storage == StorageClassFunction || !type.pointer || is_builtin_variable(var)) + return; + + if (active_variables && active_variables->find(var.self) == end(*active_variables)) + return; + + // Input + if (var.storage == StorageClassInput && interface_variable_exists_in_entry_point(var.self)) + { + if (has_decoration(type.self, DecorationBlock)) + { + res.stage_inputs.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + } + else + res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Subpass inputs + else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData) + { + res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Outputs + else if (var.storage == StorageClassOutput && interface_variable_exists_in_entry_point(var.self)) + { + if (has_decoration(type.self, DecorationBlock)) + { + res.stage_outputs.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + } + else + res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // UBOs + else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock)) + { + res.uniform_buffers.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + } + // Old way to declare SSBOs. + else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)) + { + res.storage_buffers.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + } + // Modern way to declare SSBOs. + else if (type.storage == StorageClassStorageBuffer) + { + res.storage_buffers.push_back( + { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self) }); + } + // Push constant blocks + else if (type.storage == StorageClassPushConstant) + { + // There can only be one push constant block, but keep the vector in case this restriction is lifted + // in the future. + res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && + type.image.sampled == 2) + { + res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Separate images + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && + type.image.sampled == 1) + { + res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Separate samplers + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler) + { + res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Textures + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) + { + res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Atomic counters + else if (type.storage == StorageClassAtomicCounter) + { + res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + // Acceleration structures + else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructureNV) + { + res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); + } + }); + + return res; +} + +bool Compiler::type_is_block_like(const SPIRType &type) const +{ + if (type.basetype != SPIRType::Struct) + return false; + + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + return true; + } + + // Block-like types may have Offset decorations. + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + if (has_member_decoration(type.self, i, DecorationOffset)) + return true; + + return false; +} + +void Compiler::fixup_type_alias() +{ + // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. + // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type + // alias if the slave type is a block type. + ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) { + if (type.type_alias && type_is_block_like(type)) + { + // Become the master. + ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) { + if (other_id == type.self) + return; + + if (other_type.type_alias == type.type_alias) + other_type.type_alias = type.self; + }); + + this->get<SPIRType>(type.type_alias).type_alias = self; + type.type_alias = 0; + } + }); + + ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { + if (type.type_alias && type_is_block_like(type)) + { + // This is not allowed, drop the type_alias. + type.type_alias = 0; + } + }); + + // Reorder declaration of types so that the master of the type alias is always emitted first. + // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which + // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. + auto &type_ids = ir.ids_for_type[TypeType]; + for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + { + auto &type = get<SPIRType>(*alias_itr); + if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + { + // We will skip declaring this type, so make sure the type_alias type comes before. + auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias); + assert(master_itr != end(type_ids)); + + if (alias_itr < master_itr) + { + // Must also swap the type order for the constant-type joined array. + auto &joined_types = ir.ids_for_constant_or_type; + auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); + auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); + assert(alt_alias_itr != end(joined_types)); + assert(alt_master_itr != end(joined_types)); + + swap(*alias_itr, *master_itr); + swap(*alt_alias_itr, *alt_master_itr); + } + } + } +} + +void Compiler::parse_fixup() +{ + // Figure out specialization constants for work group sizes. + for (auto id_ : ir.ids_for_constant_or_variable) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get<SPIRConstant>(); + if (ir.meta[c.self].decoration.builtin && ir.meta[c.self].decoration.builtin_type == BuiltInWorkgroupSize) + { + // In current SPIR-V, there can be just one constant like this. + // All entry points will receive the constant value. + for (auto &entry : ir.entry_points) + { + entry.second.workgroup_size.constant = c.self; + entry.second.workgroup_size.x = c.scalar(0, 0); + entry.second.workgroup_size.y = c.scalar(0, 1); + entry.second.workgroup_size.z = c.scalar(0, 2); + } + } + } + else if (id.get_type() == TypeVariable) + { + auto &var = id.get<SPIRVariable>(); + if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassOutput) + global_variables.push_back(var.self); + if (variable_storage_is_aliased(var)) + aliased_variables.push_back(var.self); + } + } + + fixup_type_alias(); +} + +void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary, + string &name) +{ + if (name.empty()) + return; + + const auto find_name = [&](const string &n) -> bool { + if (cache_primary.find(n) != end(cache_primary)) + return true; + + if (&cache_primary != &cache_secondary) + if (cache_secondary.find(n) != end(cache_secondary)) + return true; + + return false; + }; + + const auto insert_name = [&](const string &n) { cache_primary.insert(n); }; + + if (!find_name(name)) + { + insert_name(name); + return; + } + + uint32_t counter = 0; + auto tmpname = name; + + bool use_linked_underscore = true; + + if (tmpname == "_") + { + // We cannot just append numbers, as we will end up creating internally reserved names. + // Make it like _0_<counter> instead. + tmpname += "0"; + } + else if (tmpname.back() == '_') + { + // The last_character is an underscore, so we don't need to link in underscore. + // This would violate double underscore rules. + use_linked_underscore = false; + } + + // If there is a collision (very rare), + // keep tacking on extra identifier until it's unique. + do + { + counter++; + name = tmpname + (use_linked_underscore ? "_" : "") + convert_to_string(counter); + } while (find_name(name)); + insert_name(name); +} + +void Compiler::update_name_cache(unordered_set<string> &cache, string &name) +{ + update_name_cache(cache, cache, name); +} + +void Compiler::set_name(uint32_t id, const std::string &name) +{ + ir.set_name(id, name); +} + +const SPIRType &Compiler::get_type(uint32_t id) const +{ + return get<SPIRType>(id); +} + +const SPIRType &Compiler::get_type_from_variable(uint32_t id) const +{ + return get<SPIRType>(get<SPIRVariable>(id).basetype); +} + +uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const +{ + auto *p_type = &get<SPIRType>(type_id); + if (p_type->pointer) + { + assert(p_type->parent_type); + type_id = p_type->parent_type; + } + return type_id; +} + +const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const +{ + auto *p_type = &type; + if (p_type->pointer) + { + assert(p_type->parent_type); + p_type = &get<SPIRType>(p_type->parent_type); + } + return *p_type; +} + +const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const +{ + return get_pointee_type(get<SPIRType>(type_id)); +} + +uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const +{ + if (var.phi_variable) + return var.basetype; + return get_pointee_type_id(var.basetype); +} + +SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) +{ + return get<SPIRType>(get_variable_data_type_id(var)); +} + +const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const +{ + return get<SPIRType>(get_variable_data_type_id(var)); +} + +SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) +{ + SPIRType *type = &get_variable_data_type(var); + if (is_array(*type)) + type = &get<SPIRType>(type->parent_type); + return *type; +} + +const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const +{ + const SPIRType *type = &get_variable_data_type(var); + if (is_array(*type)) + type = &get<SPIRType>(type->parent_type); + return *type; +} + +bool Compiler::is_sampled_image_type(const SPIRType &type) +{ + return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 && + type.image.dim != DimBuffer; +} + +void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + const std::string &argument) +{ + ir.set_member_decoration_string(id, index, decoration, argument); +} + +void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +{ + ir.set_member_decoration(id, index, decoration, argument); +} + +void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name) +{ + ir.set_member_name(id, index, name); +} + +const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const +{ + return ir.get_member_name(id, index); +} + +void Compiler::set_qualified_name(uint32_t id, const string &name) +{ + ir.meta[id].decoration.qualified_alias = name; +} + +void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name) +{ + ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1)); + ir.meta[type_id].members[index].qualified_alias = name; +} + +const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const +{ + auto *m = ir.find_meta(type_id); + if (m && index < m->members.size()) + return m->members[index].qualified_alias; + else + return ir.get_empty_string(); +} + +uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +{ + return ir.get_member_decoration(id, index, decoration); +} + +const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const +{ + return ir.get_member_decoration_bitset(id, index); +} + +bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +{ + return ir.has_member_decoration(id, index, decoration); +} + +void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +{ + ir.unset_member_decoration(id, index, decoration); +} + +void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument) +{ + ir.set_decoration_string(id, decoration, argument); +} + +void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +{ + ir.set_decoration(id, decoration, argument); +} + +void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) +{ + auto &dec = ir.meta[id].decoration; + switch (decoration) + { + case SPIRVCrossDecorationPacked: + dec.extended.packed = true; + break; + + case SPIRVCrossDecorationPackedType: + dec.extended.packed_type = value; + break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = value; + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = value; + break; + + case SPIRVCrossDecorationArgumentBufferID: + dec.extended.argument_buffer_id = value; + break; + } +} + +void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, + uint32_t value) +{ + ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); + auto &dec = ir.meta[type].members[index]; + + switch (decoration) + { + case SPIRVCrossDecorationPacked: + dec.extended.packed = true; + break; + + case SPIRVCrossDecorationPackedType: + dec.extended.packed_type = value; + break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = value; + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = value; + break; + + case SPIRVCrossDecorationArgumentBufferID: + dec.extended.argument_buffer_id = value; + break; + } +} + +uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(id); + if (!m) + return 0; + + auto &dec = m->decoration; + switch (decoration) + { + case SPIRVCrossDecorationPacked: + return uint32_t(dec.extended.packed); + + case SPIRVCrossDecorationPackedType: + return dec.extended.packed_type; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index; + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id; + + case SPIRVCrossDecorationArgumentBufferID: + return dec.extended.argument_buffer_id; + } + + return 0; +} + +uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(type); + if (!m) + return 0; + + if (index >= m->members.size()) + return 0; + + auto &dec = m->members[index]; + switch (decoration) + { + case SPIRVCrossDecorationPacked: + return uint32_t(dec.extended.packed); + + case SPIRVCrossDecorationPackedType: + return dec.extended.packed_type; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index; + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id; + + case SPIRVCrossDecorationArgumentBufferID: + return dec.extended.argument_buffer_id; + } + + return 0; +} + +bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(id); + if (!m) + return false; + + auto &dec = m->decoration; + switch (decoration) + { + case SPIRVCrossDecorationPacked: + return dec.extended.packed; + + case SPIRVCrossDecorationPackedType: + return dec.extended.packed_type != 0; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index != uint32_t(-1); + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id != 0; + + case SPIRVCrossDecorationArgumentBufferID: + return dec.extended.argument_buffer_id != 0; + } + + return false; +} + +bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const +{ + auto *m = ir.find_meta(type); + if (!m) + return false; + + if (index >= m->members.size()) + return false; + + auto &dec = m->members[index]; + switch (decoration) + { + case SPIRVCrossDecorationPacked: + return dec.extended.packed; + + case SPIRVCrossDecorationPackedType: + return dec.extended.packed_type != 0; + + case SPIRVCrossDecorationInterfaceMemberIndex: + return dec.extended.ib_member_index != uint32_t(-1); + + case SPIRVCrossDecorationInterfaceOrigID: + return dec.extended.ib_orig_id != 0; + + case SPIRVCrossDecorationArgumentBufferID: + return dec.extended.argument_buffer_id != uint32_t(-1); + } + + return false; +} + +void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) +{ + auto &dec = ir.meta[id].decoration; + switch (decoration) + { + case SPIRVCrossDecorationPacked: + dec.extended.packed = false; + break; + + case SPIRVCrossDecorationPackedType: + dec.extended.packed_type = 0; + break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = ~(0u); + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = 0; + break; + + case SPIRVCrossDecorationArgumentBufferID: + dec.extended.argument_buffer_id = 0; + break; + } +} + +void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) +{ + ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); + auto &dec = ir.meta[type].members[index]; + + switch (decoration) + { + case SPIRVCrossDecorationPacked: + dec.extended.packed = false; + break; + + case SPIRVCrossDecorationPackedType: + dec.extended.packed_type = 0; + break; + + case SPIRVCrossDecorationInterfaceMemberIndex: + dec.extended.ib_member_index = ~(0u); + break; + + case SPIRVCrossDecorationInterfaceOrigID: + dec.extended.ib_orig_id = 0; + break; + + case SPIRVCrossDecorationArgumentBufferID: + dec.extended.argument_buffer_id = 0; + break; + } +} + +StorageClass Compiler::get_storage_class(uint32_t id) const +{ + return get<SPIRVariable>(id).storage; +} + +const std::string &Compiler::get_name(uint32_t id) const +{ + return ir.get_name(id); +} + +const std::string Compiler::get_fallback_name(uint32_t id) const +{ + return join("_", id); +} + +const std::string Compiler::get_block_fallback_name(uint32_t id) const +{ + auto &var = get<SPIRVariable>(id); + if (get_name(id).empty()) + return join("_", get<SPIRType>(var.basetype).self, "_", id); + else + return get_name(id); +} + +const Bitset &Compiler::get_decoration_bitset(uint32_t id) const +{ + return ir.get_decoration_bitset(id); +} + +bool Compiler::has_decoration(uint32_t id, Decoration decoration) const +{ + return ir.has_decoration(id, decoration); +} + +const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const +{ + return ir.get_decoration_string(id, decoration); +} + +const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +{ + return ir.get_member_decoration_string(id, index, decoration); +} + +uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +{ + return ir.get_decoration(id, decoration); +} + +void Compiler::unset_decoration(uint32_t id, Decoration decoration) +{ + ir.unset_decoration(id, decoration); +} + +bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const +{ + auto *m = ir.find_meta(id); + if (!m) + return false; + + auto &word_offsets = m->decoration_word_offset; + auto itr = word_offsets.find(decoration); + if (itr == end(word_offsets)) + return false; + + word_offset = itr->second; + return true; +} + +bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const +{ + // Tried and failed. + if (block.disable_block_optimization || block.complex_continue) + return false; + + if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) + { + // Try to detect common for loop pattern + // which the code backend can use to create cleaner code. + // for(;;) { if (cond) { some_body; } else { break; } } + // is the pattern we're looking for. + const auto *false_block = maybe_get<SPIRBlock>(block.false_block); + const auto *true_block = maybe_get<SPIRBlock>(block.true_block); + const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block); + + bool false_block_is_merge = block.false_block == block.merge_block || + (false_block && merge_block && execution_is_noop(*false_block, *merge_block)); + + bool true_block_is_merge = block.true_block == block.merge_block || + (true_block && merge_block && execution_is_noop(*true_block, *merge_block)); + + bool positive_candidate = + block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge; + + bool negative_candidate = + block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge; + + bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop && + (positive_candidate || negative_candidate); + + if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop) + ret = block.true_block == block.continue_block; + else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop) + ret = block.false_block == block.continue_block; + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. + if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get<SPIRBlock>(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self) + return false; + } + return ret; + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + // Empty loop header that just sets up merge target + // and branches to loop body. + bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty(); + + if (!ret) + return false; + + auto &child = get<SPIRBlock>(block.next_block); + + const auto *false_block = maybe_get<SPIRBlock>(child.false_block); + const auto *true_block = maybe_get<SPIRBlock>(child.true_block); + const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block); + + bool false_block_is_merge = child.false_block == block.merge_block || + (false_block && merge_block && execution_is_noop(*false_block, *merge_block)); + + bool true_block_is_merge = child.true_block == block.merge_block || + (true_block && merge_block && execution_is_noop(*true_block, *merge_block)); + + bool positive_candidate = + child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge; + + bool negative_candidate = + child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge; + + ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone && + (positive_candidate || negative_candidate); + + // If we have OpPhi which depends on branches which came from our own block, + // we need to flush phi variables in else block instead of a trivial break, + // so we cannot assume this is a for loop candidate. + if (ret) + { + for (auto &phi : block.phi_variables) + if (phi.parent == block.self || phi.parent == child.self) + return false; + + for (auto &phi : child.phi_variables) + if (phi.parent == block.self) + return false; + + auto *merge = maybe_get<SPIRBlock>(block.merge_block); + if (merge) + for (auto &phi : merge->phi_variables) + if (phi.parent == block.self || phi.parent == child.false_block) + return false; + } + + return ret; + } + else + return false; +} + +bool Compiler::block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to) +{ + auto *start = &from; + + if (start->self == to.self) + return true; + + // Break cycles. + if (is_continue(start->self)) + return false; + + // If our select block doesn't merge, we must break or continue in these blocks, + // so if continues occur branchless within these blocks, consider them branchless as well. + // This is typically used for loop control. + if (start->terminator == SPIRBlock::Select && start->merge == SPIRBlock::MergeNone && + (block_is_outside_flow_control_from_block(get<SPIRBlock>(start->true_block), to) || + block_is_outside_flow_control_from_block(get<SPIRBlock>(start->false_block), to))) + { + return true; + } + else if (start->merge_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->merge_block), to)) + { + return true; + } + else if (start->next_block && block_is_outside_flow_control_from_block(get<SPIRBlock>(start->next_block), to)) + { + return true; + } + else + return false; +} + +bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const +{ + if (!execution_is_branchless(from, to)) + return false; + + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (!start->ops.empty()) + return false; + + auto &next = get<SPIRBlock>(start->next_block); + // Flushing phi variables does not count as noop. + for (auto &phi : next.phi_variables) + if (phi.parent == start->self) + return false; + + start = &next; + } +} + +bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const +{ + auto *start = &from; + for (;;) + { + if (start->self == to.self) + return true; + + if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone) + start = &get<SPIRBlock>(start->next_block); + else + return false; + } +} + +SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const +{ + // The block was deemed too complex during code emit, pick conservative fallback paths. + if (block.complex_continue) + return SPIRBlock::ComplexLoop; + + // In older glslang output continue block can be equal to the loop header. + // In this case, execution is clearly branchless, so just assume a while loop header here. + if (block.merge == SPIRBlock::MergeLoop) + return SPIRBlock::WhileLoop; + + auto &dominator = get<SPIRBlock>(block.loop_dominator); + + if (execution_is_noop(block, dominator)) + return SPIRBlock::WhileLoop; + else if (execution_is_branchless(block, dominator)) + return SPIRBlock::ForLoop; + else + { + const auto *false_block = maybe_get<SPIRBlock>(block.false_block); + const auto *true_block = maybe_get<SPIRBlock>(block.true_block); + const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block); + + bool positive_do_while = block.true_block == dominator.self && + (block.false_block == dominator.merge_block || + (false_block && merge_block && execution_is_noop(*false_block, *merge_block))); + + bool negative_do_while = block.false_block == dominator.self && + (block.true_block == dominator.merge_block || + (true_block && merge_block && execution_is_noop(*true_block, *merge_block))); + + if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select && + (positive_do_while || negative_do_while)) + { + return SPIRBlock::DoWhileLoop; + } + else + return SPIRBlock::ComplexLoop; + } +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const +{ + handler.set_current_block(block); + + // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, + // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing + // inside dead blocks ... + for (auto &i : block.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + if (!handler.handle(op, ops, i.length)) + return false; + + if (op == OpFunctionCall) + { + auto &func = get<SPIRFunction>(ops[2]); + if (handler.follow_function_call(func)) + { + if (!handler.begin_function_scope(ops, i.length)) + return false; + if (!traverse_all_reachable_opcodes(get<SPIRFunction>(ops[2]), handler)) + return false; + if (!handler.end_function_scope(ops, i.length)) + return false; + } + } + } + + return true; +} + +bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const +{ + for (auto block : func.blocks) + if (!traverse_all_reachable_opcodes(get<SPIRBlock>(block), handler)) + return false; + + return true; +} + +uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.self); + if (type_meta) + { + // Decoration must be set in valid SPIR-V, otherwise throw. + auto &dec = type_meta->members[index]; + if (dec.decoration_flags.get(DecorationOffset)) + return dec.offset; + else + SPIRV_CROSS_THROW("Struct member does not have Offset set."); + } + else + SPIRV_CROSS_THROW("Struct member does not have Offset set."); +} + +uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.member_types[index]); + if (type_meta) + { + // Decoration must be set in valid SPIR-V, otherwise throw. + // ArrayStride is part of the array type not OpMemberDecorate. + auto &dec = type_meta->decoration; + if (dec.decoration_flags.get(DecorationArrayStride)) + return dec.array_stride; + else + SPIRV_CROSS_THROW("Struct member does not have ArrayStride set."); + } + else + SPIRV_CROSS_THROW("Struct member does not have Offset set."); +} + +uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.self); + if (type_meta) + { + // Decoration must be set in valid SPIR-V, otherwise throw. + // MatrixStride is part of OpMemberDecorate. + auto &dec = type_meta->members[index]; + if (dec.decoration_flags.get(DecorationMatrixStride)) + return dec.matrix_stride; + else + SPIRV_CROSS_THROW("Struct member does not have MatrixStride set."); + } + else + SPIRV_CROSS_THROW("Struct member does not have MatrixStride set."); +} + +size_t Compiler::get_declared_struct_size(const SPIRType &type) const +{ + if (type.member_types.empty()) + SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); + + uint32_t last = uint32_t(type.member_types.size() - 1); + size_t offset = type_struct_member_offset(type, last); + size_t size = get_declared_struct_member_size(type, last); + return offset + size; +} + +size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const +{ + if (type.member_types.empty()) + SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); + + size_t size = get_declared_struct_size(type); + auto &last_type = get<SPIRType>(type.member_types.back()); + if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array + size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1)); + + return size; +} + +size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +{ + if (struct_type.member_types.empty()) + SPIRV_CROSS_THROW("Declared struct in block cannot be empty."); + + auto &flags = get_member_decoration_bitset(struct_type.self, index); + auto &type = get<SPIRType>(struct_type.member_types[index]); + + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types. + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying size for object with opaque size."); + + default: + break; + } + + if (!type.array.empty()) + { + // For arrays, we can use ArrayStride to get an easy check. + bool array_size_literal = type.array_size_literal.back(); + uint32_t array_size = array_size_literal ? type.array.back() : get<SPIRConstant>(type.array.back()).scalar(); + return type_struct_member_array_stride(struct_type, index) * array_size; + } + else if (type.basetype == SPIRType::Struct) + { + return get_declared_struct_size(type); + } + else + { + unsigned vecsize = type.vecsize; + unsigned columns = type.columns; + + // Vectors. + if (columns == 1) + { + size_t component_size = type.width / 8; + return vecsize * component_size; + } + else + { + uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index); + + // Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses. + if (flags.get(DecorationRowMajor)) + return matrix_stride * vecsize; + else if (flags.get(DecorationColMajor)) + return matrix_stride * columns; + else + SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices."); + } + } +} + +bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain) + return true; + + bool ptr_chain = (opcode == OpPtrAccessChain); + + // Invalid SPIR-V. + if (length < (ptr_chain ? 5u : 4u)) + return false; + + if (args[2] != id) + return true; + + // Don't bother traversing the entire access chain tree yet. + // If we access a struct member, assume we access the entire member. + uint32_t index = compiler.get<SPIRConstant>(args[ptr_chain ? 4 : 3]).scalar(); + + // Seen this index already. + if (seen.find(index) != end(seen)) + return true; + seen.insert(index); + + auto &type = compiler.expression_type(id); + uint32_t offset = compiler.type_struct_member_offset(type, index); + + size_t range; + // If we have another member in the struct, deduce the range by looking at the next member. + // This is okay since structs in SPIR-V can have padding, but Offset decoration must be + // monotonically increasing. + // Of course, this doesn't take into account if the SPIR-V for some reason decided to add + // very large amounts of padding, but that's not really a big deal. + if (index + 1 < type.member_types.size()) + { + range = compiler.type_struct_member_offset(type, index + 1) - offset; + } + else + { + // No padding, so just deduce it from the size of the member directly. + range = compiler.get_declared_struct_member_size(type, index); + } + + ranges.push_back({ index, offset, range }); + return true; +} + +std::vector<BufferRange> Compiler::get_active_buffer_ranges(uint32_t id) const +{ + std::vector<BufferRange> ranges; + BufferAccessHandler handler(*this, ranges, id); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + return ranges; +} + +bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const +{ + if (a.basetype != b.basetype) + return false; + if (a.width != b.width) + return false; + if (a.vecsize != b.vecsize) + return false; + if (a.columns != b.columns) + return false; + if (a.array.size() != b.array.size()) + return false; + + size_t array_count = a.array.size(); + if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) + return false; + + if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) + { + if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) + return false; + } + + if (a.member_types.size() != b.member_types.size()) + return false; + + size_t member_types = a.member_types.size(); + for (size_t i = 0; i < member_types; i++) + { + if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i]))) + return false; + } + + return true; +} + +const Bitset &Compiler::get_execution_mode_bitset() const +{ + return get_entry_point().flags; +} + +void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2) +{ + auto &execution = get_entry_point(); + + execution.flags.set(mode); + switch (mode) + { + case ExecutionModeLocalSize: + execution.workgroup_size.x = arg0; + execution.workgroup_size.y = arg1; + execution.workgroup_size.z = arg2; + break; + + case ExecutionModeInvocations: + execution.invocations = arg0; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = arg0; + break; + + default: + break; + } +} + +void Compiler::unset_execution_mode(ExecutionMode mode) +{ + auto &execution = get_entry_point(); + execution.flags.clear(mode); +} + +uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, + SpecializationConstant &z) const +{ + auto &execution = get_entry_point(); + x = { 0, 0 }; + y = { 0, 0 }; + z = { 0, 0 }; + + if (execution.workgroup_size.constant != 0) + { + auto &c = get<SPIRConstant>(execution.workgroup_size.constant); + + if (c.m.c[0].id[0] != 0) + { + x.id = c.m.c[0].id[0]; + x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); + } + + if (c.m.c[0].id[1] != 0) + { + y.id = c.m.c[0].id[1]; + y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); + } + + if (c.m.c[0].id[2] != 0) + { + z.id = c.m.c[0].id[2]; + z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); + } + } + + return execution.workgroup_size.constant; +} + +uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const +{ + auto &execution = get_entry_point(); + switch (mode) + { + case ExecutionModeLocalSize: + switch (index) + { + case 0: + return execution.workgroup_size.x; + case 1: + return execution.workgroup_size.y; + case 2: + return execution.workgroup_size.z; + default: + return 0; + } + + case ExecutionModeInvocations: + return execution.invocations; + + case ExecutionModeOutputVertices: + return execution.output_vertices; + + default: + return 0; + } +} + +ExecutionModel Compiler::get_execution_model() const +{ + auto &execution = get_entry_point(); + return execution.model; +} + +bool Compiler::is_tessellation_shader(ExecutionModel model) +{ + return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; +} + +bool Compiler::is_tessellation_shader() const +{ + return is_tessellation_shader(get_execution_model()); +} + +void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable) +{ + get<SPIRVariable>(id).remapped_variable = remap_enable; +} + +bool Compiler::get_remapped_variable_state(uint32_t id) const +{ + return get<SPIRVariable>(id).remapped_variable; +} + +void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components) +{ + get<SPIRVariable>(id).remapped_components = components; +} + +uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const +{ + return get<SPIRVariable>(id).remapped_components; +} + +void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) +{ + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + if (itr == end(e.implied_read_expressions)) + e.implied_read_expressions.push_back(source); +} + +void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) +{ + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + if (itr == end(e.implied_read_expressions)) + e.implied_read_expressions.push_back(source); +} + +void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) +{ + // Don't inherit any expression dependencies if the expression in dst + // is not a forwarded temporary. + if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) || + forced_temporaries.find(dst) != end(forced_temporaries)) + { + return; + } + + auto &e = get<SPIRExpression>(dst); + auto *phi = maybe_get<SPIRVariable>(source_expression); + if (phi && phi->phi_variable) + { + // We have used a phi variable, which can change at the end of the block, + // so make sure we take a dependency on this phi variable. + phi->dependees.push_back(dst); + } + + auto *s = maybe_get<SPIRExpression>(source_expression); + if (!s) + return; + + auto &e_deps = e.expression_dependencies; + auto &s_deps = s->expression_dependencies; + + // If we depend on a expression, we also depend on all sub-dependencies from source. + e_deps.push_back(source_expression); + e_deps.insert(end(e_deps), begin(s_deps), end(s_deps)); + + // Eliminate duplicated dependencies. + sort(begin(e_deps), end(e_deps)); + e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps)); +} + +vector<EntryPoint> Compiler::get_entry_points_and_stages() const +{ + vector<EntryPoint> entries; + for (auto &entry : ir.entry_points) + entries.push_back({ entry.second.orig_name, entry.second.model }); + return entries; +} + +void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model) +{ + auto &entry = get_entry_point(old_name, model); + entry.orig_name = new_name; + entry.name = new_name; +} + +void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model) +{ + auto &entry = get_entry_point(name, model); + ir.default_entry_point = entry.self; +} + +SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) +{ + auto itr = find_if( + begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const +{ + auto itr = find_if( + begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) +{ + auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { + return entry.second.orig_name == name && entry.second.model == model; + }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const +{ + auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), + [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { + return entry.second.orig_name == name && entry.second.model == model; + }); + + if (itr == end(ir.entry_points)) + SPIRV_CROSS_THROW("Entry point does not exist."); + + return itr->second; +} + +const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const +{ + return get_entry_point(name, model).name; +} + +const SPIREntryPoint &Compiler::get_entry_point() const +{ + return ir.entry_points.find(ir.default_entry_point)->second; +} + +SPIREntryPoint &Compiler::get_entry_point() +{ + return ir.entry_points.find(ir.default_entry_point)->second; +} + +bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const +{ + auto &var = get<SPIRVariable>(id); + if (var.storage != StorageClassInput && var.storage != StorageClassOutput && + var.storage != StorageClassUniformConstant) + SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface."); + + // This is to avoid potential problems with very old glslang versions which did + // not emit input/output interfaces properly. + // We can assume they only had a single entry point, and single entry point + // shaders could easily be assumed to use every interface variable anyways. + if (ir.entry_points.size() <= 1) + return true; + + auto &execution = get_entry_point(); + return find(begin(execution.interface_variables), end(execution.interface_variables), id) != + end(execution.interface_variables); +} + +void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args, + uint32_t length) +{ + // If possible, pipe through a remapping table so that parameters know + // which variables they actually bind to in this scope. + unordered_map<uint32_t, uint32_t> remapping; + for (uint32_t i = 0; i < length; i++) + remapping[func.arguments[i].id] = remap_parameter(args[i]); + parameter_remapping.push(move(remapping)); +} + +void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() +{ + parameter_remapping.pop(); +} + +uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id) +{ + auto *var = compiler.maybe_get_backing_variable(id); + if (var) + id = var->self; + + if (parameter_remapping.empty()) + return id; + + auto &remapping = parameter_remapping.top(); + auto itr = remapping.find(id); + if (itr != end(remapping)) + return itr->second; + else + return id; +} + +bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &callee = compiler.get<SPIRFunction>(args[2]); + args += 3; + length -= 3; + push_remap_parameters(callee, args, length); + functions.push(&callee); + return true; +} + +bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &callee = compiler.get<SPIRFunction>(args[2]); + args += 3; + + // There are two types of cases we have to handle, + // a callee might call sampler2D(texture2D, sampler) directly where + // one or more parameters originate from parameters. + // Alternatively, we need to provide combined image samplers to our callees, + // and in this case we need to add those as well. + + pop_remap_parameters(); + + // Our callee has now been processed at least once. + // No point in doing it again. + callee.do_combined_parameters = false; + + auto ¶ms = functions.top()->combined_parameters; + functions.pop(); + if (functions.empty()) + return true; + + auto &caller = *functions.top(); + if (caller.do_combined_parameters) + { + for (auto ¶m : params) + { + uint32_t image_id = param.global_image ? param.image_id : args[param.image_id]; + uint32_t sampler_id = param.global_sampler ? param.sampler_id : args[param.sampler_id]; + + auto *i = compiler.maybe_get_backing_variable(image_id); + auto *s = compiler.maybe_get_backing_variable(sampler_id); + if (i) + image_id = i->self; + if (s) + sampler_id = s->self; + + register_combined_image_sampler(caller, image_id, sampler_id, param.depth); + } + } + + return true; +} + +void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, uint32_t image_id, + uint32_t sampler_id, bool depth) +{ + // We now have a texture ID and a sampler ID which will either be found as a global + // or a parameter in our own function. If both are global, they will not need a parameter, + // otherwise, add it to our list. + SPIRFunction::CombinedImageSamplerParameter param = { + 0u, image_id, sampler_id, true, true, depth, + }; + + auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments), + [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; }); + auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments), + [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; }); + + if (texture_itr != end(caller.arguments)) + { + param.global_image = false; + param.image_id = uint32_t(texture_itr - begin(caller.arguments)); + } + + if (sampler_itr != end(caller.arguments)) + { + param.global_sampler = false; + param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments)); + } + + if (param.global_image && param.global_sampler) + return; + + auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters), + [¶m](const SPIRFunction::CombinedImageSamplerParameter &p) { + return param.image_id == p.image_id && param.sampler_id == p.sampler_id && + param.global_image == p.global_image && param.global_sampler == p.global_sampler; + }); + + if (itr == end(caller.combined_parameters)) + { + uint32_t id = compiler.ir.increase_bound_by(3); + auto type_id = id + 0; + auto ptr_type_id = id + 1; + auto combined_id = id + 2; + auto &base = compiler.expression_type(image_id); + auto &type = compiler.set<SPIRType>(type_id); + auto &ptr_type = compiler.set<SPIRType>(ptr_type_id); + + type = base; + type.self = type_id; + type.basetype = SPIRType::SampledImage; + type.pointer = false; + type.storage = StorageClassGeneric; + type.image.depth = depth; + + ptr_type = type; + ptr_type.pointer = true; + ptr_type.storage = StorageClassUniformConstant; + ptr_type.parent_type = type_id; + + // Build new variable. + compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0); + + // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). + auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; + auto &old_flags = compiler.ir.meta[sampler_id].decoration.decoration_flags; + new_flags.reset(); + if (old_flags.get(DecorationRelaxedPrecision)) + new_flags.set(DecorationRelaxedPrecision); + + param.id = combined_id; + + compiler.set_name(combined_id, + join("SPIRV_Cross_Combined", compiler.to_name(image_id), compiler.to_name(sampler_id))); + + caller.combined_parameters.push_back(param); + caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true }); + } +} + +bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + if (need_dummy_sampler) + { + // No need to traverse further, we know the result. + return false; + } + + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get<SPIRType>(result_type); + bool separate_image = + type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; + + // If not separate image, don't bother. + if (!separate_image) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case OpImageFetch: + case OpImageQuerySizeLod: + case OpImageQuerySize: + case OpImageQueryLevels: + case OpImageQuerySamples: + { + // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler. + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + { + auto &type = compiler.get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + need_dummy_sampler = true; + } + + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + auto &type = compiler.get<SPIRType>(result_type); + bool separate_image = + type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; + if (!separate_image) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + + // Other backends might use SPIRAccessChain for this later. + compiler.ir.ids[id].set_allow_type_rewrite(); + break; + } + + default: + break; + } + + return true; +} + +bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need. + bool is_fetch = false; + + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get<SPIRType>(result_type); + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + + // If not separate image or sampler, don't bother. + if (!separate_image && !separate_sampler) + return true; + + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + return true; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + // Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially + // impossible to implement, since we don't know which concrete sampler we are accessing. + // One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds, + // but this seems ridiculously complicated for a problem which is easy to work around. + // Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense. + + uint32_t result_type = args[0]; + + auto &type = compiler.get<SPIRType>(result_type); + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (separate_sampler) + SPIRV_CROSS_THROW( + "Attempting to use arrays or structs of separate samplers. This is not possible to statically " + "remap to plain GLSL."); + + if (separate_image) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); + compiler.register_read(id, ptr, true); + } + return true; + } + + case OpImageFetch: + case OpImageQuerySizeLod: + case OpImageQuerySize: + case OpImageQueryLevels: + case OpImageQuerySamples: + { + // If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler. + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (!var) + return true; + + auto &type = compiler.get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (compiler.dummy_sampler_id == 0) + SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with " + "build_dummy_sampler_for_combined_images()."); + + // Do it outside. + is_fetch = true; + break; + } + + return true; + } + + case OpSampledImage: + // Do it outside. + break; + + default: + return true; + } + + // Registers sampler2D calls used in case they are parameters so + // that their callees know which combined image samplers to propagate down the call stack. + if (!functions.empty()) + { + auto &callee = *functions.top(); + if (callee.do_combined_parameters) + { + uint32_t image_id = args[2]; + + auto *image = compiler.maybe_get_backing_variable(image_id); + if (image) + image_id = image->self; + + uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3]; + auto *sampler = compiler.maybe_get_backing_variable(sampler_id); + if (sampler) + sampler_id = sampler->self; + + auto &combined_type = compiler.get<SPIRType>(args[0]); + register_combined_image_sampler(callee, image_id, sampler_id, combined_type.image.depth); + } + } + + // For function calls, we need to remap IDs which are function parameters into global variables. + // This information is statically known from the current place in the call stack. + // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know + // which backing variable the image/sample came from. + uint32_t image_id = remap_parameter(args[2]); + uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); + + auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), + [image_id, sampler_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == sampler_id; + }); + + if (itr == end(compiler.combined_image_samplers)) + { + uint32_t sampled_type; + if (is_fetch) + { + // Have to invent the sampled image type. + sampled_type = compiler.ir.increase_bound_by(1); + auto &type = compiler.set<SPIRType>(sampled_type); + type = compiler.expression_type(args[2]); + type.self = sampled_type; + type.basetype = SPIRType::SampledImage; + type.image.depth = false; + } + else + { + sampled_type = args[0]; + } + + auto id = compiler.ir.increase_bound_by(2); + auto type_id = id + 0; + auto combined_id = id + 1; + + // Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type. + // We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes. + auto &type = compiler.set<SPIRType>(type_id); + auto &base = compiler.get<SPIRType>(sampled_type); + type = base; + type.pointer = true; + type.storage = StorageClassUniformConstant; + type.parent_type = type_id; + + // Build new variable. + compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0); + + // Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant). + auto &new_flags = compiler.ir.meta[combined_id].decoration.decoration_flags; + // Fetch inherits precision from the image, not sampler (there is no sampler). + auto &old_flags = compiler.ir.meta[is_fetch ? image_id : sampler_id].decoration.decoration_flags; + new_flags.reset(); + if (old_flags.get(DecorationRelaxedPrecision)) + new_flags.set(DecorationRelaxedPrecision); + + // Propagate the array type for the original image as well. + auto *var = compiler.maybe_get_backing_variable(image_id); + if (var) + { + auto &parent_type = compiler.get<SPIRType>(var->basetype); + type.array = parent_type.array; + type.array_size_literal = parent_type.array_size_literal; + } + + compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id }); + } + + return true; +} + +uint32_t Compiler::build_dummy_sampler_for_combined_images() +{ + DummySamplerForCombinedImageHandler handler(*this); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + if (handler.need_dummy_sampler) + { + uint32_t offset = ir.increase_bound_by(3); + auto type_id = offset + 0; + auto ptr_type_id = offset + 1; + auto var_id = offset + 2; + + SPIRType sampler_type; + auto &sampler = set<SPIRType>(type_id); + sampler.basetype = SPIRType::Sampler; + + auto &ptr_sampler = set<SPIRType>(ptr_type_id); + ptr_sampler = sampler; + ptr_sampler.self = type_id; + ptr_sampler.storage = StorageClassUniformConstant; + ptr_sampler.pointer = true; + ptr_sampler.parent_type = type_id; + + set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0); + set_name(var_id, "SPIRV_Cross_DummySampler"); + dummy_sampler_id = var_id; + return var_id; + } + else + return 0; +} + +void Compiler::build_combined_image_samplers() +{ + ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) { + func.combined_parameters.clear(); + func.shadow_arguments.clear(); + func.do_combined_parameters = true; + }); + + combined_image_samplers.clear(); + CombinedImageSamplerHandler handler(*this); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); +} + +vector<SpecializationConstant> Compiler::get_specialization_constants() const +{ + vector<SpecializationConstant> spec_consts; + ir.for_each_typed_id<SPIRConstant>([&](uint32_t, const SPIRConstant &c) { + if (c.specialization && has_decoration(c.self, DecorationSpecId)) + spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) }); + }); + return spec_consts; +} + +SPIRConstant &Compiler::get_constant(uint32_t id) +{ + return get<SPIRConstant>(id); +} + +const SPIRConstant &Compiler::get_constant(uint32_t id) const +{ + return get<SPIRConstant>(id); +} + +static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks) +{ + // This block accesses the variable. + if (blocks.find(block) != end(blocks)) + return false; + + // We are at the end of the CFG. + if (cfg.get_succeeding_edges(block).empty()) + return true; + + // If any of our successors have a path to the end, there exists a path from block. + for (auto &succ : cfg.get_succeeding_edges(block)) + if (exists_unaccessed_path_to_return(cfg, succ, blocks)) + return true; + + return false; +} + +void Compiler::analyze_parameter_preservation( + SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks, + const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks) +{ + for (auto &arg : entry.arguments) + { + // Non-pointers are always inputs. + auto &type = get<SPIRType>(arg.type); + if (!type.pointer) + continue; + + // Opaque argument types are always in + bool potential_preserve; + switch (type.basetype) + { + case SPIRType::Sampler: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::AtomicCounter: + potential_preserve = false; + break; + + default: + potential_preserve = true; + break; + } + + if (!potential_preserve) + continue; + + auto itr = variable_to_blocks.find(arg.id); + if (itr == end(variable_to_blocks)) + { + // Variable is never accessed. + continue; + } + + // We have accessed a variable, but there was no complete writes to that variable. + // We deduce that we must preserve the argument. + itr = complete_write_blocks.find(arg.id); + if (itr == end(complete_write_blocks)) + { + arg.read_count++; + continue; + } + + // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state + // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function. + // Major case here is if a function is + // void foo(int &var) { if (cond) var = 10; } + // Using read/write counts, we will think it's just an out variable, but it really needs to be inout, + // because if we don't write anything whatever we put into the function must return back to the caller. + if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second)) + arg.read_count++; + } +} + +Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_, + SPIRFunction &entry_) + : compiler(compiler_) + , entry(entry_) +{ +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &) +{ + // Only analyze within this function. + return false; +} + +void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block) +{ + current_block = █ + + // If we're branching to a block which uses OpPhi, in GLSL + // this will be a variable write when we branch, + // so we need to track access to these variables as well to + // have a complete picture. + const auto test_phi = [this, &block](uint32_t to) { + auto &next = compiler.get<SPIRBlock>(to); + for (auto &phi : next.phi_variables) + { + if (phi.parent == block.self) + { + accessed_variables_to_block[phi.function_variable].insert(block.self); + // Phi variables are also accessed in our target branch block. + accessed_variables_to_block[phi.function_variable].insert(next.self); + + notify_variable_access(phi.local_variable, block.self); + } + } + }; + + switch (block.terminator) + { + case SPIRBlock::Direct: + notify_variable_access(block.condition, block.self); + test_phi(block.next_block); + break; + + case SPIRBlock::Select: + notify_variable_access(block.condition, block.self); + test_phi(block.true_block); + test_phi(block.false_block); + break; + + case SPIRBlock::MultiSelect: + notify_variable_access(block.condition, block.self); + for (auto &target : block.cases) + test_phi(target.block); + if (block.default_block) + test_phi(block.default_block); + break; + + default: + break; + } +} + +void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) +{ + if (id_is_phi_variable(id)) + accessed_variables_to_block[id].insert(block); + else if (id_is_potential_temporary(id)) + accessed_temporaries_to_block[id].insert(block); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + auto *var = compiler.maybe_get<SPIRVariable>(id); + return var && var->phi_variable; +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const +{ + if (id >= compiler.get_current_id_bound()) + return false; + + // Temporaries are not created before we start emitting code. + return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); +} + +bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + // Keep track of the types of temporaries, so we can hoist them out as necessary. + uint32_t result_type, result_id; + if (compiler.instruction_to_result_type(result_type, result_id, op, args, length)) + result_id_to_type[result_id] = result_type; + + switch (op) + { + case OpStore: + { + if (length < 2) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // If we store through an access chain, we have a partial write. + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == ptr) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // Might try to store a Phi variable here. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get<SPIRVariable>(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + for (uint32_t i = 3; i < length; i++) + notify_variable_access(args[i], current_block->self); + + // The result of an access chain is a fixed expression and is not really considered a temporary. + auto &e = compiler.set<SPIRExpression>(args[1], "", args[0], true); + auto *backing_variable = compiler.maybe_get_backing_variable(ptr); + e.loaded_from = backing_variable ? backing_variable->self : 0; + + // Other backends might use SPIRAccessChain for this later. + compiler.ir.ids[args[1]].set_allow_type_rewrite(); + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t lhs = args[0]; + uint32_t rhs = args[1]; + auto *var = compiler.maybe_get_backing_variable(lhs); + + // If we store through an access chain, we have a partial write. + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + if (var->self == lhs) + complete_write_variables_to_block[var->self].insert(current_block->self); + else + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + var = compiler.maybe_get_backing_variable(rhs); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + break; + } + + case OpCopyObject: + { + if (length < 3) + return false; + + auto *var = compiler.maybe_get_backing_variable(args[2]); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Might try to copy a Phi variable here. + notify_variable_access(args[2], current_block->self); + break; + } + + case OpLoad: + { + if (length < 3) + return false; + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + accessed_variables_to_block[var->self].insert(current_block->self); + + // Loaded value is a temporary. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + length -= 3; + args += 3; + + for (uint32_t i = 0; i < length; i++) + { + auto *var = compiler.maybe_get_backing_variable(args[i]); + if (var) + { + accessed_variables_to_block[var->self].insert(current_block->self); + // Assume we can get partial writes to this variable. + partial_write_variables_to_block[var->self].insert(current_block->self); + } + + // Cannot easily prove if argument we pass to a function is completely written. + // Usually, functions write to a dummy variable, + // which is then copied to in full to the real argument. + + // Might try to copy a Phi variable here. + notify_variable_access(args[i], current_block->self); + } + + // Return value may be a temporary. + notify_variable_access(args[1], current_block->self); + break; + } + + case OpExtInst: + { + for (uint32_t i = 4; i < length; i++) + notify_variable_access(args[i], current_block->self); + notify_variable_access(args[1], current_block->self); + break; + } + + case OpArrayLength: + // Uses literals, but cannot be a phi variable or temporary, so ignore. + break; + + // Atomics shouldn't be able to access function-local variables. + // Some GLSL builtins access a pointer. + + case OpCompositeInsert: + case OpVectorShuffle: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 4; i++) + notify_variable_access(args[i], current_block->self); + break; + + case OpCompositeExtract: + // Specialize for opcode which contains literals. + for (uint32_t i = 1; i < 3; i++) + notify_variable_access(args[i], current_block->self); + break; + + case OpImageWrite: + for (uint32_t i = 0; i < length; i++) + { + // Argument 3 is a literal. + if (i != 3) + notify_variable_access(args[i], current_block->self); + } + break; + + case OpImageSampleImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageFetch: + case OpImageSparseFetch: + case OpImageRead: + case OpImageSparseRead: + for (uint32_t i = 1; i < length; i++) + { + // Argument 4 is a literal. + if (i != 4) + notify_variable_access(args[i], current_block->self); + } + break; + + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageGather: + case OpImageSparseGather: + case OpImageDrefGather: + case OpImageSparseDrefGather: + for (uint32_t i = 1; i < length; i++) + { + // Argument 5 is a literal. + if (i != 5) + notify_variable_access(args[i], current_block->self); + } + break; + + default: + { + // Rather dirty way of figuring out where Phi variables are used. + // As long as only IDs are used, we can scan through instructions and try to find any evidence that + // the ID of a variable has been used. + // There are potential false positives here where a literal is used in-place of an ID, + // but worst case, it does not affect the correctness of the compile. + // Exhaustive analysis would be better here, but it's not worth it for now. + for (uint32_t i = 0; i < length; i++) + notify_variable_access(args[i], current_block->self); + break; + } + } + return true; +} + +Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_) + : compiler(compiler_) + , variable_id(variable_id_) +{ +} + +bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &) +{ + return false; +} + +bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) +{ + switch (op) + { + case OpStore: + if (length < 2) + return false; + if (args[0] == variable_id) + { + static_expression = args[1]; + write_count++; + } + break; + + case OpLoad: + if (length < 3) + return false; + if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized. + return false; + break; + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + if (length < 3) + return false; + if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail. + return false; + break; + + default: + break; + } + + return true; +} + +void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler, + bool single_function) +{ + auto &cfg = *function_cfgs.find(entry.self)->second; + + // For each variable which is statically accessed. + for (auto &accessed_var : handler.accessed_variables_to_block) + { + auto &blocks = accessed_var.second; + auto &var = get<SPIRVariable>(accessed_var.first); + auto &type = expression_type(accessed_var.first); + + // Only consider function local variables here. + // If we only have a single function in our CFG, private storage is also fine, + // since it behaves like a function local variable. + bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate); + if (!allow_lut) + continue; + + // We cannot be a phi variable. + if (var.phi_variable) + continue; + + // Only consider arrays here. + if (type.array.empty()) + continue; + + // If the variable has an initializer, make sure it is a constant expression. + uint32_t static_constant_expression = 0; + if (var.initializer) + { + if (ir.ids[var.initializer].get_type() != TypeConstant) + continue; + static_constant_expression = var.initializer; + + // There can be no stores to this variable, we have now proved we have a LUT. + if (handler.complete_write_variables_to_block.count(var.self) != 0 || + handler.partial_write_variables_to_block.count(var.self) != 0) + continue; + } + else + { + // We can have one, and only one write to the variable, and that write needs to be a constant. + + // No partial writes allowed. + if (handler.partial_write_variables_to_block.count(var.self) != 0) + continue; + + auto itr = handler.complete_write_variables_to_block.find(var.self); + + // No writes? + if (itr == end(handler.complete_write_variables_to_block)) + continue; + + // We write to the variable in more than one block. + auto &write_blocks = itr->second; + if (write_blocks.size() != 1) + continue; + + // The write needs to happen in the dominating block. + DominatorBuilder builder(cfg); + for (auto &block : blocks) + builder.add_block(block); + uint32_t dominator = builder.get_dominator(); + + // The complete write happened in a branch or similar, cannot deduce static expression. + if (write_blocks.count(dominator) == 0) + continue; + + // Find the static expression for this variable. + StaticExpressionAccessHandler static_expression_handler(*this, var.self); + traverse_all_reachable_opcodes(get<SPIRBlock>(dominator), static_expression_handler); + + // We want one, and exactly one write + if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0) + continue; + + // Is it a constant expression? + if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant) + continue; + + // We found a LUT! + static_constant_expression = static_expression_handler.static_expression; + } + + get<SPIRConstant>(static_constant_expression).is_used_as_lut = true; + var.static_expression = static_constant_expression; + var.statically_assigned = true; + var.remapped_variable = true; + } +} + +void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler) +{ + // First, we map out all variable access within a function. + // Essentially a map of block -> { variables accessed in the basic block } + traverse_all_reachable_opcodes(entry, handler); + + auto &cfg = *function_cfgs.find(entry.self)->second; + + // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier. + analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, + handler.complete_write_variables_to_block); + + unordered_map<uint32_t, uint32_t> potential_loop_variables; + + // For each variable which is statically accessed. + for (auto &var : handler.accessed_variables_to_block) + { + // Only deal with variables which are considered local variables in this function. + if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + continue; + + DominatorBuilder builder(cfg); + auto &blocks = var.second; + auto &type = expression_type(var.first); + + // Figure out which block is dominating all accesses of those variables. + for (auto &block : blocks) + { + // If we're accessing a variable inside a continue block, this variable might be a loop variable. + // We can only use loop variables with scalars, as we cannot track static expressions for vectors. + if (is_continue(block)) + { + // Potentially awkward case to check for. + // We might have a variable inside a loop, which is touched by the continue block, + // but is not actually a loop variable. + // The continue block is dominated by the inner part of the loop, which does not make sense in high-level + // language output because it will be declared before the body, + // so we will have to lift the dominator up to the relevant loop header instead. + builder.add_block(ir.continue_block_to_loop_header[block]); + + // Arrays or structs cannot be loop variables. + if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty()) + { + // The variable is used in multiple continue blocks, this is not a loop + // candidate, signal that by setting block to -1u. + auto &potential = potential_loop_variables[var.first]; + + if (potential == 0) + potential = block; + else + potential = ~(0u); + } + } + builder.add_block(block); + } + + builder.lift_continue_block_dominator(); + + // Add it to a per-block list of variables. + uint32_t dominating_block = builder.get_dominator(); + + // If all blocks here are dead code, this will be 0, so the variable in question + // will be completely eliminated. + if (dominating_block) + { + auto &block = get<SPIRBlock>(dominating_block); + block.dominated_variables.push_back(var.first); + get<SPIRVariable>(var.first).dominator = dominating_block; + } + } + + for (auto &var : handler.accessed_temporaries_to_block) + { + auto itr = handler.result_id_to_type.find(var.first); + + if (itr == end(handler.result_id_to_type)) + { + // We found a false positive ID being used, ignore. + // This should probably be an assert. + continue; + } + + // There is no point in doing domination analysis for opaque types. + auto &type = get<SPIRType>(itr->second); + if (type_is_opaque_value(type)) + continue; + + DominatorBuilder builder(cfg); + bool force_temporary = false; + + // Figure out which block is dominating all accesses of those temporaries. + auto &blocks = var.second; + for (auto &block : blocks) + { + builder.add_block(block); + + // If a temporary is used in more than one block, we might have to lift continue block + // access up to loop header like we did for variables. + if (blocks.size() != 1 && is_continue(block)) + builder.add_block(ir.continue_block_to_loop_header[block]); + else if (blocks.size() != 1 && is_single_block_loop(block)) + { + // Awkward case, because the loop header is also the continue block. + force_temporary = true; + } + } + + uint32_t dominating_block = builder.get_dominator(); + if (dominating_block) + { + // If we touch a variable in the dominating block, this is the expected setup. + // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops. + bool first_use_is_dominator = blocks.count(dominating_block) != 0; + + if (!first_use_is_dominator || force_temporary) + { + // This should be very rare, but if we try to declare a temporary inside a loop, + // and that temporary is used outside the loop as well (spirv-opt inliner likes this) + // we should actually emit the temporary outside the loop. + hoisted_temporaries.insert(var.first); + forced_temporaries.insert(var.first); + + auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary; + block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); + } + else if (blocks.size() > 1) + { + // Keep track of the temporary as we might have to declare this temporary. + // This can happen if the loop header dominates a temporary, but we have a complex fallback loop. + // In this case, the header is actually inside the for (;;) {} block, and we have problems. + // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block + // declares the temporary. + auto &block_temporaries = get<SPIRBlock>(dominating_block).potential_declare_temporary; + block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); + } + } + } + + unordered_set<uint32_t> seen_blocks; + + // Now, try to analyze whether or not these variables are actually loop variables. + for (auto &loop_variable : potential_loop_variables) + { + auto &var = get<SPIRVariable>(loop_variable.first); + auto dominator = var.dominator; + auto block = loop_variable.second; + + // The variable was accessed in multiple continue blocks, ignore. + if (block == ~(0u) || block == 0) + continue; + + // Dead code. + if (dominator == 0) + continue; + + uint32_t header = 0; + + // Find the loop header for this block if we are a continue block. + { + auto itr = ir.continue_block_to_loop_header.find(block); + if (itr != end(ir.continue_block_to_loop_header)) + { + header = itr->second; + } + else if (get<SPIRBlock>(block).continue_block == block) + { + // Also check for self-referential continue block. + header = block; + } + } + + assert(header); + auto &header_block = get<SPIRBlock>(header); + auto &blocks = handler.accessed_variables_to_block[loop_variable.first]; + + // If a loop variable is not used before the loop, it's probably not a loop variable. + bool has_accessed_variable = blocks.count(header) != 0; + + // Now, there are two conditions we need to meet for the variable to be a loop variable. + // 1. The dominating block must have a branch-free path to the loop header, + // this way we statically know which expression should be part of the loop variable initializer. + + // Walk from the dominator, if there is one straight edge connecting + // dominator and loop header, we statically know the loop initializer. + bool static_loop_init = true; + while (dominator != header) + { + if (blocks.count(dominator) != 0) + has_accessed_variable = true; + + auto &succ = cfg.get_succeeding_edges(dominator); + if (succ.size() != 1) + { + static_loop_init = false; + break; + } + + auto &pred = cfg.get_preceding_edges(succ.front()); + if (pred.size() != 1 || pred.front() != dominator) + { + static_loop_init = false; + break; + } + + dominator = succ.front(); + } + + if (!static_loop_init || !has_accessed_variable) + continue; + + // The second condition we need to meet is that no access after the loop + // merge can occur. Walk the CFG to see if we find anything. + + seen_blocks.clear(); + cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) { + // We found a block which accesses the variable outside the loop. + if (blocks.find(walk_block) != end(blocks)) + static_loop_init = false; + }); + + if (!static_loop_init) + continue; + + // We have a loop variable. + header_block.loop_variables.push_back(loop_variable.first); + // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order + // will break reproducability in regression runs. + sort(begin(header_block.loop_variables), end(header_block.loop_variables)); + get<SPIRVariable>(loop_variable.first).loop_variable = true; + } +} + +Bitset Compiler::get_buffer_block_flags(uint32_t id) const +{ + return ir.get_buffer_block_flags(get<SPIRVariable>(id)); +} + +bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type) +{ + if (type.basetype == SPIRType::Struct) + { + base_type = SPIRType::Unknown; + for (auto &member_type : type.member_types) + { + SPIRType::BaseType member_base; + if (!get_common_basic_type(get<SPIRType>(member_type), member_base)) + return false; + + if (base_type == SPIRType::Unknown) + base_type = member_base; + else if (base_type != member_base) + return false; + } + return true; + } + else + { + base_type = type.basetype; + return true; + } +} + +void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin, + const Bitset &decoration_flags) +{ + // If used, we will need to explicitly declare a new array size for these builtins. + + if (builtin == BuiltInClipDistance) + { + if (!type.array_size_literal[0]) + SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal."); + uint32_t array_size = type.array[0]; + if (array_size == 0) + SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized."); + compiler.clip_distance_count = array_size; + } + else if (builtin == BuiltInCullDistance) + { + if (!type.array_size_literal[0]) + SPIRV_CROSS_THROW("Array size for CullDistance must be a literal."); + uint32_t array_size = type.array[0]; + if (array_size == 0) + SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized."); + compiler.cull_distance_count = array_size; + } + else if (builtin == BuiltInPosition) + { + if (decoration_flags.get(DecorationInvariant)) + compiler.position_invariant = true; + } +} + +bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ + const auto add_if_builtin = [&](uint32_t id) { + // Only handles variables here. + // Builtins which are part of a block are handled in AccessChain. + auto *var = compiler.maybe_get<SPIRVariable>(id); + auto &decorations = compiler.ir.meta[id].decoration; + if (var && decorations.builtin) + { + auto &type = compiler.get<SPIRType>(var->basetype); + auto &flags = + type.storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins; + flags.set(decorations.builtin_type); + handle_builtin(type, decorations.builtin_type, decorations.decoration_flags); + } + }; + + switch (opcode) + { + case OpStore: + if (length < 1) + return false; + + add_if_builtin(args[0]); + break; + + case OpCopyMemory: + if (length < 2) + return false; + + add_if_builtin(args[0]); + add_if_builtin(args[1]); + break; + + case OpCopyObject: + case OpLoad: + if (length < 3) + return false; + + add_if_builtin(args[2]); + break; + + case OpSelect: + if (length < 5) + return false; + + add_if_builtin(args[3]); + add_if_builtin(args[4]); + break; + + case OpPhi: + { + if (length < 2) + return false; + + uint32_t count = length - 2; + args += 2; + for (uint32_t i = 0; i < count; i += 2) + add_if_builtin(args[i]); + break; + } + + case OpFunctionCall: + { + if (length < 3) + return false; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + add_if_builtin(args[i]); + break; + } + + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + { + if (length < 4) + return false; + + // Only consider global variables, cannot consider variables in functions yet, or other + // access chains as they have not been created yet. + auto *var = compiler.maybe_get<SPIRVariable>(args[2]); + if (!var) + break; + + // Required if we access chain into builtins like gl_GlobalInvocationID. + add_if_builtin(args[2]); + + // Start traversing type hierarchy at the proper non-pointer types. + auto *type = &compiler.get_variable_data_type(*var); + + auto &flags = + var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins; + + uint32_t count = length - 3; + args += 3; + for (uint32_t i = 0; i < count; i++) + { + // Pointers + if (opcode == OpPtrAccessChain && i == 0) + { + type = &compiler.get<SPIRType>(type->parent_type); + continue; + } + + // Arrays + if (!type->array.empty()) + { + type = &compiler.get<SPIRType>(type->parent_type); + } + // Structs + else if (type->basetype == SPIRType::Struct) + { + uint32_t index = compiler.get<SPIRConstant>(args[i]).scalar(); + + if (index < uint32_t(compiler.ir.meta[type->self].members.size())) + { + auto &decorations = compiler.ir.meta[type->self].members[index]; + if (decorations.builtin) + { + flags.set(decorations.builtin_type); + handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type, + decorations.decoration_flags); + } + } + + type = &compiler.get<SPIRType>(type->member_types[index]); + } + else + { + // No point in traversing further. We won't find any extra builtins. + break; + } + } + break; + } + + default: + break; + } + + return true; +} + +void Compiler::update_active_builtins() +{ + active_input_builtins.reset(); + active_output_builtins.reset(); + cull_distance_count = 0; + clip_distance_count = 0; + ActiveBuiltinHandler handler(*this); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); +} + +// Returns whether this shader uses a builtin of the storage class +bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) +{ + const Bitset *flags; + switch (storage) + { + case StorageClassInput: + flags = &active_input_builtins; + break; + case StorageClassOutput: + flags = &active_output_builtins; + break; + + default: + return false; + } + return flags->get(builtin); +} + +void Compiler::analyze_image_and_sampler_usage() +{ + CombinedImageSamplerDrefHandler dref_handler(*this); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), dref_handler); + + CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + comparison_ids = move(handler.comparison_ids); + need_subpass_input = handler.need_subpass_input; + + // Forward information from separate images and samplers into combined image samplers. + for (auto &combined : combined_image_samplers) + if (comparison_ids.count(combined.sampler_id)) + comparison_ids.insert(combined.combined_id); +} + +bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t) +{ + // Mark all sampled images which are used with Dref. + switch (opcode) + { + case OpImageSampleDrefExplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageDrefGather: + case OpImageSparseDrefGather: + dref_combined_samplers.insert(args[2]); + return true; + + default: + break; + } + + return true; +} + +void Compiler::build_function_control_flow_graphs_and_analyze() +{ + CFGBuilder handler(*this); + handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get<SPIRFunction>(ir.default_entry_point))); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + function_cfgs = move(handler.function_cfgs); + bool single_function = function_cfgs.size() <= 1; + + for (auto &f : function_cfgs) + { + auto &func = get<SPIRFunction>(f.first); + AnalyzeVariableScopeAccessHandler scope_handler(*this, func); + analyze_variable_scope(func, scope_handler); + find_function_local_luts(func, scope_handler, single_function); + + // Check if we can actually use the loop variables we found in analyze_variable_scope. + // To use multiple initializers, we need the same type and qualifiers. + for (auto block : func.blocks) + { + auto &b = get<SPIRBlock>(block); + if (b.loop_variables.size() < 2) + continue; + + auto &flags = get_decoration_bitset(b.loop_variables.front()); + uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype; + bool invalid_initializers = false; + for (auto loop_variable : b.loop_variables) + { + if (flags != get_decoration_bitset(loop_variable) || + type != get<SPIRVariable>(b.loop_variables.front()).basetype) + { + invalid_initializers = true; + break; + } + } + + if (invalid_initializers) + { + for (auto loop_variable : b.loop_variables) + get<SPIRVariable>(loop_variable).loop_variable = false; + b.loop_variables.clear(); + } + } + } +} + +Compiler::CFGBuilder::CFGBuilder(spirv_cross::Compiler &compiler_) + : compiler(compiler_) +{ +} + +bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t) +{ + return true; +} + +bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) +{ + if (function_cfgs.find(func.self) == end(function_cfgs)) + { + function_cfgs[func.self].reset(new CFG(compiler, func)); + return true; + } + else + return false; +} + +bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + auto &func = compiler.get<SPIRFunction>(args[2]); + const auto *arg = &args[3]; + length -= 3; + + for (uint32_t i = 0; i < length; i++) + { + auto &argument = func.arguments[i]; + dependency_hierarchy[argument.id].insert(arg[i]); + } + + return true; +} + +void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id) +{ + // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. + comparison_ids.insert(id); + for (auto &dep_id : dependency_hierarchy[id]) + add_hierarchy_to_comparison_ids(dep_id); +} + +bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + switch (opcode) + { + case OpAccessChain: + case OpInBoundsAccessChain: + case OpPtrAccessChain: + case OpLoad: + { + if (length < 3) + return false; + dependency_hierarchy[args[1]].insert(args[2]); + + // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs. + // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord. + auto &type = compiler.get<SPIRType>(args[0]); + if (type.image.dim == DimSubpassData) + need_subpass_input = true; + + // If we load a SampledImage and it will be used with Dref, propagate the state up. + if (dref_combined_samplers.count(args[1]) != 0) + add_hierarchy_to_comparison_ids(args[1]); + break; + } + + case OpSampledImage: + { + if (length < 4) + return false; + + uint32_t result_type = args[0]; + uint32_t result_id = args[1]; + auto &type = compiler.get<SPIRType>(result_type); + if (type.image.depth || dref_combined_samplers.count(result_id) != 0) + { + // This image must be a depth image. + uint32_t image = args[2]; + add_hierarchy_to_comparison_ids(image); + + // This sampler must be a SamplerComparisonState, and not a regular SamplerState. + uint32_t sampler = args[3]; + add_hierarchy_to_comparison_ids(sampler); + + // Mark the OpSampledImage itself as being comparison state. + comparison_ids.insert(result_id); + } + return true; + } + + default: + break; + } + + return true; +} + +bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const +{ + auto *m = ir.find_meta(id); + return m && m->hlsl_is_magic_counter_buffer; +} + +bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const +{ + auto *m = ir.find_meta(id); + + // First, check for the proper decoration. + if (m && m->hlsl_magic_counter_buffer != 0) + { + counter_id = m->hlsl_magic_counter_buffer; + return true; + } + else + return false; +} + +void Compiler::make_constant_null(uint32_t id, uint32_t type) +{ + auto &constant_type = get<SPIRType>(type); + + if (constant_type.pointer) + { + auto &constant = set<SPIRConstant>(id, type); + constant.make_null(constant_type); + } + else if (!constant_type.array.empty()) + { + assert(constant_type.parent_type); + uint32_t parent_id = ir.increase_bound_by(1); + make_constant_null(parent_id, constant_type.parent_type); + + if (!constant_type.array_size_literal.back()) + SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); + + vector<uint32_t> elements(constant_type.array.back()); + for (uint32_t i = 0; i < constant_type.array.back(); i++) + elements[i] = parent_id; + set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false); + } + else if (!constant_type.member_types.empty()) + { + uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size())); + vector<uint32_t> elements(constant_type.member_types.size()); + for (uint32_t i = 0; i < constant_type.member_types.size(); i++) + { + make_constant_null(member_ids + i, constant_type.member_types[i]); + elements[i] = member_ids + i; + } + set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false); + } + else + { + auto &constant = set<SPIRConstant>(id, type); + constant.make_null(constant_type); + } +} + +const std::vector<spv::Capability> &Compiler::get_declared_capabilities() const +{ + return ir.declared_capabilities; +} + +const std::vector<std::string> &Compiler::get_declared_extensions() const +{ + return ir.declared_extensions; +} + +std::string Compiler::get_remapped_declared_block_name(uint32_t id) const +{ + auto itr = declared_block_names.find(id); + if (itr != end(declared_block_names)) + return itr->second; + else + { + auto &var = get<SPIRVariable>(id); + auto &type = get<SPIRType>(var.basetype); + + auto *type_meta = ir.find_meta(type.self); + auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr; + return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name; + } +} + +bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, + uint32_t length) +{ + // Most instructions follow the pattern of <result-type> <result-id> <arguments>. + // There are some exceptions. + switch (op) + { + case OpStore: + case OpCopyMemory: + case OpCopyMemorySized: + case OpImageWrite: + case OpAtomicStore: + case OpAtomicFlagClear: + case OpEmitStreamVertex: + case OpEndStreamPrimitive: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupWaitEvents: + case OpRetainEvent: + case OpReleaseEvent: + case OpSetUserEventStatus: + case OpCaptureEventProfilingInfo: + case OpCommitReadPipe: + case OpCommitWritePipe: + case OpGroupCommitReadPipe: + case OpGroupCommitWritePipe: + return false; + + default: + if (length > 1 && maybe_get<SPIRType>(args[0]) != nullptr) + { + result_type = args[0]; + result_id = args[1]; + return true; + } + else + return false; + } +} + +Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const +{ + Bitset flags; + auto *type_meta = ir.find_meta(type.self); + + if (type_meta) + { + auto &memb = type_meta->members; + if (index >= memb.size()) + return flags; + auto &dec = memb[index]; + + // If our type is a struct, traverse all the members as well recursively. + flags.merge_or(dec.decoration_flags); + for (uint32_t i = 0; i < type.member_types.size(); i++) + flags.merge_or(combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i)); + } + + return flags; +} + +bool Compiler::is_desktop_only_format(spv::ImageFormat format) +{ + switch (format) + { + // Desktop-only formats + case ImageFormatR11fG11fB10f: + case ImageFormatR16f: + case ImageFormatRgb10A2: + case ImageFormatR8: + case ImageFormatRg8: + case ImageFormatR16: + case ImageFormatRg16: + case ImageFormatRgba16: + case ImageFormatR16Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRgba16Snorm: + case ImageFormatR8Snorm: + case ImageFormatRg8Snorm: + case ImageFormatR8ui: + case ImageFormatRg8ui: + case ImageFormatR16ui: + case ImageFormatRgb10a2ui: + case ImageFormatR8i: + case ImageFormatRg8i: + case ImageFormatR16i: + return true; + default: + break; + } + + return false; +} + +bool Compiler::image_is_comparison(const spirv_cross::SPIRType &type, uint32_t id) const +{ + return type.image.depth || (comparison_ids.count(id) != 0); +} + +bool Compiler::type_is_opaque_value(const spirv_cross::SPIRType &type) const +{ + return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image || + type.basetype == SPIRType::Sampler); +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp new file mode 100644 index 0000000..4edc836 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp @@ -0,0 +1,969 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_HPP +#define SPIRV_CROSS_HPP + +#include "spirv.hpp" +#include "spirv_cfg.hpp" +#include "spirv_cross_parsed_ir.hpp" + +namespace spirv_cross +{ +struct Resource +{ + // Resources are identified with their SPIR-V ID. + // This is the ID of the OpVariable. + uint32_t id; + + // The type ID of the variable which includes arrays and all type modifications. + // This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general + // since these modifications typically happen on the base_type_id. + uint32_t type_id; + + // The base type of the declared resource. + // This type is the base type which ignores pointers and arrays of the type_id. + // This is mostly useful to parse decorations of the underlying type. + // base_type_id can also be obtained with get_type(get_type(type_id).self). + uint32_t base_type_id; + + // The declared name (OpName) of the resource. + // For Buffer blocks, the name actually reflects the externally + // visible Block name. + // + // This name can be retrieved again by using either + // get_name(id) or get_name(base_type_id) depending if it's a buffer block or not. + // + // This name can be an empty string in which case get_fallback_name(id) can be + // used which obtains a suitable fallback identifier for an ID. + std::string name; +}; + +struct ShaderResources +{ + std::vector<Resource> uniform_buffers; + std::vector<Resource> storage_buffers; + std::vector<Resource> stage_inputs; + std::vector<Resource> stage_outputs; + std::vector<Resource> subpass_inputs; + std::vector<Resource> storage_images; + std::vector<Resource> sampled_images; + std::vector<Resource> atomic_counters; + std::vector<Resource> acceleration_structures; + + // There can only be one push constant block, + // but keep the vector in case this restriction is lifted in the future. + std::vector<Resource> push_constant_buffers; + + // For Vulkan GLSL and HLSL source, + // these correspond to separate texture2D and samplers respectively. + std::vector<Resource> separate_images; + std::vector<Resource> separate_samplers; +}; + +struct CombinedImageSampler +{ + // The ID of the sampler2D variable. + uint32_t combined_id; + // The ID of the texture2D variable. + uint32_t image_id; + // The ID of the sampler variable. + uint32_t sampler_id; +}; + +struct SpecializationConstant +{ + // The ID of the specialization constant. + uint32_t id; + // The constant ID of the constant, used in Vulkan during pipeline creation. + uint32_t constant_id; +}; + +struct BufferRange +{ + unsigned index; + size_t offset; + size_t range; +}; + +enum BufferPackingStandard +{ + BufferPackingStd140, + BufferPackingStd430, + BufferPackingStd140EnhancedLayout, + BufferPackingStd430EnhancedLayout, + BufferPackingHLSLCbuffer, + BufferPackingHLSLCbufferPackOffset +}; + +struct EntryPoint +{ + std::string name; + spv::ExecutionModel execution_model; +}; + +enum ExtendedDecorations +{ + SPIRVCrossDecorationPacked, + SPIRVCrossDecorationPackedType, + SPIRVCrossDecorationInterfaceMemberIndex, + SPIRVCrossDecorationInterfaceOrigID, + SPIRVCrossDecorationArgumentBufferID +}; + +class Compiler +{ +public: + friend class CFG; + friend class DominatorBuilder; + + // The constructor takes a buffer of SPIR-V words and parses it. + // It will create its own parser, parse the SPIR-V and move the parsed IR + // as if you had called the constructors taking ParsedIR directly. + explicit Compiler(std::vector<uint32_t> ir); + Compiler(const uint32_t *ir, size_t word_count); + + // This is more modular. We can also consume a ParsedIR structure directly, either as a move, or copy. + // With copy, we can reuse the same parsed IR for multiple Compiler instances. + explicit Compiler(const ParsedIR &ir); + explicit Compiler(ParsedIR &&ir); + + virtual ~Compiler() = default; + + // After parsing, API users can modify the SPIR-V via reflection and call this + // to disassemble the SPIR-V into the desired langauage. + // Sub-classes actually implement this. + virtual std::string compile(); + + // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. + const std::string &get_name(uint32_t id) const; + + // Applies a decoration to an ID. Effectively injects OpDecorate. + void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); + + // Overrides the identifier OpName of an ID. + // Identifiers beginning with underscores or identifiers which contain double underscores + // are reserved by the implementation. + void set_name(uint32_t id, const std::string &name); + + // Gets a bitmask for the decorations which are applied to ID. + // I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar) + const Bitset &get_decoration_bitset(uint32_t id) const; + + // Returns whether the decoration has been applied to the ID. + bool has_decoration(uint32_t id, spv::Decoration decoration) const; + + // Gets the value for decorations which take arguments. + // If the decoration is a boolean (i.e. spv::DecorationNonWritable), + // 1 will be returned. + // If decoration doesn't exist or decoration is not recognized, + // 0 will be returned. + uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; + const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; + + // Removes the decoration for an ID. + void unset_decoration(uint32_t id, spv::Decoration decoration); + + // Gets the SPIR-V type associated with ID. + // Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource. + const SPIRType &get_type(uint32_t id) const; + + // Gets the SPIR-V type of a variable. + const SPIRType &get_type_from_variable(uint32_t id) const; + + // Gets the underlying storage class for an OpVariable. + spv::StorageClass get_storage_class(uint32_t id) const; + + // If get_name() is an empty string, get the fallback name which will be used + // instead in the disassembled source. + virtual const std::string get_fallback_name(uint32_t id) const; + + // If get_name() of a Block struct is an empty string, get the fallback name. + // This needs to be per-variable as multiple variables can use the same block type. + virtual const std::string get_block_fallback_name(uint32_t id) const; + + // Given an OpTypeStruct in ID, obtain the identifier for member number "index". + // This may be an empty string. + const std::string &get_member_name(uint32_t id, uint32_t index) const; + + // Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index". + uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; + + // Sets the member identifier for OpTypeStruct ID, member number "index". + void set_member_name(uint32_t id, uint32_t index, const std::string &name); + + // Returns the qualified member identifier for OpTypeStruct ID, member number "index", + // or an empty string if no qualified alias exists + const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const; + + // Gets the decoration mask for a member of a struct, similar to get_decoration_mask. + const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; + + // Returns whether the decoration has been applied to a member of a struct. + bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + + // Similar to set_decoration, but for struct members. + void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + const std::string &argument); + + // Unsets a member decoration, similar to unset_decoration. + void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + + // Gets the fallback name for a member, similar to get_fallback_name. + virtual const std::string get_fallback_member_name(uint32_t index) const + { + return join("_", index); + } + + // Returns a vector of which members of a struct are potentially in use by a + // SPIR-V shader. The granularity of this analysis is per-member of a struct. + // This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks. + // ID is the Resource::id obtained from get_shader_resources(). + std::vector<BufferRange> get_active_buffer_ranges(uint32_t id) const; + + // Returns the effective size of a buffer block. + size_t get_declared_struct_size(const SPIRType &struct_type) const; + + // Returns the effective size of a buffer block, with a given array size + // for a runtime array. + // SSBOs are typically declared as runtime arrays. get_declared_struct_size() will return 0 for the size. + // This is not very helpful for applications which might need to know the array stride of its last member. + // This can be done through the API, but it is not very intuitive how to accomplish this, so here we provide a helper function + // to query the size of the buffer, assuming that the last member has a certain size. + // If the buffer does not contain a runtime array, array_size is ignored, and the function will behave as + // get_declared_struct_size(). + // To get the array stride of the last member, something like: + // get_declared_struct_size_runtime_array(type, 1) - get_declared_struct_size_runtime_array(type, 0) will work. + size_t get_declared_struct_size_runtime_array(const SPIRType &struct_type, size_t array_size) const; + + // Returns the effective size of a buffer block struct member. + virtual size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const; + + // Returns a set of all global variables which are statically accessed + // by the control flow graph from the current entry point. + // Only variables which change the interface for a shader are returned, that is, + // variables with storage class of Input, Output, Uniform, UniformConstant, PushConstant and AtomicCounter + // storage classes are returned. + // + // To use the returned set as the filter for which variables are used during compilation, + // this set can be moved to set_enabled_interface_variables(). + std::unordered_set<uint32_t> get_active_interface_variables() const; + + // Sets the interface variables which are used during compilation. + // By default, all variables are used. + // Once set, compile() will only consider the set in active_variables. + void set_enabled_interface_variables(std::unordered_set<uint32_t> active_variables); + + // Query shader resources, use ids with reflection interface to modify or query binding points, etc. + ShaderResources get_shader_resources() const; + + // Query shader resources, but only return the variables which are part of active_variables. + // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically + // accessed. + ShaderResources get_shader_resources(const std::unordered_set<uint32_t> &active_variables) const; + + // Remapped variables are considered built-in variables and a backend will + // not emit a declaration for this variable. + // This is mostly useful for making use of builtins which are dependent on extensions. + void set_remapped_variable_state(uint32_t id, bool remap_enable); + bool get_remapped_variable_state(uint32_t id) const; + + // For subpassInput variables which are remapped to plain variables, + // the number of components in the remapped + // variable must be specified as the backing type of subpass inputs are opaque. + void set_subpass_input_remapped_components(uint32_t id, uint32_t components); + uint32_t get_subpass_input_remapped_components(uint32_t id) const; + + // All operations work on the current entry point. + // Entry points can be swapped out with set_entry_point(). + // Entry points should be set right after the constructor completes as some reflection functions traverse the graph from the entry point. + // Resource reflection also depends on the entry point. + // By default, the current entry point is set to the first OpEntryPoint which appears in the SPIR-V module. + + // Some shader languages restrict the names that can be given to entry points, and the + // corresponding backend will automatically rename an entry point name, during the call + // to compile() if it is illegal. For example, the common entry point name main() is + // illegal in MSL, and is renamed to an alternate name by the MSL backend. + // Given the original entry point name contained in the SPIR-V, this function returns + // the name, as updated by the backend during the call to compile(). If the name is not + // illegal, and has not been renamed, or if this function is called before compile(), + // this function will simply return the same name. + + // New variants of entry point query and reflection. + // Names for entry points in the SPIR-V module may alias if they belong to different execution models. + // To disambiguate, we must pass along with the entry point names the execution model. + std::vector<EntryPoint> get_entry_points_and_stages() const; + void set_entry_point(const std::string &entry, spv::ExecutionModel execution_model); + + // Renames an entry point from old_name to new_name. + // If old_name is currently selected as the current entry point, it will continue to be the current entry point, + // albeit with a new name. + // get_entry_points() is essentially invalidated at this point. + void rename_entry_point(const std::string &old_name, const std::string &new_name, + spv::ExecutionModel execution_model); + const SPIREntryPoint &get_entry_point(const std::string &name, spv::ExecutionModel execution_model) const; + SPIREntryPoint &get_entry_point(const std::string &name, spv::ExecutionModel execution_model); + const std::string &get_cleansed_entry_point_name(const std::string &name, + spv::ExecutionModel execution_model) const; + + // Query and modify OpExecutionMode. + const Bitset &get_execution_mode_bitset() const; + + void unset_execution_mode(spv::ExecutionMode mode); + void set_execution_mode(spv::ExecutionMode mode, uint32_t arg0 = 0, uint32_t arg1 = 0, uint32_t arg2 = 0); + + // Gets argument for an execution mode (LocalSize, Invocations, OutputVertices). + // For LocalSize, the index argument is used to select the dimension (X = 0, Y = 1, Z = 2). + // For execution modes which do not have arguments, 0 is returned. + uint32_t get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index = 0) const; + spv::ExecutionModel get_execution_model() const; + + bool is_tessellation_shader() const; + + // In SPIR-V, the compute work group size can be represented by a constant vector, in which case + // the LocalSize execution mode is ignored. + // + // This constant vector can be a constant vector, specialization constant vector, or partly specialized constant vector. + // To modify and query work group dimensions which are specialization constants, SPIRConstant values must be modified + // directly via get_constant() rather than using LocalSize directly. This function will return which constants should be modified. + // + // To modify dimensions which are *not* specialization constants, set_execution_mode should be used directly. + // Arguments to set_execution_mode which are specialization constants are effectively ignored during compilation. + // NOTE: This is somewhat different from how SPIR-V works. In SPIR-V, the constant vector will completely replace LocalSize, + // while in this interface, LocalSize is only ignored for specialization constants. + // + // The specialization constant will be written to x, y and z arguments. + // If the component is not a specialization constant, a zeroed out struct will be written. + // The return value is the constant ID of the builtin WorkGroupSize, but this is not expected to be useful + // for most use cases. + uint32_t get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, + SpecializationConstant &z) const; + + // Analyzes all OpImageFetch (texelFetch) opcodes and checks if there are instances where + // said instruction is used without a combined image sampler. + // GLSL targets do not support the use of texelFetch without a sampler. + // To workaround this, we must inject a dummy sampler which can be used to form a sampler2D at the call-site of + // texelFetch as necessary. + // + // This must be called before build_combined_image_samplers(). + // build_combined_image_samplers() may refer to the ID returned by this method if the returned ID is non-zero. + // The return value will be the ID of a sampler object if a dummy sampler is necessary, or 0 if no sampler object + // is required. + // + // If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile(). + // Calling this function also invalidates get_active_interface_variables(), so this should be called + // before that function. + uint32_t build_dummy_sampler_for_combined_images(); + + // Analyzes all separate image and samplers used from the currently selected entry point, + // and re-routes them all to a combined image sampler instead. + // This is required to "support" separate image samplers in targets which do not natively support + // this feature, like GLSL/ESSL. + // + // This must be called before compile() if such remapping is desired. + // This call will add new sampled images to the SPIR-V, + // so it will appear in reflection if get_shader_resources() is called after build_combined_image_samplers. + // + // If any image/sampler remapping was found, no separate image/samplers will appear in the decompiled output, + // but will still appear in reflection. + // + // The resulting samplers will be void of any decorations like name, descriptor sets and binding points, + // so this can be added before compile() if desired. + // + // Combined image samplers originating from this set are always considered active variables. + // Arrays of separate samplers are not supported, but arrays of separate images are supported. + // Array of images + sampler -> Array of combined image samplers. + void build_combined_image_samplers(); + + // Gets a remapping for the combined image samplers. + const std::vector<CombinedImageSampler> &get_combined_image_samplers() const + { + return combined_image_samplers; + } + + // Set a new variable type remap callback. + // The type remapping is designed to allow global interface variable to assume more special types. + // A typical example here is to remap sampler2D into samplerExternalOES, which currently isn't supported + // directly by SPIR-V. + // + // In compile() while emitting code, + // for every variable that is declared, including function parameters, the callback will be called + // and the API user has a chance to change the textual representation of the type used to declare the variable. + // The API user can detect special patterns in names to guide the remapping. + void set_variable_type_remap_callback(VariableTypeRemapCallback cb) + { + variable_remap_callback = std::move(cb); + } + + // API for querying which specialization constants exist. + // To modify a specialization constant before compile(), use get_constant(constant.id), + // then update constants directly in the SPIRConstant data structure. + // For composite types, the subconstants can be iterated over and modified. + // constant_type is the SPIRType for the specialization constant, + // which can be queried to determine which fields in the unions should be poked at. + std::vector<SpecializationConstant> get_specialization_constants() const; + SPIRConstant &get_constant(uint32_t id); + const SPIRConstant &get_constant(uint32_t id) const; + + uint32_t get_current_id_bound() const + { + return uint32_t(ir.ids.size()); + } + + // API for querying buffer objects. + // The type passed in here should be the base type of a resource, i.e. + // get_type(resource.base_type_id) + // as decorations are set in the basic Block type. + // The type passed in here must have these decorations set, or an exception is raised. + // Only UBOs and SSBOs or sub-structs which are part of these buffer types will have these decorations set. + uint32_t type_struct_member_offset(const SPIRType &type, uint32_t index) const; + uint32_t type_struct_member_array_stride(const SPIRType &type, uint32_t index) const; + uint32_t type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const; + + // Gets the offset in SPIR-V words (uint32_t) for a decoration which was originally declared in the SPIR-V binary. + // The offset will point to one or more uint32_t literals which can be modified in-place before using the SPIR-V binary. + // Note that adding or removing decorations using the reflection API will not change the behavior of this function. + // If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true, + // otherwise, returns false. + // If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false. + bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const; + + // HLSL counter buffer reflection interface. + // Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where + // one buffer implements the storage, and a single buffer containing just a lone "int" implements the counter. + // To SPIR-V these will be exposed as two separate buffers, but glslang HLSL frontend emits a special indentifier + // which lets us link the two buffers together. + + // Queries if a variable ID is a counter buffer which "belongs" to a regular buffer object. + + // If SPV_GOOGLE_hlsl_functionality1 is used, this can be used even with a stripped SPIR-V module. + // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will + // only return true if OpSource was reported HLSL. + // To rely on this functionality, ensure that the SPIR-V module is not stripped. + + bool buffer_is_hlsl_counter_buffer(uint32_t id) const; + + // Queries if a buffer object has a neighbor "counter" buffer. + // If so, the ID of that counter buffer will be returned in counter_id. + // If SPV_GOOGLE_hlsl_functionality1 is used, this can be used even with a stripped SPIR-V module. + // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will + // only return true if OpSource was reported HLSL. + // To rely on this functionality, ensure that the SPIR-V module is not stripped. + bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const; + + // Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module. + const std::vector<spv::Capability> &get_declared_capabilities() const; + + // Gets the list of all SPIR-V extensions which were declared in the SPIR-V module. + const std::vector<std::string> &get_declared_extensions() const; + + // When declaring buffer blocks in GLSL, the name declared in the GLSL source + // might not be the same as the name declared in the SPIR-V module due to naming conflicts. + // In this case, SPIRV-Cross needs to find a fallback-name, and it might only + // be possible to know this name after compiling to GLSL. + // This is particularly important for HLSL input and UAVs which tends to reuse the same block type + // for multiple distinct blocks. For these cases it is not possible to modify the name of the type itself + // because it might be unique. Instead, you can use this interface to check after compilation which + // name was actually used if your input SPIR-V tends to have this problem. + // For other names like remapped names for variables, etc, it's generally enough to query the name of the variables + // after compiling, block names are an exception to this rule. + // ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type. + // + // This also applies to HLSL cbuffers. + std::string get_remapped_declared_block_name(uint32_t id) const; + + // For buffer block variables, get the decorations for that variable. + // Sometimes, decorations for buffer blocks are found in member decorations instead + // of direct decorations on the variable itself. + // The most common use here is to check if a buffer is readonly or writeonly. + Bitset get_buffer_block_flags(uint32_t id) const; + +protected: + const uint32_t *stream(const Instruction &instr) const + { + // If we're not going to use any arguments, just return nullptr. + // We want to avoid case where we return an out of range pointer + // that trips debug assertions on some platforms. + if (!instr.length) + return nullptr; + + if (instr.offset + instr.length > ir.spirv.size()) + SPIRV_CROSS_THROW("Compiler::stream() out of range."); + return &ir.spirv[instr.offset]; + } + + ParsedIR ir; + // Marks variables which have global scope and variables which can alias with other variables + // (SSBO, image load store, etc) + std::vector<uint32_t> global_variables; + std::vector<uint32_t> aliased_variables; + + SPIRFunction *current_function = nullptr; + SPIRBlock *current_block = nullptr; + std::unordered_set<uint32_t> active_interface_variables; + bool check_active_interface_variables = false; + + // If our IDs are out of range here as part of opcodes, throw instead of + // undefined behavior. + template <typename T, typename... P> + T &set(uint32_t id, P &&... args) + { + ir.add_typed_id(static_cast<Types>(T::type), id); + auto &var = variant_set<T>(ir.ids[id], std::forward<P>(args)...); + var.self = id; + return var; + } + + template <typename T> + T &get(uint32_t id) + { + return variant_get<T>(ir.ids[id]); + } + + template <typename T> + T *maybe_get(uint32_t id) + { + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast<Types>(T::type)) + return &get<T>(id); + else + return nullptr; + } + + template <typename T> + const T &get(uint32_t id) const + { + return variant_get<T>(ir.ids[id]); + } + + template <typename T> + const T *maybe_get(uint32_t id) const + { + if (ir.ids[id].get_type() == static_cast<Types>(T::type)) + return &get<T>(id); + else + return nullptr; + } + + // Gets the id of SPIR-V type underlying the given type_id, which might be a pointer. + uint32_t get_pointee_type_id(uint32_t type_id) const; + + // Gets the SPIR-V type underlying the given type, which might be a pointer. + const SPIRType &get_pointee_type(const SPIRType &type) const; + + // Gets the SPIR-V type underlying the given type_id, which might be a pointer. + const SPIRType &get_pointee_type(uint32_t type_id) const; + + // Gets the ID of the SPIR-V type underlying a variable. + uint32_t get_variable_data_type_id(const SPIRVariable &var) const; + + // Gets the SPIR-V type underlying a variable. + SPIRType &get_variable_data_type(const SPIRVariable &var); + + // Gets the SPIR-V type underlying a variable. + const SPIRType &get_variable_data_type(const SPIRVariable &var) const; + + // Gets the SPIR-V element type underlying an array variable. + SPIRType &get_variable_element_type(const SPIRVariable &var); + + // Gets the SPIR-V element type underlying an array variable. + const SPIRType &get_variable_element_type(const SPIRVariable &var) const; + + // Sets the qualified member identifier for OpTypeStruct ID, member number "index". + void set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name); + void set_qualified_name(uint32_t id, const std::string &name); + + // Returns if the given type refers to a sampled image. + bool is_sampled_image_type(const SPIRType &type); + + const SPIREntryPoint &get_entry_point() const; + SPIREntryPoint &get_entry_point(); + static bool is_tessellation_shader(spv::ExecutionModel model); + + virtual std::string to_name(uint32_t id, bool allow_alias = true) const; + bool is_builtin_variable(const SPIRVariable &var) const; + bool is_builtin_type(const SPIRType &type) const; + bool is_hidden_variable(const SPIRVariable &var, bool include_builtins = false) const; + bool is_immutable(uint32_t id) const; + bool is_member_builtin(const SPIRType &type, uint32_t index, spv::BuiltIn *builtin) const; + bool is_scalar(const SPIRType &type) const; + bool is_vector(const SPIRType &type) const; + bool is_matrix(const SPIRType &type) const; + bool is_array(const SPIRType &type) const; + uint32_t expression_type_id(uint32_t id) const; + const SPIRType &expression_type(uint32_t id) const; + bool expression_is_lvalue(uint32_t id) const; + bool variable_storage_is_aliased(const SPIRVariable &var); + SPIRVariable *maybe_get_backing_variable(uint32_t chain); + + void register_read(uint32_t expr, uint32_t chain, bool forwarded); + void register_write(uint32_t chain); + + inline bool is_continue(uint32_t next) const + { + return (ir.block_meta[next] & ParsedIR::BLOCK_META_CONTINUE_BIT) != 0; + } + + inline bool is_single_block_loop(uint32_t next) const + { + auto &block = get<SPIRBlock>(next); + return block.merge == SPIRBlock::MergeLoop && block.continue_block == next; + } + + inline bool is_break(uint32_t next) const + { + return (ir.block_meta[next] & + (ParsedIR::BLOCK_META_LOOP_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT)) != 0; + } + + inline bool is_loop_break(uint32_t next) const + { + return (ir.block_meta[next] & ParsedIR::BLOCK_META_LOOP_MERGE_BIT) != 0; + } + + inline bool is_conditional(uint32_t next) const + { + return (ir.block_meta[next] & + (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT)) != 0; + } + + // Dependency tracking for temporaries read from variables. + void flush_dependees(SPIRVariable &var); + void flush_all_active_variables(); + void flush_control_dependent_expressions(uint32_t block); + void flush_all_atomic_capable_variables(); + void flush_all_aliased_variables(); + void register_global_read_dependencies(const SPIRBlock &func, uint32_t id); + void register_global_read_dependencies(const SPIRFunction &func, uint32_t id); + std::unordered_set<uint32_t> invalid_expressions; + + void update_name_cache(std::unordered_set<std::string> &cache, std::string &name); + + // A variant which takes two sets of names. The secondary is only used to verify there are no collisions, + // but the set is not updated when we have found a new name. + // Used primarily when adding block interface names. + void update_name_cache(std::unordered_set<std::string> &cache_primary, + const std::unordered_set<std::string> &cache_secondary, std::string &name); + + bool function_is_pure(const SPIRFunction &func); + bool block_is_pure(const SPIRBlock &block); + bool block_is_outside_flow_control_from_block(const SPIRBlock &from, const SPIRBlock &to); + + bool execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const; + bool execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const; + SPIRBlock::ContinueBlockType continue_block_type(const SPIRBlock &continue_block) const; + + bool force_recompile = false; + + bool block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const; + + bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; + void inherit_expression_dependencies(uint32_t dst, uint32_t source); + void add_implied_read_expression(SPIRExpression &e, uint32_t source); + void add_implied_read_expression(SPIRAccessChain &e, uint32_t source); + + // For proper multiple entry point support, allow querying if an Input or Output + // variable is part of that entry points interface. + bool interface_variable_exists_in_entry_point(uint32_t id) const; + + std::vector<CombinedImageSampler> combined_image_samplers; + + void remap_variable_type_name(const SPIRType &type, const std::string &var_name, std::string &type_name) const + { + if (variable_remap_callback) + variable_remap_callback(type, var_name, type_name); + } + + void set_ir(const ParsedIR &parsed); + void set_ir(ParsedIR &&parsed); + void parse_fixup(); + + // Used internally to implement various traversals for queries. + struct OpcodeHandler + { + virtual ~OpcodeHandler() = default; + + // Return true if traversal should continue. + // If false, traversal will end immediately. + virtual bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) = 0; + + virtual bool follow_function_call(const SPIRFunction &) + { + return true; + } + + virtual void set_current_block(const SPIRBlock &) + { + } + + virtual bool begin_function_scope(const uint32_t *, uint32_t) + { + return true; + } + + virtual bool end_function_scope(const uint32_t *, uint32_t) + { + return true; + } + }; + + struct BufferAccessHandler : OpcodeHandler + { + BufferAccessHandler(const Compiler &compiler_, std::vector<BufferRange> &ranges_, uint32_t id_) + : compiler(compiler_) + , ranges(ranges_) + , id(id_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + std::vector<BufferRange> &ranges; + uint32_t id; + + std::unordered_set<uint32_t> seen; + }; + + struct InterfaceVariableAccessHandler : OpcodeHandler + { + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set<uint32_t> &variables_) + : compiler(compiler_) + , variables(variables_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + const Compiler &compiler; + std::unordered_set<uint32_t> &variables; + }; + + struct CombinedImageSamplerHandler : OpcodeHandler + { + CombinedImageSamplerHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + + // Each function in the call stack needs its own remapping for parameters so we can deduce which global variable each texture/sampler the parameter is statically bound to. + std::stack<std::unordered_map<uint32_t, uint32_t>> parameter_remapping; + std::stack<SPIRFunction *> functions; + + uint32_t remap_parameter(uint32_t id); + void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); + void pop_remap_parameters(); + void register_combined_image_sampler(SPIRFunction &caller, uint32_t texture_id, uint32_t sampler_id, + bool depth); + }; + + struct DummySamplerForCombinedImageHandler : OpcodeHandler + { + DummySamplerForCombinedImageHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool need_dummy_sampler = false; + }; + + struct ActiveBuiltinHandler : OpcodeHandler + { + ActiveBuiltinHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + + void handle_builtin(const SPIRType &type, spv::BuiltIn builtin, const Bitset &decoration_flags); + }; + + bool traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const; + bool traverse_all_reachable_opcodes(const SPIRFunction &block, OpcodeHandler &handler) const; + // This must be an ordered data structure so we always pick the same type aliases. + std::vector<uint32_t> global_struct_cache; + + ShaderResources get_shader_resources(const std::unordered_set<uint32_t> *active_variables) const; + + VariableTypeRemapCallback variable_remap_callback; + + bool get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type); + + std::unordered_set<uint32_t> forced_temporaries; + std::unordered_set<uint32_t> forwarded_temporaries; + std::unordered_set<uint32_t> hoisted_temporaries; + + Bitset active_input_builtins; + Bitset active_output_builtins; + uint32_t clip_distance_count = 0; + uint32_t cull_distance_count = 0; + bool position_invariant = false; + + // Traverses all reachable opcodes and sets active_builtins to a bitmask of all builtin variables which are accessed in the shader. + void update_active_builtins(); + bool has_active_builtin(spv::BuiltIn builtin, spv::StorageClass storage); + + void analyze_parameter_preservation( + SPIRFunction &entry, const CFG &cfg, + const std::unordered_map<uint32_t, std::unordered_set<uint32_t>> &variable_to_blocks, + const std::unordered_map<uint32_t, std::unordered_set<uint32_t>> &complete_write_blocks); + + // If a variable ID or parameter ID is found in this set, a sampler is actually a shadow/comparison sampler. + // SPIR-V does not support this distinction, so we must keep track of this information outside the type system. + // There might be unrelated IDs found in this set which do not correspond to actual variables. + // This set should only be queried for the existence of samplers which are already known to be variables or parameter IDs. + // Similar is implemented for images, as well as if subpass inputs are needed. + std::unordered_set<uint32_t> comparison_ids; + bool need_subpass_input = false; + + // In certain backends, we will need to use a dummy sampler to be able to emit code. + // GLSL does not support texelFetch on texture2D objects, but SPIR-V does, + // so we need to workaround by having the application inject a dummy sampler. + uint32_t dummy_sampler_id = 0; + + void analyze_image_and_sampler_usage(); + + struct CombinedImageSamplerDrefHandler : OpcodeHandler + { + CombinedImageSamplerDrefHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + std::unordered_set<uint32_t> dref_combined_samplers; + }; + + struct CombinedImageSamplerUsageHandler : OpcodeHandler + { + CombinedImageSamplerUsageHandler(Compiler &compiler_, + const std::unordered_set<uint32_t> &dref_combined_samplers_) + : compiler(compiler_) + , dref_combined_samplers(dref_combined_samplers_) + { + } + + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + const std::unordered_set<uint32_t> &dref_combined_samplers; + + std::unordered_map<uint32_t, std::unordered_set<uint32_t>> dependency_hierarchy; + std::unordered_set<uint32_t> comparison_ids; + + void add_hierarchy_to_comparison_ids(uint32_t ids); + bool need_subpass_input = false; + }; + + void build_function_control_flow_graphs_and_analyze(); + std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs; + struct CFGBuilder : OpcodeHandler + { + CFGBuilder(Compiler &compiler_); + + bool follow_function_call(const SPIRFunction &func) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + Compiler &compiler; + std::unordered_map<uint32_t, std::unique_ptr<CFG>> function_cfgs; + }; + + struct AnalyzeVariableScopeAccessHandler : OpcodeHandler + { + AnalyzeVariableScopeAccessHandler(Compiler &compiler_, SPIRFunction &entry_); + + bool follow_function_call(const SPIRFunction &) override; + void set_current_block(const SPIRBlock &block) override; + + void notify_variable_access(uint32_t id, uint32_t block); + bool id_is_phi_variable(uint32_t id) const; + bool id_is_potential_temporary(uint32_t id) const; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + SPIRFunction &entry; + std::unordered_map<uint32_t, std::unordered_set<uint32_t>> accessed_variables_to_block; + std::unordered_map<uint32_t, std::unordered_set<uint32_t>> accessed_temporaries_to_block; + std::unordered_map<uint32_t, uint32_t> result_id_to_type; + std::unordered_map<uint32_t, std::unordered_set<uint32_t>> complete_write_variables_to_block; + std::unordered_map<uint32_t, std::unordered_set<uint32_t>> partial_write_variables_to_block; + const SPIRBlock *current_block = nullptr; + }; + + struct StaticExpressionAccessHandler : OpcodeHandler + { + StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_); + bool follow_function_call(const SPIRFunction &) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t variable_id; + uint32_t static_expression = 0; + uint32_t write_count = 0; + }; + + void analyze_variable_scope(SPIRFunction &function, AnalyzeVariableScopeAccessHandler &handler); + void find_function_local_luts(SPIRFunction &function, const AnalyzeVariableScopeAccessHandler &handler, + bool single_function); + + void make_constant_null(uint32_t id, uint32_t type); + + std::unordered_map<uint32_t, std::string> declared_block_names; + + bool instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, + uint32_t length); + + Bitset combined_decoration_for_member(const SPIRType &type, uint32_t index) const; + static bool is_desktop_only_format(spv::ImageFormat format); + + bool image_is_comparison(const SPIRType &type, uint32_t id) const; + + void set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value = 0); + uint32_t get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; + bool has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const; + void unset_extended_decoration(uint32_t id, ExtendedDecorations decoration); + + void set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, + uint32_t value = 0); + uint32_t get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const; + bool has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const; + void unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration); + +private: + // Used only to implement the old deprecated get_entry_point() interface. + const SPIREntryPoint &get_first_entry_point(const std::string &name) const; + SPIREntryPoint &get_first_entry_point(const std::string &name); + + void fixup_type_alias(); + bool type_is_block_like(const SPIRType &type) const; + bool type_is_opaque_value(const SPIRType &type) const; +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp new file mode 100644 index 0000000..f41d216 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp @@ -0,0 +1,1864 @@ +/* + * Copyright 2019 Hans-Kristian Arntzen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross_c.h" + +#if SPIRV_CROSS_C_API_CPP +#include "spirv_cpp.hpp" +#endif +#if SPIRV_CROSS_C_API_GLSL +#include "spirv_glsl.hpp" +#else +#include "spirv_cross.hpp" +#endif +#if SPIRV_CROSS_C_API_HLSL +#include "spirv_hlsl.hpp" +#endif +#if SPIRV_CROSS_C_API_MSL +#include "spirv_msl.hpp" +#endif +#if SPIRV_CROSS_C_API_REFLECT +#include "spirv_reflect.hpp" +#endif +#include "spirv_parser.hpp" +#include <string.h> +#include <memory> +#include <new> + +// clang-format off + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#define SPVC_BEGIN_SAFE_SCOPE try +#else +#define SPVC_BEGIN_SAFE_SCOPE +#endif + +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#define SPVC_END_SAFE_SCOPE(context, error) \ + catch (const std::exception &e) \ + { \ + (context)->report_error(e.what()); \ + return (error); \ + } +#else +#define SPVC_END_SAFE_SCOPE(context, error) +#endif + +using namespace std; +using namespace spirv_cross; + +struct ScratchMemoryAllocation +{ + virtual ~ScratchMemoryAllocation() = default; +}; + +struct StringAllocation : ScratchMemoryAllocation +{ + explicit StringAllocation(const char *name) + : str(name) + { + } + + explicit StringAllocation(std::string name) + : str(std::move(name)) + { + } + + std::string str; +}; + +template <typename T> +struct TemporaryBuffer : ScratchMemoryAllocation +{ + std::vector<T> buffer; +}; + +template <typename T, typename... Ts> +static inline std::unique_ptr<T> spvc_allocate(Ts &&... ts) +{ + return std::unique_ptr<T>(new T(std::forward<Ts>(ts)...)); +} + +struct spvc_context_s +{ + string last_error; + vector<unique_ptr<ScratchMemoryAllocation>> allocations; + const char *allocate_name(const std::string &name); + + spvc_error_callback callback = nullptr; + void *callback_userdata = nullptr; + void report_error(std::string msg); +}; + +void spvc_context_s::report_error(std::string msg) +{ + last_error = std::move(msg); + if (callback) + callback(callback_userdata, last_error.c_str()); +} + +const char *spvc_context_s::allocate_name(const std::string &name) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto alloc = spvc_allocate<StringAllocation>(name); + auto *ret = alloc->str.c_str(); + allocations.emplace_back(std::move(alloc)); + return ret; + } + SPVC_END_SAFE_SCOPE(this, nullptr) +} + +struct spvc_parsed_ir_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + ParsedIR parsed; +}; + +struct spvc_compiler_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + unique_ptr<Compiler> compiler; + spvc_backend backend = SPVC_BACKEND_NONE; +}; + +struct spvc_compiler_options_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + uint32_t backend_flags = 0; +#if SPIRV_CROSS_C_API_GLSL + CompilerGLSL::Options glsl; +#endif +#if SPIRV_CROSS_C_API_MSL + CompilerMSL::Options msl; +#endif +#if SPIRV_CROSS_C_API_HLSL + CompilerHLSL::Options hlsl; +#endif +}; + +struct spvc_set_s : ScratchMemoryAllocation +{ + std::unordered_set<uint32_t> set; +}; + +// Dummy-inherit to we can keep our opaque type handle type safe in C-land as well, +// and avoid just throwing void * around. +struct spvc_type_s : SPIRType +{ +}; + +struct spvc_constant_s : SPIRConstant +{ +}; + +struct spvc_resources_s : ScratchMemoryAllocation +{ + spvc_context context = nullptr; + std::vector<spvc_reflected_resource> uniform_buffers; + std::vector<spvc_reflected_resource> storage_buffers; + std::vector<spvc_reflected_resource> stage_inputs; + std::vector<spvc_reflected_resource> stage_outputs; + std::vector<spvc_reflected_resource> subpass_inputs; + std::vector<spvc_reflected_resource> storage_images; + std::vector<spvc_reflected_resource> sampled_images; + std::vector<spvc_reflected_resource> atomic_counters; + std::vector<spvc_reflected_resource> push_constant_buffers; + std::vector<spvc_reflected_resource> separate_images; + std::vector<spvc_reflected_resource> separate_samplers; + std::vector<spvc_reflected_resource> acceleration_structures; + + bool copy_resources(std::vector<spvc_reflected_resource> &outputs, const std::vector<Resource> &inputs); + bool copy_resources(const ShaderResources &resources); +}; + +spvc_result spvc_context_create(spvc_context *context) +{ + auto *ctx = new (std::nothrow) spvc_context_s; + if (!ctx) + return SPVC_ERROR_OUT_OF_MEMORY; + + *context = ctx; + return SPVC_SUCCESS; +} + +void spvc_context_destroy(spvc_context context) +{ + delete context; +} + +void spvc_context_release_allocations(spvc_context context) +{ + context->allocations.clear(); +} + +const char *spvc_context_get_last_error_string(spvc_context context) +{ + return context->last_error.c_str(); +} + +SPVC_PUBLIC_API void spvc_context_set_error_callback(spvc_context context, spvc_error_callback cb, void *userdata) +{ + context->callback = cb; + context->callback_userdata = userdata; +} + +spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, size_t word_count, + spvc_parsed_ir *parsed_ir) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr<spvc_parsed_ir_s> pir(new (std::nothrow) spvc_parsed_ir_s); + if (!pir) + { + context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + pir->context = context; + Parser parser(spirv, word_count); + parser.parse(); + pir->parsed = move(parser.get_parsed_ir()); + *parsed_ir = pir.get(); + context->allocations.push_back(std::move(pir)); + } + SPVC_END_SAFE_SCOPE(context, SPVC_ERROR_INVALID_SPIRV) + return SPVC_SUCCESS; +} + +spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend backend, spvc_parsed_ir parsed_ir, + spvc_capture_mode mode, spvc_compiler *compiler) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr<spvc_compiler_s> comp(new (std::nothrow) spvc_compiler_s); + if (!comp) + { + context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + comp->backend = backend; + comp->context = context; + + if (mode != SPVC_CAPTURE_MODE_COPY && mode != SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + { + context->report_error("Invalid argument for capture mode."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + switch (backend) + { + case SPVC_BACKEND_NONE: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new Compiler(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new Compiler(parsed_ir->parsed)); + break; + +#if SPIRV_CROSS_C_API_GLSL + case SPVC_BACKEND_GLSL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerGLSL(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerGLSL(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_BACKEND_HLSL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerHLSL(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerHLSL(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_MSL + case SPVC_BACKEND_MSL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerMSL(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerMSL(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_CPP + case SPVC_BACKEND_CPP: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerCPP(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerCPP(parsed_ir->parsed)); + break; +#endif + +#if SPIRV_CROSS_C_API_REFLECT + case SPVC_BACKEND_JSON: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerReflection(move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + comp->compiler.reset(new CompilerReflection(parsed_ir->parsed)); + break; +#endif + + default: + context->report_error("Invalid backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + *compiler = comp.get(); + context->allocations.push_back(std::move(comp)); + } + SPVC_END_SAFE_SCOPE(context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, spvc_compiler_options *options) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr<spvc_compiler_options_s> opt(new (std::nothrow) spvc_compiler_options_s); + if (!opt) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + opt->context = compiler->context; + opt->backend_flags = 0; + switch (compiler->backend) + { +#if SPIRV_CROSS_C_API_MSL + case SPVC_BACKEND_MSL: + opt->backend_flags |= SPVC_COMPILER_OPTION_MSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast<CompilerMSL *>(compiler->compiler.get())->get_common_options(); + opt->msl = static_cast<CompilerMSL *>(compiler->compiler.get())->get_msl_options(); + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_BACKEND_HLSL: + opt->backend_flags |= SPVC_COMPILER_OPTION_HLSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast<CompilerHLSL *>(compiler->compiler.get())->get_common_options(); + opt->hlsl = static_cast<CompilerHLSL *>(compiler->compiler.get())->get_hlsl_options(); + break; +#endif + +#if SPIRV_CROSS_C_API_GLSL + case SPVC_BACKEND_GLSL: + opt->backend_flags |= SPVC_COMPILER_OPTION_GLSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast<CompilerGLSL *>(compiler->compiler.get())->get_common_options(); + break; +#endif + + default: + break; + } + + *options = opt.get(); + compiler->context->allocations.push_back(std::move(opt)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_options_set_bool(spvc_compiler_options options, spvc_compiler_option option, + spvc_bool value) +{ + return spvc_compiler_options_set_uint(options, option, value ? 1 : 0); +} + +spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_compiler_option option, unsigned value) +{ + (void)value; + (void)option; + uint32_t supported_mask = options->backend_flags; + uint32_t required_mask = option & SPVC_COMPILER_OPTION_LANG_BITS; + if ((required_mask | supported_mask) != supported_mask) + { + options->context->report_error("Option is not supported by current backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + switch (option) + { +#if SPIRV_CROSS_C_API_GLSL + case SPVC_COMPILER_OPTION_FORCE_TEMPORARY: + options->glsl.force_temporary = value != 0; + break; + case SPVC_COMPILER_OPTION_FLATTEN_MULTIDIMENSIONAL_ARRAYS: + options->glsl.flatten_multidimensional_arrays = value != 0; + break; + case SPVC_COMPILER_OPTION_FIXUP_DEPTH_CONVENTION: + options->glsl.vertex.fixup_clipspace = value != 0; + break; + case SPVC_COMPILER_OPTION_FLIP_VERTEX_Y: + options->glsl.vertex.flip_vert_y = value != 0; + break; + + case SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE: + options->glsl.vertex.support_nonzero_base_instance = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_SEPARATE_SHADER_OBJECTS: + options->glsl.separate_shader_objects = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION: + options->glsl.enable_420pack_extension = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_VERSION: + options->glsl.version = value; + break; + case SPVC_COMPILER_OPTION_GLSL_ES: + options->glsl.es = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS: + options->glsl.vulkan_semantics = value != 0; + break; + case SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_FLOAT_PRECISION_HIGHP: + options->glsl.fragment.default_float_precision = + value != 0 ? CompilerGLSL::Options::Precision::Highp : CompilerGLSL::Options::Precision::Mediump; + break; + case SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_INT_PRECISION_HIGHP: + options->glsl.fragment.default_int_precision = + value != 0 ? CompilerGLSL::Options::Precision::Highp : CompilerGLSL::Options::Precision::Mediump; + break; + case SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER: + options->glsl.emit_push_constant_as_uniform_buffer = value != 0; + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL: + options->hlsl.shader_model = value; + break; + + case SPVC_COMPILER_OPTION_HLSL_POINT_SIZE_COMPAT: + options->hlsl.point_size_compat = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_POINT_COORD_COMPAT: + options->hlsl.point_coord_compat = value != 0; + break; + + case SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE: + options->hlsl.support_nonzero_base_vertex_base_instance = value != 0; + break; +#endif + +#if SPIRV_CROSS_C_API_MSL + case SPVC_COMPILER_OPTION_MSL_VERSION: + options->msl.msl_version = value; + break; + + case SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH: + options->msl.texel_buffer_texture_width = value; + break; + + case SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX: + options->msl.aux_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX: + options->msl.indirect_params_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX: + options->msl.shader_output_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX: + options->msl.shader_patch_output_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_TESS_FACTOR_OUTPUT_BUFFER_INDEX: + options->msl.shader_tess_factor_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_WORKGROUP_INDEX: + options->msl.shader_input_wg_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_BUILTIN: + options->msl.enable_point_size_builtin = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISABLE_RASTERIZATION: + options->msl.disable_rasterization = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_CAPTURE_OUTPUT_TO_BUFFER: + options->msl.capture_output_to_buffer = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_SWIZZLE_TEXTURE_SAMPLES: + options->msl.swizzle_texture_samples = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS: + options->msl.pad_fragment_output_components = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_TESS_DOMAIN_ORIGIN_LOWER_LEFT: + options->msl.tess_domain_origin_lower_left = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_PLATFORM: + options->msl.platform = static_cast<CompilerMSL::Options::Platform>(value); + break; + + case SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS: + options->msl.argument_buffers = value != 0; + break; +#endif + + default: + options->context->report_error("Unknown option."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, spvc_compiler_options options) +{ + (void)options; + switch (compiler->backend) + { +#if SPIRV_CROSS_C_API_GLSL + case SPVC_BACKEND_GLSL: + static_cast<CompilerGLSL &>(*compiler->compiler).set_common_options(options->glsl); + break; +#endif + +#if SPIRV_CROSS_C_API_HLSL + case SPVC_BACKEND_HLSL: + static_cast<CompilerHLSL &>(*compiler->compiler).set_common_options(options->glsl); + static_cast<CompilerHLSL &>(*compiler->compiler).set_hlsl_options(options->hlsl); + break; +#endif + +#if SPIRV_CROSS_C_API_MSL + case SPVC_BACKEND_MSL: + static_cast<CompilerMSL &>(*compiler->compiler).set_common_options(options->glsl); + static_cast<CompilerMSL &>(*compiler->compiler).set_msl_options(options->msl); + break; +#endif + + default: + break; + } + + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_add_header_line(spvc_compiler compiler, const char *line) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast<CompilerGLSL *>(compiler->compiler.get())->add_header_line(line); + return SPVC_SUCCESS; +#else + (void)line; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *line) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast<CompilerGLSL *>(compiler->compiler.get())->require_extension(line); + return SPVC_SUCCESS; +#else + (void)line; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id) +{ +#if SPIRV_CROSS_C_API_GLSL + if (compiler->backend == SPVC_BACKEND_NONE) + { + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + static_cast<CompilerGLSL *>(compiler->compiler.get())->flatten_buffer_block(id); + return SPVC_SUCCESS; +#else + (void)id; + compiler->context->report_error("Cross-compilation related option used on NONE backend which only supports reflection."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, + const spvc_hlsl_root_constants *constant_info, + size_t count) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast<CompilerHLSL *>(compiler->compiler.get()); + std::vector<RootConstants> roots; + roots.reserve(count); + for (size_t i = 0; i < count; i++) + { + RootConstants root; + root.binding = constant_info[i].binding; + root.space = constant_info[i].space; + root.start = constant_info[i].start; + root.end = constant_info[i].end; + roots.push_back(root); + } + + hlsl.set_root_constant_layouts(std::move(roots)); + return SPVC_SUCCESS; +#else + (void)constant_info; + (void)count; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_compiler compiler, + const spvc_hlsl_vertex_attribute_remap *remap, + size_t count) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + HLSLVertexAttributeRemap re; + auto &hlsl = *static_cast<CompilerHLSL *>(compiler->compiler.get()); + for (size_t i = 0; i < count; i++) + { + re.location = remap[i].location; + re.semantic = remap[i].semantic; + hlsl.add_vertex_attribute_remap(re); + } + + return SPVC_SUCCESS; +#else + (void)remap; + (void)count; + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return 0; + } + + auto &hlsl = *static_cast<CompilerHLSL *>(compiler->compiler.get()); + return hlsl.remap_num_workgroups_builtin(); +#else + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return 0; +#endif +} + +spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.get_is_rasterization_disabled() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.needs_aux_buffer() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.needs_output_buffer() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.needs_patch_output_buffer() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.needs_input_threadgroup_mem() ? SPVC_TRUE : SPVC_FALSE; +#else + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, const spvc_msl_vertex_attribute *va) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLVertexAttr attr; + attr.location = va->location; + attr.msl_buffer = va->msl_buffer; + attr.msl_offset = va->msl_offset; + attr.msl_stride = va->msl_stride; + attr.format = static_cast<MSLVertexFormat>(va->format); + attr.builtin = static_cast<spv::BuiltIn>(va->builtin); + attr.per_instance = va->per_instance; + msl.add_msl_vertex_attribute(attr); + return SPVC_SUCCESS; +#else + (void)va; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, + const spvc_msl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLResourceBinding bind; + bind.binding = binding->binding; + bind.desc_set = binding->desc_set; + bind.stage = static_cast<spv::ExecutionModel>(binding->stage); + bind.msl_buffer = binding->msl_buffer; + bind.msl_texture = binding->msl_texture; + bind.msl_sampler = binding->msl_sampler; + msl.add_msl_resource_binding(bind); + return SPVC_SUCCESS; +#else + (void)binding; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + msl.add_discrete_descriptor_set(desc_set); + return SPVC_SUCCESS; +#else + (void)desc_set; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.is_msl_vertex_attribute_used(location) ? SPVC_TRUE : SPVC_FALSE; +#else + (void)location; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, unsigned set, + unsigned binding) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + return msl.is_msl_resource_binding_used(static_cast<spv::ExecutionModel>(model), set, binding) ? SPVC_TRUE : + SPVC_FALSE; +#else + (void)model; + (void)set; + (void)binding; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_FALSE; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + MSLConstexprSampler samp; + samp.s_address = static_cast<MSLSamplerAddress>(sampler->s_address); + samp.t_address = static_cast<MSLSamplerAddress>(sampler->t_address); + samp.r_address = static_cast<MSLSamplerAddress>(sampler->r_address); + samp.lod_clamp_min = sampler->lod_clamp_min; + samp.lod_clamp_max = sampler->lod_clamp_max; + samp.lod_clamp_enable = sampler->lod_clamp_enable; + samp.min_filter = static_cast<MSLSamplerFilter>(sampler->min_filter); + samp.mag_filter = static_cast<MSLSamplerFilter>(sampler->mag_filter); + samp.mip_filter = static_cast<MSLSamplerMipFilter>(sampler->mip_filter); + samp.compare_enable = sampler->compare_enable; + samp.anisotropy_enable = sampler->anisotropy_enable; + samp.max_anisotropy = sampler->max_anisotropy; + samp.compare_func = static_cast<MSLSamplerCompareFunc>(sampler->compare_func); + samp.coord = static_cast<MSLSamplerCoord>(sampler->coord); + samp.border_color = static_cast<MSLSamplerBorderColor>(sampler->border_color); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, + unsigned components) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get()); + msl.set_fragment_output_components(location, components); + return SPVC_SUCCESS; +#else + (void)location; + (void)components; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto result = compiler->compiler->compile(); + if (result.empty()) + { + compiler->context->report_error("Unsupported SPIR-V."); + return SPVC_ERROR_UNSUPPORTED_SPIRV; + } + + *source = compiler->context->allocate_name(result); + if (!*source) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + return SPVC_SUCCESS; + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_UNSUPPORTED_SPIRV) +} + +bool spvc_resources_s::copy_resources(std::vector<spvc_reflected_resource> &outputs, + const std::vector<Resource> &inputs) +{ + for (auto &i : inputs) + { + spvc_reflected_resource r; + r.base_type_id = i.base_type_id; + r.type_id = i.type_id; + r.id = i.id; + r.name = context->allocate_name(i.name); + if (!r.name) + return false; + + outputs.push_back(r); + } + + return true; +} + +bool spvc_resources_s::copy_resources(const ShaderResources &resources) +{ + if (!copy_resources(uniform_buffers, resources.uniform_buffers)) + return false; + if (!copy_resources(storage_buffers, resources.storage_buffers)) + return false; + if (!copy_resources(stage_inputs, resources.stage_inputs)) + return false; + if (!copy_resources(stage_outputs, resources.stage_outputs)) + return false; + if (!copy_resources(subpass_inputs, resources.subpass_inputs)) + return false; + if (!copy_resources(storage_images, resources.storage_images)) + return false; + if (!copy_resources(sampled_images, resources.sampled_images)) + return false; + if (!copy_resources(atomic_counters, resources.atomic_counters)) + return false; + if (!copy_resources(push_constant_buffers, resources.push_constant_buffers)) + return false; + if (!copy_resources(separate_images, resources.separate_images)) + return false; + if (!copy_resources(separate_samplers, resources.separate_samplers)) + return false; + if (!copy_resources(acceleration_structures, resources.acceleration_structures)) + return false; + + return true; +} + +spvc_result spvc_compiler_get_active_interface_variables(spvc_compiler compiler, spvc_set *set) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr<spvc_set_s> ptr(new (std::nothrow) spvc_set_s); + if (!ptr) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + auto active = compiler->compiler->get_active_interface_variables(); + ptr->set = std::move(active); + *set = ptr.get(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_set_enabled_interface_variables(spvc_compiler compiler, spvc_set set) +{ + SPVC_BEGIN_SAFE_SCOPE + { + compiler->compiler->set_enabled_interface_variables(set->set); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_create_shader_resources_for_active_variables(spvc_compiler compiler, spvc_resources *resources, + spvc_set set) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr<spvc_resources_s> res(new (std::nothrow) spvc_resources_s); + if (!res) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + res->context = compiler->context; + auto accessed_resources = compiler->compiler->get_shader_resources(set->set); + + if (!res->copy_resources(accessed_resources)) + { + res->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + *resources = res.get(); + compiler->context->allocations.push_back(std::move(res)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_create_shader_resources(spvc_compiler compiler, spvc_resources *resources) +{ + SPVC_BEGIN_SAFE_SCOPE + { + std::unique_ptr<spvc_resources_s> res(new (std::nothrow) spvc_resources_s); + if (!res) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + res->context = compiler->context; + auto accessed_resources = compiler->compiler->get_shader_resources(); + + if (!res->copy_resources(accessed_resources)) + { + res->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + + *resources = res.get(); + compiler->context->allocations.push_back(std::move(res)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, spvc_resource_type type, + const spvc_reflected_resource **resource_list, + size_t *resource_size) +{ + const std::vector<spvc_reflected_resource> *list = nullptr; + switch (type) + { + case SPVC_RESOURCE_TYPE_UNIFORM_BUFFER: + list = &resources->uniform_buffers; + break; + + case SPVC_RESOURCE_TYPE_STORAGE_BUFFER: + list = &resources->storage_buffers; + break; + + case SPVC_RESOURCE_TYPE_STAGE_INPUT: + list = &resources->stage_inputs; + break; + + case SPVC_RESOURCE_TYPE_STAGE_OUTPUT: + list = &resources->stage_outputs; + break; + + case SPVC_RESOURCE_TYPE_SUBPASS_INPUT: + list = &resources->subpass_inputs; + break; + + case SPVC_RESOURCE_TYPE_STORAGE_IMAGE: + list = &resources->storage_images; + break; + + case SPVC_RESOURCE_TYPE_SAMPLED_IMAGE: + list = &resources->sampled_images; + break; + + case SPVC_RESOURCE_TYPE_ATOMIC_COUNTER: + list = &resources->atomic_counters; + break; + + case SPVC_RESOURCE_TYPE_PUSH_CONSTANT: + list = &resources->push_constant_buffers; + break; + + case SPVC_RESOURCE_TYPE_SEPARATE_IMAGE: + list = &resources->separate_images; + break; + + case SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS: + list = &resources->separate_samplers; + break; + + case SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE: + list = &resources->acceleration_structures; + break; + + default: + break; + } + + if (!list) + { + resources->context->report_error("Invalid argument."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + *resource_size = list->size(); + *resource_list = list->data(); + return SPVC_SUCCESS; +} + +void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, unsigned argument) +{ + compiler->compiler->set_decoration(id, static_cast<spv::Decoration>(decoration), argument); +} + +void spvc_compiler_set_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration, + const char *argument) +{ + compiler->compiler->set_decoration_string(id, static_cast<spv::Decoration>(decoration), argument); +} + +void spvc_compiler_set_name(spvc_compiler compiler, SpvId id, const char *argument) +{ + compiler->compiler->set_name(id, argument); +} + +void spvc_compiler_set_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration, unsigned argument) +{ + compiler->compiler->set_member_decoration(id, member_index, static_cast<spv::Decoration>(decoration), argument); +} + +void spvc_compiler_set_member_decoration_string(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration, const char *argument) +{ + compiler->compiler->set_member_decoration_string(id, member_index, static_cast<spv::Decoration>(decoration), + argument); +} + +void spvc_compiler_set_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index, const char *argument) +{ + compiler->compiler->set_member_name(id, member_index, argument); +} + +void spvc_compiler_unset_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + compiler->compiler->unset_decoration(id, static_cast<spv::Decoration>(decoration)); +} + +void spvc_compiler_unset_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + compiler->compiler->unset_member_decoration(id, member_index, static_cast<spv::Decoration>(decoration)); +} + +spvc_bool spvc_compiler_has_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + return compiler->compiler->has_decoration(id, static_cast<spv::Decoration>(decoration)) ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_compiler_has_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + return compiler->compiler->has_member_decoration(id, member_index, static_cast<spv::Decoration>(decoration)) ? + SPVC_TRUE : + SPVC_FALSE; +} + +const char *spvc_compiler_get_name(spvc_compiler compiler, SpvId id) +{ + return compiler->compiler->get_name(id).c_str(); +} + +unsigned spvc_compiler_get_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + return compiler->compiler->get_decoration(id, static_cast<spv::Decoration>(decoration)); +} + +const char *spvc_compiler_get_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration) +{ + return compiler->compiler->get_decoration_string(id, static_cast<spv::Decoration>(decoration)).c_str(); +} + +unsigned spvc_compiler_get_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + return compiler->compiler->get_member_decoration(id, member_index, static_cast<spv::Decoration>(decoration)); +} + +const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration) +{ + return compiler->compiler->get_member_decoration_string(id, member_index, static_cast<spv::Decoration>(decoration)) + .c_str(); +} + +spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, const spvc_entry_point **entry_points, + size_t *num_entry_points) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto entries = compiler->compiler->get_entry_points_and_stages(); + std::vector<spvc_entry_point> translated; + translated.reserve(entries.size()); + + for (auto &entry : entries) + { + spvc_entry_point new_entry; + new_entry.execution_model = static_cast<SpvExecutionModel>(entry.execution_model); + new_entry.name = compiler->context->allocate_name(entry.name); + if (!new_entry.name) + { + compiler->context->report_error("Out of memory."); + return SPVC_ERROR_OUT_OF_MEMORY; + } + translated.push_back(new_entry); + } + + auto ptr = spvc_allocate<TemporaryBuffer<spvc_entry_point>>(); + ptr->buffer = std::move(translated); + *entry_points = ptr->buffer.data(); + *num_entry_points = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_set_entry_point(spvc_compiler compiler, const char *name, SpvExecutionModel model) +{ + compiler->compiler->set_entry_point(name, static_cast<spv::ExecutionModel>(model)); + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_rename_entry_point(spvc_compiler compiler, const char *old_name, const char *new_name, + SpvExecutionModel model) +{ + SPVC_BEGIN_SAFE_SCOPE + { + compiler->compiler->rename_entry_point(old_name, new_name, static_cast<spv::ExecutionModel>(model)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +const char *spvc_compiler_get_cleansed_entry_point_name(spvc_compiler compiler, const char *name, + SpvExecutionModel model) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto cleansed_name = + compiler->compiler->get_cleansed_entry_point_name(name, static_cast<spv::ExecutionModel>(model)); + return compiler->context->allocate_name(cleansed_name); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +void spvc_compiler_set_execution_mode(spvc_compiler compiler, SpvExecutionMode mode) +{ + compiler->compiler->set_execution_mode(static_cast<spv::ExecutionMode>(mode)); +} + +void spvc_compiler_set_execution_mode_with_arguments(spvc_compiler compiler, SpvExecutionMode mode, unsigned arg0, + unsigned arg1, + unsigned arg2) +{ + compiler->compiler->set_execution_mode(static_cast<spv::ExecutionMode>(mode), arg0, arg1, arg2); +} + +void spvc_compiler_unset_execution_mode(spvc_compiler compiler, SpvExecutionMode mode) +{ + compiler->compiler->unset_execution_mode(static_cast<spv::ExecutionMode>(mode)); +} + +spvc_result spvc_compiler_get_execution_modes(spvc_compiler compiler, const SpvExecutionMode **modes, size_t *num_modes) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto ptr = spvc_allocate<TemporaryBuffer<SpvExecutionMode>>(); + + compiler->compiler->get_execution_mode_bitset().for_each_bit( + [&](uint32_t bit) { ptr->buffer.push_back(static_cast<SpvExecutionMode>(bit)); }); + + *modes = ptr->buffer.data(); + *num_modes = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler compiler, SpvExecutionMode mode) +{ + return compiler->compiler->get_execution_mode_argument(static_cast<spv::ExecutionMode>(mode)); +} + +unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, SpvExecutionMode mode, + unsigned index) +{ + return compiler->compiler->get_execution_mode_argument(static_cast<spv::ExecutionMode>(mode), index); +} + +SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler) +{ + return static_cast<SpvExecutionModel>(compiler->compiler->get_execution_model()); +} + +spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) +{ + // Should only throw if an intentionally garbage ID is passed, but the IDs are not type-safe. + SPVC_BEGIN_SAFE_SCOPE + { + return static_cast<spvc_type>(&compiler->compiler->get_type(id)); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +static spvc_basetype convert_basetype(SPIRType::BaseType type) +{ + // For now the enums match up. + return static_cast<spvc_basetype>(type); +} + +spvc_basetype spvc_type_get_basetype(spvc_type type) +{ + return convert_basetype(type->basetype); +} + +unsigned spvc_type_get_bit_width(spvc_type type) +{ + return type->width; +} + +unsigned spvc_type_get_vector_size(spvc_type type) +{ + return type->vecsize; +} + +unsigned spvc_type_get_columns(spvc_type type) +{ + return type->columns; +} + +unsigned spvc_type_get_num_array_dimensions(spvc_type type) +{ + return unsigned(type->array.size()); +} + +spvc_bool spvc_type_array_dimension_is_literal(spvc_type type, unsigned dimension) +{ + return type->array_size_literal[dimension] ? SPVC_TRUE : SPVC_FALSE; +} + +SpvId spvc_type_get_array_dimension(spvc_type type, unsigned dimension) +{ + return type->array[dimension]; +} + +unsigned spvc_type_get_num_member_types(spvc_type type) +{ + return unsigned(type->member_types.size()); +} + +spvc_type_id spvc_type_get_member_type(spvc_type type, unsigned index) +{ + return type->member_types[index]; +} + +SpvStorageClass spvc_type_get_storage_class(spvc_type type) +{ + return static_cast<SpvStorageClass>(type->storage); +} + +// Image type query. +spvc_type_id spvc_type_get_image_sampled_type(spvc_type type) +{ + return type->image.type; +} + +SpvDim spvc_type_get_image_dimension(spvc_type type) +{ + return static_cast<SpvDim>(type->image.dim); +} + +spvc_bool spvc_type_get_image_is_depth(spvc_type type) +{ + return type->image.depth ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_type_get_image_arrayed(spvc_type type) +{ + return type->image.arrayed ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_type_get_image_multisampled(spvc_type type) +{ + return type->image.ms ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_type_get_image_is_storage(spvc_type type) +{ + return type->image.sampled == 2 ? SPVC_TRUE : SPVC_FALSE; +} + +SpvImageFormat spvc_type_get_image_storage_format(spvc_type type) +{ + return static_cast<SpvImageFormat>(static_cast<const SPIRType *>(type)->image.format); +} + +SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_type type) +{ + return static_cast<SpvAccessQualifier>(static_cast<const SPIRType *>(type)->image.access); +} + +spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_size(*static_cast<const SPIRType *>(struct_type)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, spvc_type struct_type, + size_t array_size, size_t *size) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *size = compiler->compiler->get_declared_struct_size_runtime_array(*static_cast<const SPIRType *>(struct_type), + array_size); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *offset) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *offset = compiler->compiler->type_struct_member_offset(*static_cast<const SPIRType *>(type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_type_struct_member_array_stride(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *stride) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *stride = compiler->compiler->type_struct_member_array_stride(*static_cast<const SPIRType *>(type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_type_struct_member_matrix_stride(spvc_compiler compiler, spvc_type type, unsigned index, unsigned *stride) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *stride = compiler->compiler->type_struct_member_matrix_stride(*static_cast<const SPIRType *>(type), index); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_build_dummy_sampler_for_combined_images(spvc_compiler compiler, spvc_variable_id *id) +{ + SPVC_BEGIN_SAFE_SCOPE + { + *id = compiler->compiler->build_dummy_sampler_for_combined_images(); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_build_combined_image_samplers(spvc_compiler compiler) +{ + SPVC_BEGIN_SAFE_SCOPE + { + compiler->compiler->build_combined_image_samplers(); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_UNSUPPORTED_SPIRV) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_combined_image_samplers(spvc_compiler compiler, + const spvc_combined_image_sampler **samplers, + size_t *num_samplers) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto combined = compiler->compiler->get_combined_image_samplers(); + std::vector<spvc_combined_image_sampler> translated; + translated.reserve(combined.size()); + for (auto &c : combined) + { + spvc_combined_image_sampler trans = { c.combined_id, c.image_id, c.sampler_id }; + translated.push_back(trans); + } + + auto ptr = spvc_allocate<TemporaryBuffer<spvc_combined_image_sampler>>(); + ptr->buffer = std::move(translated); + *samplers = ptr->buffer.data(); + *num_samplers = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_specialization_constants(spvc_compiler compiler, + const spvc_specialization_constant **constants, + size_t *num_constants) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto spec_constants = compiler->compiler->get_specialization_constants(); + std::vector<spvc_specialization_constant> translated; + translated.reserve(spec_constants.size()); + for (auto &c : spec_constants) + { + spvc_specialization_constant trans = { c.id, c.constant_id }; + translated.push_back(trans); + } + + auto ptr = spvc_allocate<TemporaryBuffer<spvc_specialization_constant>>(); + ptr->buffer = std::move(translated); + *constants = ptr->buffer.data(); + *num_constants = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +spvc_constant spvc_compiler_get_constant_handle(spvc_compiler compiler, spvc_variable_id id) +{ + SPVC_BEGIN_SAFE_SCOPE + { + return static_cast<spvc_constant>(&compiler->compiler->get_constant(id)); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc_compiler compiler, + spvc_specialization_constant *x, + spvc_specialization_constant *y, + spvc_specialization_constant *z) +{ + SpecializationConstant tmpx; + SpecializationConstant tmpy; + SpecializationConstant tmpz; + spvc_constant_id ret = compiler->compiler->get_work_group_size_specialization_constants(tmpx, tmpy, tmpz); + x->id = tmpx.id; + x->constant_id = tmpx.constant_id; + y->id = tmpy.id; + y->constant_id = tmpy.constant_id; + z->id = tmpz.id; + z->constant_id = tmpz.constant_id; + return ret; +} + +float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_f16(column, row); +} + +float spvc_constant_get_scalar_fp32(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_f32(column, row); +} + +double spvc_constant_get_scalar_fp64(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_f64(column, row); +} + +unsigned spvc_constant_get_scalar_u32(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar(column, row); +} + +int spvc_constant_get_scalar_i32(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_i32(column, row); +} + +unsigned spvc_constant_get_scalar_u16(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_u16(column, row); +} + +int spvc_constant_get_scalar_i16(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_i16(column, row); +} + +unsigned spvc_constant_get_scalar_u8(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_u8(column, row); +} + +int spvc_constant_get_scalar_i8(spvc_constant constant, unsigned column, unsigned row) +{ + return constant->scalar_i8(column, row); +} + +void spvc_constant_get_subconstants(spvc_constant constant, const spvc_constant_id **constituents, size_t *count) +{ + static_assert(sizeof(spvc_constant_id) == sizeof(constant->subconstants.front()), "ID size is not consistent."); + *constituents = reinterpret_cast<spvc_constant_id *>(constant->subconstants.data()); + *count = constant->subconstants.size(); +} + +spvc_type_id spvc_constant_get_type(spvc_constant constant) +{ + return constant->constant_type; +} + +spvc_bool spvc_compiler_get_binary_offset_for_decoration(spvc_compiler compiler, spvc_variable_id id, + SpvDecoration decoration, + unsigned *word_offset) +{ + uint32_t off = 0; + bool ret = compiler->compiler->get_binary_offset_for_decoration(id, static_cast<spv::Decoration>(decoration), off); + if (ret) + { + *word_offset = off; + return SPVC_TRUE; + } + else + return SPVC_FALSE; +} + +spvc_bool spvc_compiler_buffer_is_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id) +{ + return compiler->compiler->buffer_is_hlsl_counter_buffer(id) ? SPVC_TRUE : SPVC_FALSE; +} + +spvc_bool spvc_compiler_buffer_get_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id, + spvc_variable_id *counter_id) +{ + uint32_t buffer; + bool ret = compiler->compiler->buffer_get_hlsl_counter_buffer(id, buffer); + if (ret) + { + *counter_id = buffer; + return SPVC_TRUE; + } + else + return SPVC_FALSE; +} + +spvc_result spvc_compiler_get_declared_capabilities(spvc_compiler compiler, const SpvCapability **capabilities, + size_t *num_capabilities) +{ + auto &caps = compiler->compiler->get_declared_capabilities(); + static_assert(sizeof(SpvCapability) == sizeof(spv::Capability), "Enum size mismatch."); + *capabilities = reinterpret_cast<const SpvCapability *>(caps.data()); + *num_capabilities = caps.size(); + return SPVC_SUCCESS; +} + +spvc_result spvc_compiler_get_declared_extensions(spvc_compiler compiler, const char ***extensions, + size_t *num_extensions) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto &exts = compiler->compiler->get_declared_extensions(); + std::vector<const char *> duped; + duped.reserve(exts.size()); + for (auto &ext : exts) + duped.push_back(compiler->context->allocate_name(ext)); + + auto ptr = spvc_allocate<TemporaryBuffer<const char *>>(); + ptr->buffer = std::move(duped); + *extensions = ptr->buffer.data(); + *num_extensions = ptr->buffer.size(); + compiler->context->allocations.push_back(std::move(ptr)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_OUT_OF_MEMORY) + return SPVC_SUCCESS; +} + +const char *spvc_compiler_get_remapped_declared_block_name(spvc_compiler compiler, spvc_variable_id id) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto name = compiler->compiler->get_remapped_declared_block_name(id); + return compiler->context->allocate_name(name); + } + SPVC_END_SAFE_SCOPE(compiler->context, nullptr) +} + +spvc_result spvc_compiler_get_buffer_block_decorations(spvc_compiler compiler, spvc_variable_id id, + const SpvDecoration **decorations, size_t *num_decorations) +{ + SPVC_BEGIN_SAFE_SCOPE + { + auto flags = compiler->compiler->get_buffer_block_flags(id); + auto bitset = spvc_allocate<TemporaryBuffer<SpvDecoration>>(); + + flags.for_each_bit([&](uint32_t bit) { bitset->buffer.push_back(static_cast<SpvDecoration>(bit)); }); + + *decorations = bitset->buffer.data(); + *num_decorations = bitset->buffer.size(); + compiler->context->allocations.push_back(std::move(bitset)); + } + SPVC_END_SAFE_SCOPE(compiler->context, SPVC_ERROR_INVALID_ARGUMENT) + return SPVC_SUCCESS; +} + +unsigned spvc_msl_get_aux_buffer_struct_version(void) +{ + return SPVC_MSL_AUX_BUFFER_STRUCT_VERSION; +} + +void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr) +{ +#if SPIRV_CROSS_C_API_MSL + // Crude, but works. + MSLVertexAttr attr_default; + attr->location = attr_default.location; + attr->per_instance = attr_default.per_instance ? SPVC_TRUE : SPVC_FALSE; + attr->format = static_cast<spvc_msl_vertex_format>(attr_default.format); + attr->builtin = static_cast<SpvBuiltIn>(attr_default.builtin); + attr->msl_buffer = attr_default.msl_buffer; + attr->msl_offset = attr_default.msl_offset; + attr->msl_stride = attr_default.msl_stride; +#else + memset(attr, 0, sizeof(*attr)); +#endif +} + +void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding) +{ +#if SPIRV_CROSS_C_API_MSL + MSLResourceBinding binding_default; + binding->desc_set = binding_default.desc_set; + binding->binding = binding_default.binding; + binding->msl_buffer = binding_default.msl_buffer; + binding->msl_texture = binding_default.msl_texture; + binding->msl_sampler = binding_default.msl_sampler; + binding->stage = static_cast<SpvExecutionModel>(binding_default.stage); +#else + memset(binding, 0, sizeof(*binding)); +#endif +} + +void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + sampler->anisotropy_enable = defaults.anisotropy_enable ? SPVC_TRUE : SPVC_FALSE; + sampler->border_color = static_cast<spvc_msl_sampler_border_color>(defaults.border_color); + sampler->compare_enable = defaults.compare_enable ? SPVC_TRUE : SPVC_FALSE; + sampler->coord = static_cast<spvc_msl_sampler_coord>(defaults.coord); + sampler->compare_func = static_cast<spvc_msl_sampler_compare_func>(defaults.compare_func); + sampler->lod_clamp_enable = defaults.lod_clamp_enable ? SPVC_TRUE : SPVC_FALSE; + sampler->lod_clamp_max = defaults.lod_clamp_max; + sampler->lod_clamp_min = defaults.lod_clamp_min; + sampler->mag_filter = static_cast<spvc_msl_sampler_filter>(defaults.mag_filter); + sampler->min_filter = static_cast<spvc_msl_sampler_filter>(defaults.min_filter); + sampler->mip_filter = static_cast<spvc_msl_sampler_mip_filter>(defaults.mip_filter); + sampler->max_anisotropy = defaults.max_anisotropy; + sampler->s_address = static_cast<spvc_msl_sampler_address>(defaults.s_address); + sampler->t_address = static_cast<spvc_msl_sampler_address>(defaults.t_address); + sampler->r_address = static_cast<spvc_msl_sampler_address>(defaults.r_address); +#else + memset(sampler, 0, sizeof(*sampler)); +#endif +} + +unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler) +{ + return compiler->compiler->get_current_id_bound(); +} + +void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch) +{ + *major = SPVC_C_API_VERSION_MAJOR; + *minor = SPVC_C_API_VERSION_MINOR; + *patch = SPVC_C_API_VERSION_PATCH; +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h new file mode 100644 index 0000000..5491a2e --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h @@ -0,0 +1,703 @@ +/* + * Copyright 2019 Hans-Kristian Arntzen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_C_API_H +#define SPIRV_CROSS_C_API_H + +#include <stddef.h> +#include "spirv.h" + +/* + * C89-compatible wrapper for SPIRV-Cross' API. + * Documentation here is sparse unless the behavior does not map 1:1 with C++ API. + * It is recommended to look at the canonical C++ API for more detailed information. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Bumped if ABI or API breaks backwards compatibility. */ +#define SPVC_C_API_VERSION_MAJOR 0 +/* Bumped if APIs or enumerations are added in a backwards compatible way. */ +#define SPVC_C_API_VERSION_MINOR 5 +/* Bumped if internal implementation details change. */ +#define SPVC_C_API_VERSION_PATCH 0 + +#if !defined(SPVC_PUBLIC_API) +#if defined(SPVC_EXPORT_SYMBOLS) +/* Exports symbols. Standard C calling convention is used. */ +#if defined(__GNUC__) +#define SPVC_PUBLIC_API __attribute__((visibility("default"))) +#elif defined(_MSC_VER) +#define SPVC_PUBLIC_API __declspec(dllexport) +#else +#define SPVC_PUBLIC_API +#endif +#else +#define SPVC_PUBLIC_API +#endif +#endif + +/* + * Gets the SPVC_C_API_VERSION_* used to build this library. + * Can be used to check for ABI mismatch if so-versioning did not catch it. + */ +SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch); + +/* These types are opaque to the user. */ +typedef struct spvc_context_s *spvc_context; +typedef struct spvc_parsed_ir_s *spvc_parsed_ir; +typedef struct spvc_compiler_s *spvc_compiler; +typedef struct spvc_compiler_options_s *spvc_compiler_options; +typedef struct spvc_resources_s *spvc_resources; +struct spvc_type_s; +typedef const struct spvc_type_s *spvc_type; +typedef struct spvc_constant_s *spvc_constant; +struct spvc_set_s; +typedef const struct spvc_set_s *spvc_set; + +/* + * Shallow typedefs. All SPIR-V IDs are plain 32-bit numbers, but this helps communicate which data is used. + * Maps to a SPIRType. + */ +typedef SpvId spvc_type_id; +/* Maps to a SPIRVariable. */ +typedef SpvId spvc_variable_id; +/* Maps to a SPIRConstant. */ +typedef SpvId spvc_constant_id; + +/* See C++ API. */ +typedef struct spvc_reflected_resource +{ + spvc_variable_id id; + spvc_type_id base_type_id; + spvc_type_id type_id; + const char *name; +} spvc_reflected_resource; + +/* See C++ API. */ +typedef struct spvc_entry_point +{ + SpvExecutionModel execution_model; + const char *name; +} spvc_entry_point; + +/* See C++ API. */ +typedef struct spvc_combined_image_sampler +{ + spvc_variable_id combined_id; + spvc_variable_id image_id; + spvc_variable_id sampler_id; +} spvc_combined_image_sampler; + +/* See C++ API. */ +typedef struct spvc_specialization_constant +{ + spvc_constant_id id; + unsigned constant_id; +} spvc_specialization_constant; + +/* See C++ API. */ +typedef struct spvc_hlsl_root_constants +{ + unsigned start; + unsigned end; + unsigned binding; + unsigned space; +} spvc_hlsl_root_constants; + +/* See C++ API. */ +typedef struct spvc_hlsl_vertex_attribute_remap +{ + unsigned location; + const char *semantic; +} spvc_hlsl_vertex_attribute_remap; + +/* + * Be compatible with non-C99 compilers, which do not have stdbool. + * Only recent MSVC compilers supports this for example, and ideally SPIRV-Cross should be linkable + * from a wide range of compilers in its C wrapper. + */ +typedef unsigned char spvc_bool; +#define SPVC_TRUE ((spvc_bool)1) +#define SPVC_FALSE ((spvc_bool)0) + +typedef enum spvc_result +{ + /* Success. */ + SPVC_SUCCESS = 0, + + /* The SPIR-V is invalid. Should have been caught by validation ideally. */ + SPVC_ERROR_INVALID_SPIRV = -1, + + /* The SPIR-V might be valid or invalid, but SPIRV-Cross currently cannot correctly translate this to your target language. */ + SPVC_ERROR_UNSUPPORTED_SPIRV = -2, + + /* If for some reason we hit this, new or malloc failed. */ + SPVC_ERROR_OUT_OF_MEMORY = -3, + + /* Invalid API argument. */ + SPVC_ERROR_INVALID_ARGUMENT = -4, + + SPVC_ERROR_INT_MAX = 0x7fffffff +} spvc_result; + +typedef enum spvc_capture_mode +{ + /* The Parsed IR payload will be copied, and the handle can be reused to create other compiler instances. */ + SPVC_CAPTURE_MODE_COPY = 0, + + /* + * The payload will now be owned by the compiler. + * parsed_ir should now be considered a dead blob and must not be used further. + * This is optimal for performance and should be the go-to option. + */ + SPVC_CAPTURE_MODE_TAKE_OWNERSHIP = 1, + + SPVC_CAPTURE_MODE_INT_MAX = 0x7fffffff +} spvc_capture_mode; + +typedef enum spvc_backend +{ + /* This backend can only perform reflection, no compiler options are supported. Maps to spirv_cross::Compiler. */ + SPVC_BACKEND_NONE = 0, + SPVC_BACKEND_GLSL = 1, /* spirv_cross::CompilerGLSL */ + SPVC_BACKEND_HLSL = 2, /* CompilerHLSL */ + SPVC_BACKEND_MSL = 3, /* CompilerMSL */ + SPVC_BACKEND_CPP = 4, /* CompilerCPP */ + SPVC_BACKEND_JSON = 5, /* CompilerReflection w/ JSON backend */ + SPVC_BACKEND_INT_MAX = 0x7fffffff +} spvc_backend; + +/* Maps to C++ API. */ +typedef enum spvc_resource_type +{ + SPVC_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_RESOURCE_TYPE_UNIFORM_BUFFER = 1, + SPVC_RESOURCE_TYPE_STORAGE_BUFFER = 2, + SPVC_RESOURCE_TYPE_STAGE_INPUT = 3, + SPVC_RESOURCE_TYPE_STAGE_OUTPUT = 4, + SPVC_RESOURCE_TYPE_SUBPASS_INPUT = 5, + SPVC_RESOURCE_TYPE_STORAGE_IMAGE = 6, + SPVC_RESOURCE_TYPE_SAMPLED_IMAGE = 7, + SPVC_RESOURCE_TYPE_ATOMIC_COUNTER = 8, + SPVC_RESOURCE_TYPE_PUSH_CONSTANT = 9, + SPVC_RESOURCE_TYPE_SEPARATE_IMAGE = 10, + SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS = 11, + SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE = 12, + SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff +} spvc_resource_type; + +/* Maps to spirv_cross::SPIRType::BaseType. */ +typedef enum spvc_basetype +{ + SPVC_BASETYPE_UNKNOWN = 0, + SPVC_BASETYPE_VOID = 1, + SPVC_BASETYPE_BOOLEAN = 2, + SPVC_BASETYPE_INT8 = 3, + SPVC_BASETYPE_UINT8 = 4, + SPVC_BASETYPE_INT16 = 5, + SPVC_BASETYPE_UINT16 = 6, + SPVC_BASETYPE_INT32 = 7, + SPVC_BASETYPE_UINT32 = 8, + SPVC_BASETYPE_INT64 = 9, + SPVC_BASETYPE_UINT64 = 10, + SPVC_BASETYPE_ATOMIC_COUNTER = 11, + SPVC_BASETYPE_FP16 = 12, + SPVC_BASETYPE_FP32 = 13, + SPVC_BASETYPE_FP64 = 14, + SPVC_BASETYPE_STRUCT = 15, + SPVC_BASETYPE_IMAGE = 16, + SPVC_BASETYPE_SAMPLED_IMAGE = 17, + SPVC_BASETYPE_SAMPLER = 18, + SPVC_BASETYPE_ACCELERATION_STRUCTURE = 19, + + SPVC_BASETYPE_INT_MAX = 0x7fffffff +} spvc_basetype; + +#define SPVC_COMPILER_OPTION_COMMON_BIT 0x1000000 +#define SPVC_COMPILER_OPTION_GLSL_BIT 0x2000000 +#define SPVC_COMPILER_OPTION_HLSL_BIT 0x4000000 +#define SPVC_COMPILER_OPTION_MSL_BIT 0x8000000 +#define SPVC_COMPILER_OPTION_LANG_BITS 0x0f000000 +#define SPVC_COMPILER_OPTION_ENUM_BITS 0xffffff + +#define SPVC_MAKE_MSL_VERSION(major, minor, patch) ((major) * 10000 + (minor) * 100 + (patch)) + +/* Maps to C++ API. */ +typedef enum spvc_msl_platform +{ + SPVC_MSL_PLATFORM_IOS = 0, + SPVC_MSL_PLATFORM_MACOS = 1, + SPVC_MSL_PLATFORM_MAX_INT = 0x7fffffff +} spvc_msl_platform; + +/* Maps to C++ API. */ +typedef enum spvc_msl_vertex_format +{ + SPVC_MSL_VERTEX_FORMAT_OTHER = 0, + SPVC_MSL_VERTEX_FORMAT_UINT8 = 1, + SPVC_MSL_VERTEX_FORMAT_UINT16 = 2 +} spvc_msl_vertex_format; + +/* Maps to C++ API. */ +typedef struct spvc_msl_vertex_attribute +{ + unsigned location; + unsigned msl_buffer; + unsigned msl_offset; + unsigned msl_stride; + spvc_bool per_instance; + spvc_msl_vertex_format format; + SpvBuiltIn builtin; +} spvc_msl_vertex_attribute; + +/* + * Initializes the vertex attribute struct. + */ +SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); + +/* Maps to C++ API. */ +typedef struct spvc_msl_resource_binding +{ + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + unsigned msl_buffer; + unsigned msl_texture; + unsigned msl_sampler; +} spvc_msl_resource_binding; + +/* + * Initializes the resource binding struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding); + +#define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u)) +#define SPVC_MSL_PUSH_CONSTANT_BINDING (0) +#define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1 + +/* Runtime check for incompatibility. */ +SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void); + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_coord +{ + SPVC_MSL_SAMPLER_COORD_NORMALIZED = 0, + SPVC_MSL_SAMPLER_COORD_PIXEL = 1, + SPVC_MSL_SAMPLER_INT_MAX = 0x7fffffff +} spvc_msl_sampler_coord; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_filter +{ + SPVC_MSL_SAMPLER_FILTER_NEAREST = 0, + SPVC_MSL_SAMPLER_FILTER_LINEAR = 1, + SPVC_MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff +} spvc_msl_sampler_filter; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_mip_filter +{ + SPVC_MSL_SAMPLER_MIP_FILTER_NONE = 0, + SPVC_MSL_SAMPLER_MIP_FILTER_NEAREST = 1, + SPVC_MSL_SAMPLER_MIP_FILTER_LINEAR = 2, + SPVC_MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff +} spvc_msl_sampler_mip_filter; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_address +{ + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, + SPVC_MSL_SAMPLER_ADDRESS_REPEAT = 3, + SPVC_MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, + SPVC_MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff +} spvc_msl_sampler_address; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_compare_func +{ + SPVC_MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, + SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS = 1, + SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, + SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, + SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, + SPVC_MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, + SPVC_MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, + SPVC_MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, + SPVC_MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff +} spvc_msl_sampler_compare_func; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_border_color +{ + SPVC_MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, + SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, + SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, + SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff +} spvc_msl_sampler_border_color; + +/* Maps to C++ API. */ +typedef struct spvc_msl_constexpr_sampler +{ + spvc_msl_sampler_coord coord; + spvc_msl_sampler_filter min_filter; + spvc_msl_sampler_filter mag_filter; + spvc_msl_sampler_mip_filter mip_filter; + spvc_msl_sampler_address s_address; + spvc_msl_sampler_address t_address; + spvc_msl_sampler_address r_address; + spvc_msl_sampler_compare_func compare_func; + spvc_msl_sampler_border_color border_color; + float lod_clamp_min; + float lod_clamp_max; + int max_anisotropy; + + spvc_bool compare_enable; + spvc_bool lod_clamp_enable; + spvc_bool anisotropy_enable; +} spvc_msl_constexpr_sampler; + +/* + * Initializes the constexpr sampler struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); + +/* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */ +typedef enum spvc_compiler_option +{ + SPVC_COMPILER_OPTION_UNKNOWN = 0, + + SPVC_COMPILER_OPTION_FORCE_TEMPORARY = 1 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FLATTEN_MULTIDIMENSIONAL_ARRAYS = 2 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FIXUP_DEPTH_CONVENTION = 3 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FLIP_VERTEX_Y = 4 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE = 5 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_SEPARATE_SHADER_OBJECTS = 6 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION = 7 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_VERSION = 8 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES = 9 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS = 10 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_FLOAT_PRECISION_HIGHP = 11 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_INT_PRECISION_HIGHP = 12 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL = 13 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_POINT_SIZE_COMPAT = 14 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_POINT_COORD_COMPAT = 15 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE = 16 | SPVC_COMPILER_OPTION_HLSL_BIT, + + SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_TESS_FACTOR_OUTPUT_BUFFER_INDEX = 23 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_WORKGROUP_INDEX = 24 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_BUILTIN = 25 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISABLE_RASTERIZATION = 26 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_CAPTURE_OUTPUT_TO_BUFFER = 27 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SWIZZLE_TEXTURE_SAMPLES = 28 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS = 29 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TESS_DOMAIN_ORIGIN_LOWER_LEFT = 30 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_PLATFORM = 31 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS = 32 | SPVC_COMPILER_OPTION_MSL_BIT, + + SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER = 33 | SPVC_COMPILER_OPTION_GLSL_BIT, + + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff +} spvc_compiler_option; + +/* + * Context is the highest-level API construct. + * The context owns all memory allocations made by its child object hierarchy, including various non-opaque structs and strings. + * This means that the API user only has to care about one "destroy" call ever when using the C API. + * All pointers handed out by the APIs are only valid as long as the context + * is alive and spvc_context_release_allocations has not been called. + */ +SPVC_PUBLIC_API spvc_result spvc_context_create(spvc_context *context); + +/* Frees all memory allocations and objects associated with the context and its child objects. */ +SPVC_PUBLIC_API void spvc_context_destroy(spvc_context context); + +/* Frees all memory allocations and objects associated with the context and its child objects, but keeps the context alive. */ +SPVC_PUBLIC_API void spvc_context_release_allocations(spvc_context context); + +/* Get the string for the last error which was logged. */ +SPVC_PUBLIC_API const char *spvc_context_get_last_error_string(spvc_context context); + +/* Get notified in a callback when an error triggers. Useful for debugging. */ +typedef void (*spvc_error_callback)(void *userdata, const char *error); +SPVC_PUBLIC_API void spvc_context_set_error_callback(spvc_context context, spvc_error_callback cb, void *userdata); + +/* SPIR-V parsing interface. Maps to Parser which then creates a ParsedIR, and that IR is extracted into the handle. */ +SPVC_PUBLIC_API spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, size_t word_count, + spvc_parsed_ir *parsed_ir); + +/* + * Create a compiler backend. Capture mode controls if we construct by copy or move semantics. + * It is always recommended to use SPVC_CAPTURE_MODE_TAKE_OWNERSHIP if you only intend to cross-compile the IR once. + */ +SPVC_PUBLIC_API spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend backend, + spvc_parsed_ir parsed_ir, spvc_capture_mode mode, + spvc_compiler *compiler); + +/* Maps directly to C++ API. */ +SPVC_PUBLIC_API unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler); + +/* Create compiler options, which will initialize defaults. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, + spvc_compiler_options *options); +/* Override options. Will return error if e.g. MSL options are used for the HLSL backend, etc. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_bool(spvc_compiler_options options, + spvc_compiler_option option, spvc_bool value); +SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, + spvc_compiler_option option, unsigned value); +/* Set compiler options. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, + spvc_compiler_options options); + +/* Compile IR into a string. *source is owned by the context, and caller must not free it themselves. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source); + +/* Maps to C++ API. */ +SPVC_PUBLIC_API spvc_result spvc_compiler_add_header_line(spvc_compiler compiler, const char *line); +SPVC_PUBLIC_API spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *ext); +SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id); + +/* + * HLSL specifics. + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, + const spvc_hlsl_root_constants *constant_info, + size_t count); +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_compiler compiler, + const spvc_hlsl_vertex_attribute_remap *remap, + size_t remaps); +SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); + +/* + * MSL specifics. + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, + const spvc_msl_vertex_attribute *attrs); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, + const spvc_msl_resource_binding *binding); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); +SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, + SpvExecutionModel model, + unsigned set, + unsigned binding); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components); + +/* + * Reflect resources. + * Maps almost 1:1 to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_interface_variables(spvc_compiler compiler, spvc_set *set); +SPVC_PUBLIC_API spvc_result spvc_compiler_set_enabled_interface_variables(spvc_compiler compiler, spvc_set set); +SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources(spvc_compiler compiler, spvc_resources *resources); +SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources_for_active_variables(spvc_compiler compiler, + spvc_resources *resources, + spvc_set active); +SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, spvc_resource_type type, + const spvc_reflected_resource **resource_list, + size_t *resource_size); + +/* + * Decorations. + * Maps to C++ API. + */ +SPVC_PUBLIC_API void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, + unsigned argument); +SPVC_PUBLIC_API void spvc_compiler_set_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration, + const char *argument); +SPVC_PUBLIC_API void spvc_compiler_set_name(spvc_compiler compiler, SpvId id, const char *argument); +SPVC_PUBLIC_API void spvc_compiler_set_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + SpvDecoration decoration, unsigned argument); +SPVC_PUBLIC_API void spvc_compiler_set_member_decoration_string(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration, + const char *argument); +SPVC_PUBLIC_API void spvc_compiler_set_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + const char *argument); +SPVC_PUBLIC_API void spvc_compiler_unset_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); +SPVC_PUBLIC_API void spvc_compiler_unset_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); + +SPVC_PUBLIC_API spvc_bool spvc_compiler_has_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); +SPVC_PUBLIC_API spvc_bool spvc_compiler_has_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); +SPVC_PUBLIC_API const char *spvc_compiler_get_name(spvc_compiler compiler, SpvId id); +SPVC_PUBLIC_API unsigned spvc_compiler_get_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); +SPVC_PUBLIC_API const char *spvc_compiler_get_decoration_string(spvc_compiler compiler, SpvId id, + SpvDecoration decoration); +SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); +SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); + +/* + * Entry points. + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, + const spvc_entry_point **entry_points, + size_t *num_entry_points); +SPVC_PUBLIC_API spvc_result spvc_compiler_set_entry_point(spvc_compiler compiler, const char *name, + SpvExecutionModel model); +SPVC_PUBLIC_API spvc_result spvc_compiler_rename_entry_point(spvc_compiler compiler, const char *old_name, + const char *new_name, SpvExecutionModel model); +SPVC_PUBLIC_API const char *spvc_compiler_get_cleansed_entry_point_name(spvc_compiler compiler, const char *name, + SpvExecutionModel model); +SPVC_PUBLIC_API void spvc_compiler_set_execution_mode(spvc_compiler compiler, SpvExecutionMode mode); +SPVC_PUBLIC_API void spvc_compiler_unset_execution_mode(spvc_compiler compiler, SpvExecutionMode mode); +SPVC_PUBLIC_API void spvc_compiler_set_execution_mode_with_arguments(spvc_compiler compiler, SpvExecutionMode mode, + unsigned arg0, unsigned arg1, unsigned arg2); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_execution_modes(spvc_compiler compiler, const SpvExecutionMode **modes, + size_t *num_modes); +SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler compiler, SpvExecutionMode mode); +SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, + SpvExecutionMode mode, unsigned index); +SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler); + +/* + * Type query interface. + * Maps to C++ API, except it's read-only. + */ +SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id); + +SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type); +SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type); +SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type); +SPVC_PUBLIC_API unsigned spvc_type_get_columns(spvc_type type); +SPVC_PUBLIC_API unsigned spvc_type_get_num_array_dimensions(spvc_type type); +SPVC_PUBLIC_API spvc_bool spvc_type_array_dimension_is_literal(spvc_type type, unsigned dimension); +SPVC_PUBLIC_API SpvId spvc_type_get_array_dimension(spvc_type type, unsigned dimension); +SPVC_PUBLIC_API unsigned spvc_type_get_num_member_types(spvc_type type); +SPVC_PUBLIC_API spvc_type_id spvc_type_get_member_type(spvc_type type, unsigned index); +SPVC_PUBLIC_API SpvStorageClass spvc_type_get_storage_class(spvc_type type); + +/* Image type query. */ +SPVC_PUBLIC_API spvc_type_id spvc_type_get_image_sampled_type(spvc_type type); +SPVC_PUBLIC_API SpvDim spvc_type_get_image_dimension(spvc_type type); +SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_depth(spvc_type type); +SPVC_PUBLIC_API spvc_bool spvc_type_get_image_arrayed(spvc_type type); +SPVC_PUBLIC_API spvc_bool spvc_type_get_image_multisampled(spvc_type type); +SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_storage(spvc_type type); +SPVC_PUBLIC_API SpvImageFormat spvc_type_get_image_storage_format(spvc_type type); +SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_type type); + +/* + * Buffer layout query. + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, + spvc_type struct_type, size_t array_size, size_t *size); + +SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, + spvc_type type, unsigned index, unsigned *offset); +SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_array_stride(spvc_compiler compiler, + spvc_type type, unsigned index, unsigned *stride); +SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_matrix_stride(spvc_compiler compiler, + spvc_type type, unsigned index, unsigned *stride); + +/* + * Workaround helper functions. + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_build_dummy_sampler_for_combined_images(spvc_compiler compiler, spvc_variable_id *id); +SPVC_PUBLIC_API spvc_result spvc_compiler_build_combined_image_samplers(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_combined_image_samplers(spvc_compiler compiler, + const spvc_combined_image_sampler **samplers, + size_t *num_samplers); + +/* + * Constants + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_result spvc_compiler_get_specialization_constants(spvc_compiler compiler, + const spvc_specialization_constant **constants, + size_t *num_constants); +SPVC_PUBLIC_API spvc_constant spvc_compiler_get_constant_handle(spvc_compiler compiler, + spvc_constant_id id); + +SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc_compiler compiler, + spvc_specialization_constant *x, + spvc_specialization_constant *y, + spvc_specialization_constant *z); + +/* + * No stdint.h until C99, sigh :( + * For smaller types, the result is sign or zero-extended as appropriate. + * Maps to C++ API. + * TODO: The SPIRConstant query interface and modification interface is not quite complete. + */ +SPVC_PUBLIC_API float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API float spvc_constant_get_scalar_fp32(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API double spvc_constant_get_scalar_fp64(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u32(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API int spvc_constant_get_scalar_i32(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u16(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API int spvc_constant_get_scalar_i16(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u8(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API int spvc_constant_get_scalar_i8(spvc_constant constant, unsigned column, unsigned row); +SPVC_PUBLIC_API void spvc_constant_get_subconstants(spvc_constant constant, const spvc_constant_id **constituents, size_t *count); +SPVC_PUBLIC_API spvc_type_id spvc_constant_get_type(spvc_constant constant); + +/* + * Misc reflection + * Maps to C++ API. + */ +SPVC_PUBLIC_API spvc_bool spvc_compiler_get_binary_offset_for_decoration(spvc_compiler compiler, + spvc_variable_id id, + SpvDecoration decoration, + unsigned *word_offset); + +SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_is_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_get_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id, + spvc_variable_id *counter_id); + +SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_capabilities(spvc_compiler compiler, + const SpvCapability **capabilities, + size_t *num_capabilities); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_extensions(spvc_compiler compiler, const char ***extensions, + size_t *num_extensions); + +SPVC_PUBLIC_API const char *spvc_compiler_get_remapped_declared_block_name(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_result spvc_compiler_get_buffer_block_decorations(spvc_compiler compiler, spvc_variable_id id, + const SpvDecoration **decorations, + size_t *num_decorations); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp new file mode 100644 index 0000000..f17c2be --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.cpp @@ -0,0 +1,648 @@ +/* + * Copyright 2018-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross_parsed_ir.hpp" +#include <algorithm> +#include <assert.h> + +using namespace std; +using namespace spv; + +namespace spirv_cross +{ +void ParsedIR::set_id_bounds(uint32_t bounds) +{ + ids.resize(bounds); + block_meta.resize(bounds); +} + +static string ensure_valid_identifier(const string &name, bool member) +{ + // Functions in glslangValidator are mangled with name(<mangled> stuff. + // Normally, we would never see '(' in any legal identifiers, so just strip them out. + auto str = name.substr(0, name.find('(')); + + for (uint32_t i = 0; i < str.size(); i++) + { + auto &c = str[i]; + + if (member) + { + // _m<num> variables are reserved by the internal implementation, + // otherwise, make sure the name is a valid identifier. + if (i == 0) + c = isalpha(c) ? c : '_'; + else if (i == 2 && str[0] == '_' && str[1] == 'm') + c = isalpha(c) ? c : '_'; + else + c = isalnum(c) ? c : '_'; + } + else + { + // _<num> variables are reserved by the internal implementation, + // otherwise, make sure the name is a valid identifier. + if (i == 0 || (str[0] == '_' && i == 1)) + c = isalpha(c) ? c : '_'; + else + c = isalnum(c) ? c : '_'; + } + } + return str; +} + +const string &ParsedIR::get_name(uint32_t id) const +{ + auto *m = find_meta(id); + if (m) + return m->decoration.alias; + else + return empty_string; +} + +const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const +{ + auto *m = find_meta(id); + if (m) + { + if (index >= m->members.size()) + return empty_string; + return m->members[index].alias; + } + else + return empty_string; +} + +void ParsedIR::set_name(uint32_t id, const string &name) +{ + auto &str = meta[id].decoration.alias; + str.clear(); + + if (name.empty()) + return; + + // Reserved for temporaries. + if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + return; + + str = ensure_valid_identifier(name, false); +} + +void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) +{ + meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); + + auto &str = meta[id].members[index].alias; + str.clear(); + if (name.empty()) + return; + + // Reserved for unnamed members. + if (name[0] == '_' && name.size() >= 3 && name[1] == 'm' && isdigit(name[2])) + return; + + str = ensure_valid_identifier(name, true); +} + +void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument) +{ + auto &dec = meta[id].decoration; + dec.decoration_flags.set(decoration); + + switch (decoration) + { + case DecorationHlslSemanticGOOGLE: + dec.hlsl_semantic = argument; + break; + + default: + break; + } +} + +void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +{ + auto &dec = meta[id].decoration; + dec.decoration_flags.set(decoration); + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast<BuiltIn>(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationComponent: + dec.component = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationArrayStride: + dec.array_stride = argument; + break; + + case DecorationMatrixStride: + dec.matrix_stride = argument; + break; + + case DecorationBinding: + dec.binding = argument; + break; + + case DecorationDescriptorSet: + dec.set = argument; + break; + + case DecorationInputAttachmentIndex: + dec.input_attachment = argument; + break; + + case DecorationSpecId: + dec.spec_id = argument; + break; + + case DecorationIndex: + dec.index = argument; + break; + + case DecorationHlslCounterBufferGOOGLE: + meta[id].hlsl_magic_counter_buffer = argument; + meta[argument].hlsl_is_magic_counter_buffer = true; + break; + + case DecorationFPRoundingMode: + dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument); + break; + + default: + break; + } +} + +void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +{ + meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &dec = meta[id].members[index]; + dec.decoration_flags.set(decoration); + + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = true; + dec.builtin_type = static_cast<BuiltIn>(argument); + break; + + case DecorationLocation: + dec.location = argument; + break; + + case DecorationComponent: + dec.component = argument; + break; + + case DecorationBinding: + dec.binding = argument; + break; + + case DecorationOffset: + dec.offset = argument; + break; + + case DecorationSpecId: + dec.spec_id = argument; + break; + + case DecorationMatrixStride: + dec.matrix_stride = argument; + break; + + case DecorationIndex: + dec.index = argument; + break; + + default: + break; + } +} + +// Recursively marks any constants referenced by the specified constant instruction as being used +// as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp). +void ParsedIR::mark_used_as_array_length(uint32_t id) +{ + switch (ids[id].get_type()) + { + case TypeConstant: + get<SPIRConstant>(id).is_used_as_array_length = true; + break; + + case TypeConstantOp: + { + auto &cop = get<SPIRConstantOp>(id); + for (uint32_t arg_id : cop.arguments) + mark_used_as_array_length(arg_id); + break; + } + + case TypeUndef: + break; + + default: + assert(0); + } +} + +Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const +{ + auto &type = get<SPIRType>(var.basetype); + assert(type.basetype == SPIRType::Struct); + + // Some flags like non-writable, non-readable are actually found + // as member decorations. If all members have a decoration set, propagate + // the decoration up as a regular variable decoration. + Bitset base_flags; + auto *m = find_meta(var.self); + if (m) + base_flags = m->decoration.decoration_flags; + + if (type.member_types.empty()) + return base_flags; + + Bitset all_members_flags = get_member_decoration_bitset(type.self, 0); + for (uint32_t i = 1; i < uint32_t(type.member_types.size()); i++) + all_members_flags.merge_and(get_member_decoration_bitset(type.self, i)); + + base_flags.merge_or(all_members_flags); + return base_flags; +} + +const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const +{ + auto *m = find_meta(id); + if (m) + { + if (index >= m->members.size()) + return cleared_bitset; + return m->members[index].decoration_flags; + } + else + return cleared_bitset; +} + +bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const +{ + return get_decoration_bitset(id).get(decoration); +} + +uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const +{ + auto *m = find_meta(id); + if (!m) + return 0; + + auto &dec = m->decoration; + if (!dec.decoration_flags.get(decoration)) + return 0; + + switch (decoration) + { + case DecorationBuiltIn: + return dec.builtin_type; + case DecorationLocation: + return dec.location; + case DecorationComponent: + return dec.component; + case DecorationOffset: + return dec.offset; + case DecorationBinding: + return dec.binding; + case DecorationDescriptorSet: + return dec.set; + case DecorationInputAttachmentIndex: + return dec.input_attachment; + case DecorationSpecId: + return dec.spec_id; + case DecorationArrayStride: + return dec.array_stride; + case DecorationMatrixStride: + return dec.matrix_stride; + case DecorationIndex: + return dec.index; + case DecorationFPRoundingMode: + return dec.fp_rounding_mode; + default: + return 1; + } +} + +const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const +{ + auto *m = find_meta(id); + if (!m) + return empty_string; + + auto &dec = m->decoration; + + if (!dec.decoration_flags.get(decoration)) + return empty_string; + + switch (decoration) + { + case DecorationHlslSemanticGOOGLE: + return dec.hlsl_semantic; + + default: + return empty_string; + } +} + +void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) +{ + auto &dec = meta[id].decoration; + dec.decoration_flags.clear(decoration); + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = false; + break; + + case DecorationLocation: + dec.location = 0; + break; + + case DecorationComponent: + dec.component = 0; + break; + + case DecorationOffset: + dec.offset = 0; + break; + + case DecorationBinding: + dec.binding = 0; + break; + + case DecorationDescriptorSet: + dec.set = 0; + break; + + case DecorationInputAttachmentIndex: + dec.input_attachment = 0; + break; + + case DecorationSpecId: + dec.spec_id = 0; + break; + + case DecorationHlslSemanticGOOGLE: + dec.hlsl_semantic.clear(); + break; + + case DecorationFPRoundingMode: + dec.fp_rounding_mode = FPRoundingModeMax; + break; + + case DecorationHlslCounterBufferGOOGLE: + { + auto &counter = meta[id].hlsl_magic_counter_buffer; + if (counter) + { + meta[counter].hlsl_is_magic_counter_buffer = false; + counter = 0; + } + break; + } + + default: + break; + } +} + +bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +{ + return get_member_decoration_bitset(id, index).get(decoration); +} + +uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +{ + auto *m = find_meta(id); + if (!m) + return 0; + + if (index >= m->members.size()) + return 0; + + auto &dec = m->members[index]; + if (!dec.decoration_flags.get(decoration)) + return 0; + + switch (decoration) + { + case DecorationBuiltIn: + return dec.builtin_type; + case DecorationLocation: + return dec.location; + case DecorationComponent: + return dec.component; + case DecorationBinding: + return dec.binding; + case DecorationOffset: + return dec.offset; + case DecorationSpecId: + return dec.spec_id; + case DecorationIndex: + return dec.index; + default: + return 1; + } +} + +const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const +{ + auto *m = find_meta(id); + if (m) + { + auto &dec = m->decoration; + return dec.decoration_flags; + } + else + return cleared_bitset; +} + +void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument) +{ + meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); + auto &dec = meta[id].members[index]; + dec.decoration_flags.set(decoration); + + switch (decoration) + { + case DecorationHlslSemanticGOOGLE: + dec.hlsl_semantic = argument; + break; + + default: + break; + } +} + +const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +{ + auto *m = find_meta(id); + if (m) + { + if (!has_member_decoration(id, index, decoration)) + return empty_string; + + auto &dec = m->members[index]; + + switch (decoration) + { + case DecorationHlslSemanticGOOGLE: + return dec.hlsl_semantic; + + default: + return empty_string; + } + } + else + return empty_string; +} + +void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +{ + auto &m = meta[id]; + if (index >= m.members.size()) + return; + + auto &dec = m.members[index]; + + dec.decoration_flags.clear(decoration); + switch (decoration) + { + case DecorationBuiltIn: + dec.builtin = false; + break; + + case DecorationLocation: + dec.location = 0; + break; + + case DecorationComponent: + dec.component = 0; + break; + + case DecorationOffset: + dec.offset = 0; + break; + + case DecorationSpecId: + dec.spec_id = 0; + break; + + case DecorationHlslSemanticGOOGLE: + dec.hlsl_semantic.clear(); + break; + + default: + break; + } +} + +uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount) +{ + auto curr_bound = ids.size(); + auto new_bound = curr_bound + incr_amount; + ids.resize(new_bound); + block_meta.resize(new_bound); + return uint32_t(curr_bound); +} + +void ParsedIR::remove_typed_id(Types type, uint32_t id) +{ + auto &type_ids = ids_for_type[type]; + type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids)); +} + +void ParsedIR::reset_all_of_type(Types type) +{ + for (auto &id : ids_for_type[type]) + if (ids[id].get_type() == type) + ids[id].reset(); + + ids_for_type[type].clear(); +} + +void ParsedIR::add_typed_id(Types type, uint32_t id) +{ + if (loop_iteration_depth) + SPIRV_CROSS_THROW("Cannot add typed ID while looping over it."); + + switch (type) + { + case TypeConstant: + ids_for_constant_or_variable.push_back(id); + ids_for_constant_or_type.push_back(id); + break; + + case TypeVariable: + ids_for_constant_or_variable.push_back(id); + break; + + case TypeType: + case TypeConstantOp: + ids_for_constant_or_type.push_back(id); + break; + + default: + break; + } + + if (ids[id].empty()) + { + ids_for_type[type].push_back(id); + } + else if (ids[id].get_type() != type) + { + remove_typed_id(ids[id].get_type(), id); + ids_for_type[type].push_back(id); + } +} + +const Meta *ParsedIR::find_meta(uint32_t id) const +{ + auto itr = meta.find(id); + if (itr != end(meta)) + return &itr->second; + else + return nullptr; +} + +Meta *ParsedIR::find_meta(uint32_t id) +{ + auto itr = meta.find(id); + if (itr != end(meta)) + return &itr->second; + else + return nullptr; +} + +} // namespace spirv_cross diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp new file mode 100644 index 0000000..c3c4612 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_parsed_ir.hpp @@ -0,0 +1,186 @@ +/* + * Copyright 2018-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_PARSED_IR_HPP +#define SPIRV_CROSS_PARSED_IR_HPP + +#include "spirv_common.hpp" +#include <stdint.h> +#include <unordered_map> +#include <vector> + +namespace spirv_cross +{ + +// This data structure holds all information needed to perform cross-compilation and reflection. +// It is the output of the Parser, but any implementation could create this structure. +// It is intentionally very "open" and struct-like with some helper functions to deal with decorations. +// Parser is the reference implementation of how this data structure should be filled in. + +class ParsedIR +{ +public: + // Resizes ids, meta and block_meta. + void set_id_bounds(uint32_t bounds); + + // The raw SPIR-V, instructions and opcodes refer to this by offset + count. + std::vector<uint32_t> spirv; + + // Holds various data structures which inherit from IVariant. + std::vector<Variant> ids; + + // Various meta data for IDs, decorations, names, etc. + std::unordered_map<uint32_t, Meta> meta; + + // Holds all IDs which have a certain type. + // This is needed so we can iterate through a specific kind of resource quickly, + // and in-order of module declaration. + std::vector<uint32_t> ids_for_type[TypeCount]; + + // Special purpose lists which contain a union of types. + // This is needed so we can declare specialization constants and structs in an interleaved fashion, + // among other things. + // Constants can be of struct type, and struct array sizes can use specialization constants. + std::vector<uint32_t> ids_for_constant_or_type; + std::vector<uint32_t> ids_for_constant_or_variable; + + // Declared capabilities and extensions in the SPIR-V module. + // Not really used except for reflection at the moment. + std::vector<spv::Capability> declared_capabilities; + std::vector<std::string> declared_extensions; + + // Meta data about blocks. The cross-compiler needs to query if a block is either of these types. + // It is a bitset as there can be more than one tag per block. + enum BlockMetaFlagBits + { + BLOCK_META_LOOP_HEADER_BIT = 1 << 0, + BLOCK_META_CONTINUE_BIT = 1 << 1, + BLOCK_META_LOOP_MERGE_BIT = 1 << 2, + BLOCK_META_SELECTION_MERGE_BIT = 1 << 3, + BLOCK_META_MULTISELECT_MERGE_BIT = 1 << 4 + }; + using BlockMetaFlags = uint8_t; + std::vector<BlockMetaFlags> block_meta; + std::unordered_map<uint32_t, uint32_t> continue_block_to_loop_header; + + // Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction. + // Entry points can therefore be seen as some sort of meta structure. + std::unordered_map<uint32_t, SPIREntryPoint> entry_points; + uint32_t default_entry_point = 0; + + struct Source + { + uint32_t version = 0; + bool es = false; + bool known = false; + bool hlsl = false; + + Source() = default; + }; + + Source source; + + // Decoration handling methods. + // Can be useful for simple "raw" reflection. + // However, most members are here because the Parser needs most of these, + // and might as well just have the whole suite of decoration/name handling in one place. + void set_name(uint32_t id, const std::string &name); + const std::string &get_name(uint32_t id) const; + void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); + bool has_decoration(uint32_t id, spv::Decoration decoration) const; + uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; + const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; + const Bitset &get_decoration_bitset(uint32_t id) const; + void unset_decoration(uint32_t id, spv::Decoration decoration); + + // Decoration handling methods (for members of a struct). + void set_member_name(uint32_t id, uint32_t index, const std::string &name); + const std::string &get_member_name(uint32_t id, uint32_t index) const; + void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + const std::string &argument); + uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; + void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + + void mark_used_as_array_length(uint32_t id); + uint32_t increase_bound_by(uint32_t count); + Bitset get_buffer_block_flags(const SPIRVariable &var) const; + + void add_typed_id(Types type, uint32_t id); + void remove_typed_id(Types type, uint32_t id); + + template <typename T, typename Op> + void for_each_typed_id(const Op &op) + { + loop_iteration_depth++; + for (auto &id : ids_for_type[T::type]) + { + if (ids[id].get_type() == static_cast<Types>(T::type)) + op(id, get<T>(id)); + } + loop_iteration_depth--; + } + + template <typename T, typename Op> + void for_each_typed_id(const Op &op) const + { + for (auto &id : ids_for_type[T::type]) + { + if (ids[id].get_type() == static_cast<Types>(T::type)) + op(id, get<T>(id)); + } + } + + template <typename T> + void reset_all_of_type() + { + reset_all_of_type(static_cast<Types>(T::type)); + } + + void reset_all_of_type(Types type); + + Meta *find_meta(uint32_t id); + const Meta *find_meta(uint32_t id) const; + + const std::string &get_empty_string() const + { + return empty_string; + } + +private: + template <typename T> + T &get(uint32_t id) + { + return variant_get<T>(ids[id]); + } + + template <typename T> + const T &get(uint32_t id) const + { + return variant_get<T>(ids[id]); + } + + uint32_t loop_iteration_depth = 0; + std::string empty_string; + Bitset cleared_bitset; +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_util.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_util.cpp new file mode 100644 index 0000000..58c1ddc --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_util.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_cross_util.hpp" +#include "spirv_common.hpp" + +using namespace spv; +using namespace spirv_cross; + +namespace spirv_cross_util +{ +void rename_interface_variable(spirv_cross::Compiler &compiler, const std::vector<spirv_cross::Resource> &resources, + uint32_t location, const std::string &name) +{ + for (auto &v : resources) + { + if (!compiler.has_decoration(v.id, spv::DecorationLocation)) + continue; + + auto loc = compiler.get_decoration(v.id, spv::DecorationLocation); + if (loc != location) + continue; + + auto &type = compiler.get_type(v.base_type_id); + + // This is more of a friendly variant. If we need to rename interface variables, we might have to rename + // structs as well and make sure all the names match up. + if (type.basetype == SPIRType::Struct) + { + compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", location)); + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i)); + } + + compiler.set_name(v.id, name); + } +} + +void inherit_combined_sampler_bindings(spirv_cross::Compiler &compiler) +{ + auto &samplers = compiler.get_combined_image_samplers(); + for (auto &s : samplers) + { + if (compiler.has_decoration(s.image_id, spv::DecorationDescriptorSet)) + { + uint32_t set = compiler.get_decoration(s.image_id, spv::DecorationDescriptorSet); + compiler.set_decoration(s.combined_id, spv::DecorationDescriptorSet, set); + } + + if (compiler.has_decoration(s.image_id, spv::DecorationBinding)) + { + uint32_t binding = compiler.get_decoration(s.image_id, spv::DecorationBinding); + compiler.set_decoration(s.combined_id, spv::DecorationBinding, binding); + } + } +} +} // namespace spirv_cross_util diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_util.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_util.hpp new file mode 100644 index 0000000..faf0f48 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_util.hpp @@ -0,0 +1,29 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_UTIL_HPP +#define SPIRV_CROSS_UTIL_HPP + +#include "spirv_cross.hpp" + +namespace spirv_cross_util +{ +void rename_interface_variable(spirv_cross::Compiler &compiler, const std::vector<spirv_cross::Resource> &resources, + uint32_t location, const std::string &name); +void inherit_combined_sampler_bindings(spirv_cross::Compiler &compiler); +} // namespace spirv_cross_util + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp new file mode 100644 index 0000000..f35c7d8 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp @@ -0,0 +1,11451 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_glsl.hpp" +#include "GLSL.std.450.h" +#include "spirv_common.hpp" +#include <algorithm> +#include <assert.h> +#include <cmath> +#include <limits> +#include <locale.h> +#include <utility> + +#ifndef _WIN32 +#include <langinfo.h> +#endif +#include <locale.h> + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +static bool is_unsigned_opcode(Op op) +{ + // Don't have to be exhaustive, only relevant for legacy target checking ... + switch (op) + { + case OpShiftRightLogical: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUConvert: + case OpUDiv: + case OpUMod: + case OpUMulExtended: + case OpConvertUToF: + case OpConvertFToU: + return true; + + default: + return false; + } +} + +static bool is_unsigned_glsl_opcode(GLSLstd450 op) +{ + // Don't have to be exhaustive, only relevant for legacy target checking ... + switch (op) + { + case GLSLstd450UClamp: + case GLSLstd450UMin: + case GLSLstd450UMax: + case GLSLstd450FindUMsb: + return true; + + default: + return false; + } +} + +static bool packing_is_vec4_padded(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + case BufferPackingStd140: + case BufferPackingStd140EnhancedLayout: + return true; + + default: + return false; + } +} + +static bool packing_is_hlsl(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + return true; + + default: + return false; + } +} + +static bool packing_has_flexible_offset(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140: + case BufferPackingStd430: + case BufferPackingHLSLCbuffer: + return false; + + default: + return true; + } +} + +static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140EnhancedLayout: + return BufferPackingStd140; + case BufferPackingStd430EnhancedLayout: + return BufferPackingStd430; + case BufferPackingHLSLCbufferPackOffset: + return BufferPackingHLSLCbuffer; + default: + return packing; + } +} + +// Sanitizes underscores for GLSL where multiple underscores in a row are not allowed. +string CompilerGLSL::sanitize_underscores(const string &str) +{ + string res; + res.reserve(str.size()); + + bool last_underscore = false; + for (auto c : str) + { + if (c == '_') + { + if (last_underscore) + continue; + + res += c; + last_underscore = true; + } + else + { + res += c; + last_underscore = false; + } + } + return res; +} + +void CompilerGLSL::init() +{ + if (ir.source.known) + { + options.es = ir.source.es; + options.version = ir.source.version; + } + + // Query the locale to see what the decimal point is. + // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale + // rather than setting locales ourselves. Settings locales in a safe and isolated way is rather + // tricky. +#ifdef _WIN32 + // On Windows, localeconv uses thread-local storage, so it should be fine. + const struct lconv *conv = localeconv(); + if (conv && conv->decimal_point) + current_locale_radix_character = *conv->decimal_point; +#elif defined(__ANDROID__) && __ANDROID_API__ < 26 + // nl_langinfo is not supported on this platform, fall back to the worse alternative. + const struct lconv *conv = localeconv(); + if (conv && conv->decimal_point) + current_locale_radix_character = *conv->decimal_point; +#else + // localeconv, the portable function is not MT safe ... + const char *decimal_point = nl_langinfo(RADIXCHAR); + if (decimal_point && *decimal_point != '\0') + current_locale_radix_character = *decimal_point; +#endif +} + +static const char *to_pls_layout(PlsFormat format) +{ + switch (format) + { + case PlsR11FG11FB10F: + return "layout(r11f_g11f_b10f) "; + case PlsR32F: + return "layout(r32f) "; + case PlsRG16F: + return "layout(rg16f) "; + case PlsRGB10A2: + return "layout(rgb10_a2) "; + case PlsRGBA8: + return "layout(rgba8) "; + case PlsRG16: + return "layout(rg16) "; + case PlsRGBA8I: + return "layout(rgba8i)"; + case PlsRG16I: + return "layout(rg16i) "; + case PlsRGB10A2UI: + return "layout(rgb10_a2ui) "; + case PlsRGBA8UI: + return "layout(rgba8ui) "; + case PlsRG16UI: + return "layout(rg16ui) "; + case PlsR32UI: + return "layout(r32ui) "; + default: + return ""; + } +} + +static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) +{ + switch (format) + { + default: + case PlsR11FG11FB10F: + case PlsR32F: + case PlsRG16F: + case PlsRGB10A2: + case PlsRGBA8: + case PlsRG16: + return SPIRType::Float; + + case PlsRGBA8I: + case PlsRG16I: + return SPIRType::Int; + + case PlsRGB10A2UI: + case PlsRGBA8UI: + case PlsRG16UI: + case PlsR32UI: + return SPIRType::UInt; + } +} + +static uint32_t pls_format_to_components(PlsFormat format) +{ + switch (format) + { + default: + case PlsR32F: + case PlsR32UI: + return 1; + + case PlsRG16F: + case PlsRG16: + case PlsRG16UI: + case PlsRG16I: + return 2; + + case PlsR11FG11FB10F: + return 3; + + case PlsRGB10A2: + case PlsRGBA8: + case PlsRGBA8I: + case PlsRGB10A2UI: + case PlsRGBA8UI: + return 4; + } +} + +static const char *vector_swizzle(int vecsize, int index) +{ + static const char *swizzle[4][4] = { + { ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" } + }; + + assert(vecsize >= 1 && vecsize <= 4); + assert(index >= 0 && index < 4); + assert(swizzle[vecsize - 1][index]); + + return swizzle[vecsize - 1][index]; +} + +void CompilerGLSL::reset() +{ + // We do some speculative optimizations which should pretty much always work out, + // but just in case the SPIR-V is rather weird, recompile until it's happy. + // This typically only means one extra pass. + force_recompile = false; + + // Clear invalid expression tracking. + invalid_expressions.clear(); + current_function = nullptr; + + // Clear temporary usage tracking. + expression_usage_counts.clear(); + forwarded_temporaries.clear(); + + reset_name_caches(); + + ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) { + func.active = false; + func.flush_undeclared = true; + }); + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); }); + + ir.reset_all_of_type<SPIRExpression>(); + ir.reset_all_of_type<SPIRAccessChain>(); + + statement_count = 0; + indent = 0; +} + +void CompilerGLSL::remap_pls_variables() +{ + for (auto &input : pls_inputs) + { + auto &var = get<SPIRVariable>(input.id); + + bool input_is_target = false; + if (var.storage == StorageClassUniformConstant) + { + auto &type = get<SPIRType>(var.basetype); + input_is_target = type.image.dim == DimSubpassData; + } + + if (var.storage != StorageClassInput && !input_is_target) + SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs."); + var.remapped_variable = true; + } + + for (auto &output : pls_outputs) + { + auto &var = get<SPIRVariable>(output.id); + if (var.storage != StorageClassOutput) + SPIRV_CROSS_THROW("Can only use out variables for PLS outputs."); + var.remapped_variable = true; + } +} + +void CompilerGLSL::find_static_extensions() +{ + ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) { + if (type.basetype == SPIRType::Double) + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + } + else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) + { + if (options.es) + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); + if (!options.es) + require_extension_internal("GL_ARB_gpu_shader_int64"); + } + else if (type.basetype == SPIRType::Half) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_8bit_storage"); + } + else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + }); + + auto &execution = get_entry_point(); + switch (execution.model) + { + case ExecutionModelGLCompute: + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_compute_shader"); + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders."); + break; + + case ExecutionModelGeometry: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_geometry_shader"); + if (!options.es && options.version < 150) + require_extension_internal("GL_ARB_geometry_shader4"); + + if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1) + { + // Instanced GS is part of 400 core or this extension. + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader5"); + } + break; + + case ExecutionModelTessellationEvaluation: + case ExecutionModelTessellationControl: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_tessellation_shader"); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_tessellation_shader"); + break; + + case ExecutionModelRayGenerationNV: + case ExecutionModelIntersectionNV: + case ExecutionModelAnyHitNV: + case ExecutionModelClosestHitNV: + case ExecutionModelMissNV: + case ExecutionModelCallableNV: + if (options.es || options.version < 460) + SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above."); + require_extension_internal("GL_NV_ray_tracing"); + break; + + default: + break; + } + + if (!pls_inputs.empty() || !pls_outputs.empty()) + require_extension_internal("GL_EXT_shader_pixel_local_storage"); + + if (options.separate_shader_objects && !options.es && options.version < 410) + require_extension_internal("GL_ARB_separate_shader_objects"); +} + +string CompilerGLSL::compile() +{ + if (options.vulkan_semantics) + backend.allow_precision_qualifiers = true; + backend.force_gl_in_out_block = true; + backend.supports_extensions = true; + + // Scan the SPIR-V to find trivial uses of extensions. + build_function_control_flow_graphs_and_analyze(); + find_static_extensions(); + fixup_image_load_store_access(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr<ostringstream>(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset()); + + pass_count++; + } while (force_recompile); + + // Entry point in GLSL is always main(). + get_entry_point().name = "main"; + + return buffer->str(); +} + +std::string CompilerGLSL::get_partial_source() +{ + return buffer ? buffer->str() : "No compiled source available yet."; +} + +void CompilerGLSL::build_workgroup_size(vector<string> &arguments, const SpecializationConstant &wg_x, + const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) +{ + auto &execution = get_entry_point(); + + if (wg_x.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_x_id = ", wg_x.constant_id)); + else + arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name)); + } + else + arguments.push_back(join("local_size_x = ", execution.workgroup_size.x)); + + if (wg_y.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_y_id = ", wg_y.constant_id)); + else + arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name)); + } + else + arguments.push_back(join("local_size_y = ", execution.workgroup_size.y)); + + if (wg_z.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_z_id = ", wg_z.constant_id)); + else + arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name)); + } + else + arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); +} + +void CompilerGLSL::emit_header() +{ + auto &execution = get_entry_point(); + statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); + + if (!options.es && options.version < 420) + { + // Needed for binding = # on UBOs, etc. + if (options.enable_420pack_extension) + { + statement("#ifdef GL_ARB_shading_language_420pack"); + statement("#extension GL_ARB_shading_language_420pack : require"); + statement("#endif"); + } + // Needed for: layout(early_fragment_tests) in; + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + require_extension_internal("GL_ARB_shader_image_load_store"); + } + + for (auto &ext : forced_extensions) + { + if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") + { + // Special case, this extension has a potential fallback to another vendor extension in normal GLSL. + // GL_AMD_gpu_shader_half_float is a superset, so try that first. + statement("#if defined(GL_AMD_gpu_shader_half_float)"); + statement("#extension GL_AMD_gpu_shader_half_float : require"); + if (!options.vulkan_semantics) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + else + { + statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require"); + } + statement("#else"); + statement("#error No extension available for FP16."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + else + { + statement("#if defined(GL_AMD_gpu_shader_int16)"); + statement("#extension GL_AMD_gpu_shader_int16 : require"); + statement("#else"); + statement("#error No extension available for Int16."); + statement("#endif"); + } + } + else + statement("#extension ", ext, " : require"); + } + + for (auto &header : header_lines) + statement(header); + + vector<string> inputs; + vector<string> outputs; + + switch (execution.model) + { + case ExecutionModelGeometry: + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1) + inputs.push_back(join("invocations = ", execution.invocations)); + if (execution.flags.get(ExecutionModeInputPoints)) + inputs.push_back("points"); + if (execution.flags.get(ExecutionModeInputLines)) + inputs.push_back("lines"); + if (execution.flags.get(ExecutionModeInputLinesAdjacency)) + inputs.push_back("lines_adjacency"); + if (execution.flags.get(ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags.get(ExecutionModeInputTrianglesAdjacency)) + inputs.push_back("triangles_adjacency"); + if (execution.flags.get(ExecutionModeOutputTriangleStrip)) + outputs.push_back("triangle_strip"); + if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + if (execution.flags.get(ExecutionModeOutputLineStrip)) + outputs.push_back("line_strip"); + break; + + case ExecutionModelTessellationControl: + if (execution.flags.get(ExecutionModeOutputVertices)) + outputs.push_back(join("vertices = ", execution.output_vertices)); + break; + + case ExecutionModelTessellationEvaluation: + if (execution.flags.get(ExecutionModeQuads)) + inputs.push_back("quads"); + if (execution.flags.get(ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags.get(ExecutionModeIsolines)) + inputs.push_back("isolines"); + if (execution.flags.get(ExecutionModePointMode)) + inputs.push_back("point_mode"); + + if (!execution.flags.get(ExecutionModeIsolines)) + { + if (execution.flags.get(ExecutionModeVertexOrderCw)) + inputs.push_back("cw"); + if (execution.flags.get(ExecutionModeVertexOrderCcw)) + inputs.push_back("ccw"); + } + + if (execution.flags.get(ExecutionModeSpacingFractionalEven)) + inputs.push_back("fractional_even_spacing"); + if (execution.flags.get(ExecutionModeSpacingFractionalOdd)) + inputs.push_back("fractional_odd_spacing"); + if (execution.flags.get(ExecutionModeSpacingEqual)) + inputs.push_back("equal_spacing"); + break; + + case ExecutionModelGLCompute: + { + if (execution.workgroup_size.constant != 0) + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro + // declarations before we can emit the work group size. + if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0))) + build_workgroup_size(inputs, wg_x, wg_y, wg_z); + } + else + { + inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); + inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); + inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); + } + break; + } + + case ExecutionModelFragment: + if (options.es) + { + switch (options.fragment.default_float_precision) + { + case Options::Lowp: + statement("precision lowp float;"); + break; + + case Options::Mediump: + statement("precision mediump float;"); + break; + + case Options::Highp: + statement("precision highp float;"); + break; + + default: + break; + } + + switch (options.fragment.default_int_precision) + { + case Options::Lowp: + statement("precision lowp int;"); + break; + + case Options::Mediump: + statement("precision mediump int;"); + break; + + case Options::Highp: + statement("precision highp int;"); + break; + + default: + break; + } + } + + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + inputs.push_back("early_fragment_tests"); + + if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) + statement("layout(depth_greater) out float gl_FragDepth;"); + else if (!options.es && execution.flags.get(ExecutionModeDepthLess)) + statement("layout(depth_less) out float gl_FragDepth;"); + + break; + + default: + break; + } + + if (!inputs.empty()) + statement("layout(", merge(inputs), ") in;"); + if (!outputs.empty()) + statement("layout(", merge(outputs), ") out;"); + + statement(""); +} + +bool CompilerGLSL::type_is_empty(const SPIRType &type) +{ + return type.basetype == SPIRType::Struct && type.member_types.empty(); +} + +void CompilerGLSL::emit_struct(SPIRType &type) +{ + // Struct types can be stamped out multiple times + // with just different offsets, matrix layouts, etc ... + // Type-punning with these types is legal, which complicates things + // when we are storing struct and array types in an SSBO for example. + // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. + if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationPacked)) + return; + + add_resource_name(type.self); + auto name = type_to_glsl(type); + + statement(!backend.explicit_struct_type ? "struct " : "", name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + bool emitted = false; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + emitted = true; + } + + // Don't declare empty structs in GLSL, this is not allowed. + if (type_is_empty(type) && !backend.supports_empty_struct) + { + statement("int empty_struct_member;"); + emitted = true; + } + + end_scope_decl(); + + if (emitted) + statement(""); +} + +string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) +{ + string res; + //if (flags & (1ull << DecorationSmooth)) + // res += "smooth "; + if (flags.get(DecorationFlat)) + res += "flat "; + if (flags.get(DecorationNoPerspective)) + res += "noperspective "; + if (flags.get(DecorationCentroid)) + res += "centroid "; + if (flags.get(DecorationPatch)) + res += "patch "; + if (flags.get(DecorationSample)) + res += "sample "; + if (flags.get(DecorationInvariant)) + res += "invariant "; + if (flags.get(DecorationExplicitInterpAMD)) + res += "__explicitInterpAMD "; + + return res; +} + +string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) +{ + if (is_legacy()) + return ""; + + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (!is_block) + return ""; + + auto &memb = ir.meta[type.self].members; + if (index >= memb.size()) + return ""; + auto &dec = memb[index]; + + vector<string> attr; + + // We can only apply layouts on members in block interfaces. + // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. + // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct + // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. + // + // We would like to go from (SPIR-V style): + // + // struct Foo { layout(row_major) mat4 matrix; }; + // buffer UBO { Foo foo; }; + // + // to + // + // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. + // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. + auto flags = combined_decoration_for_member(type, index); + + if (flags.get(DecorationRowMajor)) + attr.push_back("row_major"); + // We don't emit any global layouts, so column_major is default. + //if (flags & (1ull << DecorationColMajor)) + // attr.push_back("column_major"); + + if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true)) + attr.push_back(join("location = ", dec.location)); + + // Can only declare component if we can declare location. + if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true)) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + attr.push_back(join("component = ", dec.component)); + } + else + SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); + } + + // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. + // This is only done selectively in GLSL as needed. + if (has_extended_decoration(type.self, SPIRVCrossDecorationPacked) && dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("offset = ", dec.offset)); + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) +{ + if (options.es && is_desktop_only_format(format)) + SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile."); + + switch (format) + { + case ImageFormatRgba32f: + return "rgba32f"; + case ImageFormatRgba16f: + return "rgba16f"; + case ImageFormatR32f: + return "r32f"; + case ImageFormatRgba8: + return "rgba8"; + case ImageFormatRgba8Snorm: + return "rgba8_snorm"; + case ImageFormatRg32f: + return "rg32f"; + case ImageFormatRg16f: + return "rg16f"; + case ImageFormatRgba32i: + return "rgba32i"; + case ImageFormatRgba16i: + return "rgba16i"; + case ImageFormatR32i: + return "r32i"; + case ImageFormatRgba8i: + return "rgba8i"; + case ImageFormatRg32i: + return "rg32i"; + case ImageFormatRg16i: + return "rg16i"; + case ImageFormatRgba32ui: + return "rgba32ui"; + case ImageFormatRgba16ui: + return "rgba16ui"; + case ImageFormatR32ui: + return "r32ui"; + case ImageFormatRgba8ui: + return "rgba8ui"; + case ImageFormatRg32ui: + return "rg32ui"; + case ImageFormatRg16ui: + return "rg16ui"; + case ImageFormatR11fG11fB10f: + return "r11f_g11f_b10f"; + case ImageFormatR16f: + return "r16f"; + case ImageFormatRgb10A2: + return "rgb10_a2"; + case ImageFormatR8: + return "r8"; + case ImageFormatRg8: + return "rg8"; + case ImageFormatR16: + return "r16"; + case ImageFormatRg16: + return "rg16"; + case ImageFormatRgba16: + return "rgba16"; + case ImageFormatR16Snorm: + return "r16_snorm"; + case ImageFormatRg16Snorm: + return "rg16_snorm"; + case ImageFormatRgba16Snorm: + return "rgba16_snorm"; + case ImageFormatR8Snorm: + return "r8_snorm"; + case ImageFormatRg8Snorm: + return "rg8_snorm"; + case ImageFormatR8ui: + return "r8ui"; + case ImageFormatRg8ui: + return "rg8ui"; + case ImageFormatR16ui: + return "r16ui"; + case ImageFormatRgb10a2ui: + return "rgb10_a2ui"; + case ImageFormatR8i: + return "r8i"; + case ImageFormatRg8i: + return "rg8i"; + case ImageFormatR16i: + return "r16i"; + default: + case ImageFormatUnknown: + return nullptr; + } +} + +uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) +{ + switch (type.basetype) + { + case SPIRType::Double: + case SPIRType::Int64: + case SPIRType::UInt64: + return 8; + case SPIRType::Float: + case SPIRType::Int: + case SPIRType::UInt: + return 4; + case SPIRType::Half: + case SPIRType::Short: + case SPIRType::UShort: + return 2; + case SPIRType::SByte: + case SPIRType::UByte: + return 1; + + default: + SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size."); + } +} + +uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags, + BufferPackingStandard packing) +{ + if (!type.array.empty()) + { + uint32_t minimum_alignment = 1; + if (packing_is_vec4_padded(packing)) + minimum_alignment = 16; + + auto *tmp = &get<SPIRType>(type.parent_type); + while (!tmp->array.empty()) + tmp = &get<SPIRType>(tmp->parent_type); + + // Get the alignment of the base type, then maybe round up. + return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing)); + } + + if (type.basetype == SPIRType::Struct) + { + // Rule 9. Structs alignments are maximum alignment of its members. + uint32_t alignment = 1; + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + alignment = + max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing)); + } + + // In std140, struct alignment is rounded up to 16. + if (packing_is_vec4_padded(packing)) + alignment = max(alignment, 16u); + + return alignment; + } + else + { + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle + // a vec4, this is handled outside since that part knows our current offset. + if (type.columns == 1 && packing_is_hlsl(packing)) + return base_alignment; + + // From 7.6.2.2 in GL 4.5 core spec. + // Rule 1 + if (type.vecsize == 1 && type.columns == 1) + return base_alignment; + + // Rule 2 + if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) + return type.vecsize * base_alignment; + + // Rule 3 + if (type.vecsize == 3 && type.columns == 1) + return 4 * base_alignment; + + // Rule 4 implied. Alignment does not change in std430. + + // Rule 5. Column-major matrices are stored as arrays of + // vectors. + if (flags.get(DecorationColMajor) && type.columns > 1) + { + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.vecsize == 3) + return 4 * base_alignment; + else + return type.vecsize * base_alignment; + } + + // Rule 6 implied. + + // Rule 7. + if (flags.get(DecorationRowMajor) && type.vecsize > 1) + { + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.columns == 3) + return 4 * base_alignment; + else + return type.columns * base_alignment; + } + + // Rule 8 implied. + } + + SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?"); +} + +uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, + BufferPackingStandard packing) +{ + // Array stride is equal to aligned size of the underlying type. + uint32_t parent = type.parent_type; + assert(parent); + + auto &tmp = get<SPIRType>(parent); + + uint32_t size = type_to_packed_size(tmp, flags, packing); + if (tmp.array.empty()) + { + uint32_t alignment = type_to_packed_alignment(type, flags, packing); + return (size + alignment - 1) & ~(alignment - 1); + } + else + { + // For multidimensional arrays, array stride always matches size of subtype. + // The alignment cannot change because multidimensional arrays are basically N * M array elements. + return size; + } +} + +uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) +{ + if (!type.array.empty()) + { + return to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + } + + uint32_t size = 0; + + if (type.basetype == SPIRType::Struct) + { + uint32_t pad_alignment = 1; + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + auto &member_type = get<SPIRType>(type.member_types[i]); + + uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing); + uint32_t alignment = max(packed_alignment, pad_alignment); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (member_type.basetype == SPIRType::Struct) + pad_alignment = packed_alignment; + else + pad_alignment = 1; + + size = (size + alignment - 1) & ~(alignment - 1); + size += type_to_packed_size(member_type, member_flags, packing); + } + } + else + { + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + if (type.columns == 1) + size = type.vecsize * base_alignment; + + if (flags.get(DecorationColMajor) && type.columns > 1) + { + if (packing_is_vec4_padded(packing)) + size = type.columns * 4 * base_alignment; + else if (type.vecsize == 3) + size = type.columns * 4 * base_alignment; + else + size = type.columns * type.vecsize * base_alignment; + } + + if (flags.get(DecorationRowMajor) && type.vecsize > 1) + { + if (packing_is_vec4_padded(packing)) + size = type.vecsize * 4 * base_alignment; + else if (type.columns == 3) + size = type.vecsize * 4 * base_alignment; + else + size = type.vecsize * type.columns * base_alignment; + } + } + + return size; +} + +bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t start_offset, uint32_t end_offset) +{ + // This is very tricky and error prone, but try to be exhaustive and correct here. + // SPIR-V doesn't directly say if we're using std430 or std140. + // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), + // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. + // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing). + // + // It is almost certain that we're using std430, but it gets tricky with arrays in particular. + // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. + // + // The only two differences between std140 and std430 are related to padding alignment/array stride + // in arrays and structs. In std140 they take minimum vec4 alignment. + // std430 only removes the vec4 requirement. + + uint32_t offset = 0; + uint32_t pad_alignment = 1; + + bool is_top_level_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto &memb_type = get<SPIRType>(type.member_types[i]); + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + + // Verify alignment rules. + uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing); + + // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g: + // layout(constant_id = 0) const int s = 10; + // const int S = s + 5; // SpecConstantOp + // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here, + // we would need full implementation of compile-time constant folding. :( + // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant + // for our analysis (e.g. unsized arrays). + // This lets us simply ignore that there are spec constant op sized arrays in our buffers. + // Querying size of this member will fail, so just don't call it unless we have to. + // + // This is likely "best effort" we can support without going into unacceptably complicated workarounds. + bool member_can_be_unsized = + is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); + + uint32_t packed_size = 0; + if (!member_can_be_unsized) + packed_size = type_to_packed_size(memb_type, member_flags, packing); + + // We only need to care about this if we have non-array types which can straddle the vec4 boundary. + if (packing_is_hlsl(packing)) + { + // If a member straddles across a vec4 boundary, alignment is actually vec4. + uint32_t begin_word = offset / 16; + uint32_t end_word = (offset + packed_size - 1) / 16; + if (begin_word != end_word) + packed_alignment = max(packed_alignment, 16u); + } + + uint32_t alignment = max(packed_alignment, pad_alignment); + offset = (offset + alignment - 1) & ~(alignment - 1); + + // Field is not in the specified range anymore and we can ignore any further fields. + if (offset >= end_offset) + break; + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (memb_type.basetype == SPIRType::Struct) + pad_alignment = packed_alignment; + else + pad_alignment = 1; + + // Only care about packing if we are in the given range + if (offset >= start_offset) + { + // We only care about offsets in std140, std430, etc ... + // For EnhancedLayout variants, we have the flexibility to choose our own offsets. + if (!packing_has_flexible_offset(packing)) + { + uint32_t actual_offset = type_struct_member_offset(type, i); + if (actual_offset != offset) // This cannot be the packing we're looking for. + return false; + } + + // Verify array stride rules. + if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != + type_struct_member_array_stride(type, i)) + return false; + + // Verify that sub-structs also follow packing rules. + // We cannot use enhanced layouts on substructs, so they better be up to spec. + auto substruct_packing = packing_to_substruct_packing(packing); + + if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing)) + return false; + } + + // Bump size. + offset += packed_size; + } + + return true; +} + +bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) +{ + // Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL. + // Be very explicit here about how to solve the issue. + if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || + (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) + { + uint32_t minimum_desktop_version = block ? 440 : 410; + // ARB_enhanced_layouts vs ARB_separate_shader_objects ... + + if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) + return false; + else if (options.es && options.version < 310) + return false; + } + + if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || + (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) + { + if (options.es && options.version < 300) + return false; + else if (!options.es && options.version < 330) + return false; + } + + if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) + { + if (options.es && options.version < 310) + return false; + else if (!options.es && options.version < 430) + return false; + } + + return true; +} + +string CompilerGLSL::layout_for_variable(const SPIRVariable &var) +{ + // FIXME: Come up with a better solution for when to disable layouts. + // Having layouts depend on extensions as well as which types + // of layouts are used. For now, the simple solution is to just disable + // layouts for legacy versions. + if (is_legacy()) + return ""; + + vector<string> attr; + + auto &dec = ir.meta[var.self].decoration; + auto &type = get<SPIRType>(var.basetype); + auto &flags = dec.decoration_flags; + auto typeflags = ir.meta[type.self].decoration.decoration_flags; + + if (options.vulkan_semantics && var.storage == StorageClassPushConstant) + attr.push_back("push_constant"); + + if (flags.get(DecorationRowMajor)) + attr.push_back("row_major"); + if (flags.get(DecorationColMajor)) + attr.push_back("column_major"); + + if (options.vulkan_semantics) + { + if (flags.get(DecorationInputAttachmentIndex)) + attr.push_back(join("input_attachment_index = ", dec.input_attachment)); + } + + bool is_block = has_decoration(type.self, DecorationBlock); + if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block)) + { + Bitset combined_decoration; + for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++) + combined_decoration.merge_or(combined_decoration_for_member(type, i)); + + // If our members have location decorations, we don't need to + // emit location decorations at the top as well (looks weird). + if (!combined_decoration.get(DecorationLocation)) + attr.push_back(join("location = ", dec.location)); + } + + // Can only declare Component if we can declare location. + if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + attr.push_back(join("component = ", dec.component)); + } + else + SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); + } + + if (flags.get(DecorationIndex)) + attr.push_back(join("index = ", dec.index)); + + // Do not emit set = decoration in regular GLSL output, but + // we need to preserve it in Vulkan GLSL mode. + if (var.storage != StorageClassPushConstant) + { + if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) + attr.push_back(join("set = ", dec.set)); + } + + // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... + bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); + + bool can_use_binding; + if (options.es) + can_use_binding = options.version >= 310; + else + can_use_binding = options.enable_420pack_extension || (options.version >= 420); + + // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. + if (!can_use_buffer_blocks && var.storage == StorageClassUniform) + can_use_binding = false; + + if (can_use_binding && flags.get(DecorationBinding)) + attr.push_back(join("binding = ", dec.binding)); + + if (flags.get(DecorationOffset)) + attr.push_back(join("offset = ", dec.offset)); + + bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; + bool ssbo_block = var.storage == StorageClassStorageBuffer || + (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); + bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; + bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); + + // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. + // If SPIR-V does not comply with either layout, we cannot really work around it. + if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) + { + if (buffer_is_packing_standard(type, BufferPackingStd140)) + attr.push_back("std140"); + else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) + { + attr.push_back("std140"); + // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, + // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. + // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Uniform buffer block cannot be expressed as std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + // This is a very last minute to check for this, but use this unused decoration to mark that we should emit + // explicit offsets for this block type. + // layout_for_variable() will be called before the actual buffer emit. + // The alternative is a full pass before codegen where we deduce this decoration, + // but then we are just doing the exact same work twice, and more complexity. + set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + } + else + { + SPIRV_CROSS_THROW("Uniform buffer cannot be expressed as std140, even with enhanced layouts. You can try " + "flattening this block to " + "support a more flexible layout."); + } + } + else if (can_use_buffer_blocks && (push_constant_block || ssbo_block)) + { + if (buffer_is_packing_standard(type, BufferPackingStd430)) + attr.push_back("std430"); + else if (buffer_is_packing_standard(type, BufferPackingStd140)) + attr.push_back("std140"); + else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) + { + attr.push_back("std140"); + + // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, + // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. + // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + } + else if (buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + { + attr.push_back("std430"); + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + } + else + { + SPIRV_CROSS_THROW("Buffer block cannot be expressed as neither std430 nor std140, even with enhanced " + "layouts. You can try flattening this block to support a more flexible layout."); + } + } + + // For images, the type itself adds a layout qualifer. + // Only emit the format for storage images. + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + const char *fmt = format_to_glsl(type.image.format); + if (fmt) + attr.push_back(fmt); + } + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) +{ + if (flattened_buffer_blocks.count(var.self)) + emit_buffer_block_flattened(var); + else if (options.vulkan_semantics) + emit_push_constant_block_vulkan(var); + else if (options.emit_push_constant_as_uniform_buffer) + emit_buffer_block_native(var); + else + emit_push_constant_block_glsl(var); +} + +void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) +{ + emit_buffer_block(var); +} + +void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) +{ + // OpenGL has no concept of push constant blocks, implement it as a uniform struct. + auto &type = get<SPIRType>(var.basetype); + + auto &flags = ir.meta[var.self].decoration.decoration_flags; + flags.clear(DecorationBinding); + flags.clear(DecorationDescriptorSet); + +#if 0 + if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); +#endif + + // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. + auto &block_flags = ir.meta[type.self].decoration.decoration_flags; + bool block_flag = block_flags.get(DecorationBlock); + block_flags.clear(DecorationBlock); + + emit_struct(type); + + if (block_flag) + block_flags.set(DecorationBlock); + + emit_uniform(var); + statement(""); +} + +void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) +{ + if (flattened_buffer_blocks.count(var.self)) + emit_buffer_block_flattened(var); + else if (is_legacy() || (!options.es && options.version == 130)) + emit_buffer_block_legacy(var); + else + emit_buffer_block_native(var); +} + +void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + bool ssbo = var.storage == StorageClassStorageBuffer || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (ssbo) + SPIRV_CROSS_THROW("SSBOs not supported in legacy targets."); + + // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. + auto &block_flags = ir.meta[type.self].decoration.decoration_flags; + bool block_flag = block_flags.get(DecorationBlock); + block_flags.clear(DecorationBlock); + emit_struct(type); + if (block_flag) + block_flags.set(DecorationBlock); + emit_uniform(var); + statement(""); +} + +void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + Bitset flags = ir.get_buffer_block_flags(var); + bool ssbo = var.storage == StorageClassStorageBuffer || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool is_restrict = ssbo && flags.get(DecorationRestrict); + bool is_writeonly = ssbo && flags.get(DecorationNonReadable); + bool is_readonly = ssbo && flags.get(DecorationNonWritable); + bool is_coherent = ssbo && flags.get(DecorationCoherent); + + // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... + auto buffer_name = to_name(type.self, false); + + auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names; + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + // If we have a collision for any reason, just fallback immediately. + if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) || + resource_names.find(buffer_name) != end(resource_names)) + { + buffer_name = get_block_fallback_name(var.self); + } + + // Make sure we get something unique for both global name scope and block name scope. + // See GLSL 4.5 spec: section 4.3.9 for details. + add_variable(block_namespace, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. + if (buffer_name.empty()) + buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); + + block_names.insert(buffer_name); + block_namespace.insert(buffer_name); + + // Save for post-reflection later. + declared_block_names[var.self] = buffer_name; + + statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "", + is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ", + buffer_name); + + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + + // var.self can be used as a backup name for the block name, + // so we need to make sure we don't disturb the name here on a recompile. + // It will need to be reset if we have to recompile. + preserve_alias_on_reset(var.self); + add_resource_name(var.self); + end_scope_decl(to_name(var.self) + type_to_array_glsl(type)); + statement(""); +} + +void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + // Block names should never alias. + auto buffer_name = to_name(type.self, false); + size_t buffer_size = (get_declared_struct_size(type) + 15) / 16; + + SPIRType::BaseType basic_type; + if (get_common_basic_type(type, basic_type)) + { + SPIRType tmp; + tmp.basetype = basic_type; + tmp.vecsize = 4; + if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) + SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); + + auto flags = ir.get_buffer_block_flags(var); + statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", + buffer_size, "];"); + } + else + SPIRV_CROSS_THROW("All basic types in a flattened block must be the same."); +} + +const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) +{ + auto &execution = get_entry_point(); + + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) + { + if (is_legacy() && execution.model == ExecutionModelVertex) + return var.storage == StorageClassInput ? "attribute " : "varying "; + else if (is_legacy() && execution.model == ExecutionModelFragment) + return "varying "; // Fragment outputs are renamed so they never hit this case. + else + return var.storage == StorageClassInput ? "in " : "out "; + } + else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || + var.storage == StorageClassPushConstant) + { + return "uniform "; + } + else if (var.storage == StorageClassRayPayloadNV) + { + return "rayPayloadNV "; + } + else if (var.storage == StorageClassIncomingRayPayloadNV) + { + return "rayPayloadInNV "; + } + else if (var.storage == StorageClassHitAttributeNV) + { + return "hitAttributeNV "; + } + + return ""; +} + +void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) +{ + auto &type = get<SPIRType>(var.basetype); + if (!type.array.empty()) + SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); + + auto old_flags = ir.meta[type.self].decoration.decoration_flags; + // Emit the members as if they are part of a block to get all qualifiers. + ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + auto &membertype = get<SPIRType>(member); + + if (membertype.basetype == SPIRType::Struct) + SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables."); + + // Pass in the varying qualifier here so it will appear in the correct declaration order. + // Replace member name while emitting it so it encodes both struct name and member name. + // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, + // which is not allowed. + auto backup_name = get_member_name(type.self, i); + auto member_name = to_member_name(type, i); + set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); + emit_struct_member(type, member, i, qual); + // Restore member name. + set_member_name(type.self, i, member_name); + i++; + } + + ir.meta[type.self].decoration.decoration_flags = old_flags; + + // Treat this variable as flattened from now on. + flattened_structs.insert(var.self); +} + +void CompilerGLSL::emit_interface_block(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + // Either make it plain in/out or in/out blocks depending on what shader is doing ... + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + const char *qual = to_storage_qualifiers_glsl(var); + + if (block) + { + // ESSL earlier than 310 and GLSL earlier than 150 did not support + // I/O variables which are struct types. + // To support this, flatten the struct into separate varyings instead. + if ((options.es && options.version < 310) || (!options.es && options.version < 150)) + { + // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. + // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). + emit_flattened_io_block(var, qual); + } + else + { + if (options.es && options.version < 320) + { + // Geometry and tessellation extensions imply this extension. + if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader")) + require_extension_internal("GL_EXT_shader_io_blocks"); + } + + // Block names should never alias. + auto block_name = to_name(type.self, false); + + // The namespace for I/O blocks is separate from other variables in GLSL. + auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names; + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace)) + block_name = get_fallback_name(type.self); + else + block_namespace.insert(block_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + if (block_name.empty()) + block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); + + // Instance names cannot alias block names. + resource_names.insert(block_name); + + statement(layout_for_variable(var), qual, block_name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + + add_resource_name(var.self); + end_scope_decl(join(to_name(var.self), type_to_array_glsl(type))); + statement(""); + } + } + else + { + // ESSL earlier than 310 and GLSL earlier than 150 did not support + // I/O variables which are struct types. + // To support this, flatten the struct into separate varyings instead. + if (type.basetype == SPIRType::Struct && + ((options.es && options.version < 310) || (!options.es && options.version < 150))) + { + emit_flattened_io_block(var, qual); + } + else + { + add_resource_name(var.self); + statement(layout_for_variable(var), to_qualifiers_glsl(var.self), + variable_decl(type, to_name(var.self), var.self), ";"); + + // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). + if (var.storage == StorageClassOutput && var.initializer) + { + auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back( + [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); + } + } + } +} + +void CompilerGLSL::emit_uniform(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + if (!options.es && options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); + } + + add_resource_name(var.self); + statement(layout_for_variable(var), variable_decl(var), ";"); +} + +string CompilerGLSL::constant_value_macro_name(uint32_t id) +{ + return join("SPIRV_CROSS_CONSTANT_ID_", id); +} + +void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) +{ + auto &type = get<SPIRType>(constant.basetype); + auto name = to_name(constant.self); + statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); +} + +void CompilerGLSL::emit_constant(const SPIRConstant &constant) +{ + auto &type = get<SPIRType>(constant.constant_type); + auto name = to_name(constant.self); + + SpecializationConstant wg_x, wg_y, wg_z; + uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + // This specialization constant is implicitly declared by emitting layout() in; + if (constant.self == workgroup_size_id) + return; + + // These specialization constants are implicitly declared by emitting layout() in; + // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration + // later can use macro overrides for work group size. + bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id; + + if (options.vulkan_semantics && is_workgroup_size_constant) + { + // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout(). + return; + } + else if (!options.vulkan_semantics && is_workgroup_size_constant && + !has_decoration(constant.self, DecorationSpecId)) + { + // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros. + return; + } + + // Only scalars have constant IDs. + if (has_decoration(constant.self, DecorationSpecId)) + { + if (options.vulkan_semantics) + { + statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", + variable_decl(type, name), " = ", constant_expression(constant), ";"); + } + else + { + const string ¯o_name = constant.specialization_constant_macro_name; + statement("#ifndef ", macro_name); + statement("#define ", macro_name, " ", constant_expression(constant)); + statement("#endif"); + + // For workgroup size constants, only emit the macros. + if (!is_workgroup_size_constant) + statement("const ", variable_decl(type, name), " = ", macro_name, ";"); + } + } + else + { + statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); + } +} + +void CompilerGLSL::emit_entry_point_declarations() +{ +} + +void CompilerGLSL::replace_illegal_names() +{ + // clang-format off + static const unordered_set<string> keywords = { + "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh", + "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement", + "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor", + "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse", + "ceil", "cos", "cosh", "cross", "degrees", + "dFdx", "dFdxCoarse", "dFdxFine", + "dFdy", "dFdyCoarse", "dFdyFine", + "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2", + "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", + "frexp", "fwidth", "fwidthCoarse", "fwidthFine", + "greaterThan", "greaterThanEqual", "groupMemoryBarrier", + "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor", + "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample", + "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2", + "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared", + "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual", + "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8", + "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow", + "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step", + "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets", + "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad", + "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize", + "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16", + "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow", + + "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer", + "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard", + "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", + "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", + "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", + "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", + "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", + "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", + "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", + "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp", + "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump", + "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly", + "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", + "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", + "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer", + "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static", + "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", + "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", + "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", + "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", + "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", + "while", "writeonly", + }; + // clang-format on + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + if (!is_hidden_variable(var)) + { + auto &m = ir.meta[var.self].decoration; + if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + } + }); +} + +void CompilerGLSL::replace_fragment_output(SPIRVariable &var) +{ + auto &m = ir.meta[var.self].decoration; + uint32_t location = 0; + if (m.decoration_flags.get(DecorationLocation)) + location = m.location; + + // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will + // do the access chain part of this for us. + auto &type = get<SPIRType>(var.basetype); + + if (type.array.empty()) + { + // Redirect the write to a specific render target in legacy GLSL. + m.alias = join("gl_FragData[", location, "]"); + + if (is_legacy_es() && location != 0) + require_extension_internal("GL_EXT_draw_buffers"); + } + else if (type.array.size() == 1) + { + // If location is non-zero, we probably have to add an offset. + // This gets really tricky since we'd have to inject an offset in the access chain. + // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. + m.alias = "gl_FragData"; + if (location != 0) + SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " + "This is unimplemented in SPIRV-Cross."); + + if (is_legacy_es()) + require_extension_internal("GL_EXT_draw_buffers"); + } + else + SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL."); + + var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. +} + +void CompilerGLSL::replace_fragment_outputs() +{ + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) + replace_fragment_output(var); + }); +} + +string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) +{ + if (out_type.vecsize == input_components) + return expr; + else if (input_components == 1 && !backend.can_swizzle_scalar) + return join(type_to_glsl(out_type), "(", expr, ")"); + else + { + // FIXME: This will not work with packed expressions. + auto e = enclose_expression(expr) + "."; + // Just clamp the swizzle index if we have more outputs than inputs. + for (uint32_t c = 0; c < out_type.vecsize; c++) + e += index_to_swizzle(min(c, input_components - 1)); + if (backend.swizzle_is_function && out_type.vecsize > 1) + e += "()"; + + remove_duplicate_swizzle(e); + return e; + } +} + +void CompilerGLSL::emit_pls() +{ + auto &execution = get_entry_point(); + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders."); + + if (!options.es) + SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES."); + + if (options.version < 300) + SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above."); + + if (!pls_inputs.empty()) + { + statement("__pixel_local_inEXT _PLSIn"); + begin_scope(); + for (auto &input : pls_inputs) + statement(pls_decl(input), ";"); + end_scope_decl(); + statement(""); + } + + if (!pls_outputs.empty()) + { + statement("__pixel_local_outEXT _PLSOut"); + begin_scope(); + for (auto &output : pls_outputs) + statement(pls_decl(output), ";"); + end_scope_decl(); + statement(""); + } +} + +void CompilerGLSL::fixup_image_load_store_access() +{ + ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) { + auto &vartype = expression_type(var); + if (vartype.basetype == SPIRType::Image) + { + // Older glslangValidator does not emit required qualifiers here. + // Solve this by making the image access as restricted as possible and loosen up if we need to. + // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. + + auto &flags = ir.meta[var].decoration.decoration_flags; + if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable)) + { + flags.set(DecorationNonWritable); + flags.set(DecorationNonReadable); + } + } + }); +} + +void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) +{ + Bitset emitted_builtins; + Bitset global_builtins; + const SPIRVariable *block_var = nullptr; + bool emitted_block = false; + bool builtin_array = false; + + // Need to use declared size in the type. + // These variables might have been declared, but not statically used, so we haven't deduced their size yet. + uint32_t cull_distance_size = 0; + uint32_t clip_distance_size = 0; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + Bitset builtins; + + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t index = 0; + for (auto &m : ir.meta[type.self].members) + { + if (m.builtin) + { + builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = this->get<SPIRType>(type.member_types[index]).array.front(); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = this->get<SPIRType>(type.member_types[index]).array.front(); + } + index++; + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + // While we're at it, collect all declared global builtins (HLSL mostly ...). + auto &m = ir.meta[var.self].decoration; + if (m.builtin) + { + global_builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = type.array.front(); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = type.array.front(); + } + } + + if (builtins.empty()) + return; + + if (emitted_block) + SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); + + emitted_builtins = builtins; + emitted_block = true; + builtin_array = !type.array.empty(); + block_var = &var; + }); + + global_builtins = + Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | + (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance))); + + // Try to collect all other declared builtins. + if (!emitted_block) + emitted_builtins = global_builtins; + + // Can't declare an empty interface block. + if (emitted_builtins.empty()) + return; + + if (storage == StorageClassOutput) + statement("out gl_PerVertex"); + else + statement("in gl_PerVertex"); + + begin_scope(); + if (emitted_builtins.get(BuiltInPosition)) + statement("vec4 gl_Position;"); + if (emitted_builtins.get(BuiltInPointSize)) + statement("float gl_PointSize;"); + if (emitted_builtins.get(BuiltInClipDistance)) + statement("float gl_ClipDistance[", clip_distance_size, "];"); + if (emitted_builtins.get(BuiltInCullDistance)) + statement("float gl_CullDistance[", cull_distance_size, "];"); + + bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl; + if (builtin_array) + { + // Make sure the array has a supported name in the code. + if (storage == StorageClassOutput) + set_name(block_var->self, "gl_out"); + else if (storage == StorageClassInput) + set_name(block_var->self, "gl_in"); + + if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) + end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]")); + else + end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]")); + } + else + end_scope_decl(); + statement(""); +} + +void CompilerGLSL::declare_undefined_values() +{ + bool emitted = false; + ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) { + statement(variable_decl(this->get<SPIRType>(undef.basetype), to_name(undef.self), undef.self), ";"); + emitted = true; + }); + + if (emitted) + statement(""); +} + +bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const +{ + bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable; + + if (statically_assigned) + { + auto *constant = maybe_get<SPIRConstant>(var.static_expression); + if (constant && constant->is_used_as_lut) + return true; + } + + return false; +} + +void CompilerGLSL::emit_resources() +{ + auto &execution = get_entry_point(); + + replace_illegal_names(); + + // Legacy GL uses gl_FragData[], redeclare all fragment outputs + // with builtins. + if (execution.model == ExecutionModelFragment && is_legacy()) + replace_fragment_outputs(); + + // Emit PLS blocks if we have such variables. + if (!pls_inputs.empty() || !pls_outputs.empty()) + emit_pls(); + + // Emit custom gl_PerVertex for SSO compatibility. + if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) + { + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + emit_declared_builtin_block(StorageClassInput, execution.model); + emit_declared_builtin_block(StorageClassOutput, execution.model); + break; + + case ExecutionModelVertex: + emit_declared_builtin_block(StorageClassOutput, execution.model); + break; + + default: + break; + } + } + else + { + // Need to redeclare clip/cull distance with explicit size to use them. + // SPIR-V mandates these builtins have a size declared. + const char *storage = execution.model == ExecutionModelFragment ? "in" : "out"; + if (clip_distance_count != 0) + statement(storage, " float gl_ClipDistance[", clip_distance_count, "];"); + if (cull_distance_count != 0) + statement(storage, " float gl_CullDistance[", cull_distance_count, "];"); + if (clip_distance_count != 0 || cull_distance_count != 0) + statement(""); + } + + if (position_invariant) + { + statement("invariant gl_Position;"); + statement(""); + } + + bool emitted = false; + + // If emitted Vulkan GLSL, + // emit specialization constants as actual floats, + // spec op expressions will redirect to the constant name. + // + for (auto &id_ : ir.ids_for_constant_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get<SPIRConstant>(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) + { + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get<SPIRConstantOp>()); + emitted = true; + } + else if (id.get_type() == TypeType) + { + auto &type = id.get<SPIRType>(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && + !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(type); + } + } + } + + if (emitted) + statement(""); + + // If we needed to declare work group size late, check here. + // If the work group size depends on a specialization constant, we need to declare the layout() block + // after constants (and their macros) have been declared. + if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && + execution.workgroup_size.constant != 0) + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0)) + { + vector<string> inputs; + build_workgroup_size(inputs, wg_x, wg_y, wg_z); + statement("layout(", merge(inputs), ") in;"); + statement(""); + } + } + + emitted = false; + + // Output UBOs and SSBOs + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + emit_buffer_block(var); + } + }); + + // Output push constant blocks + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + } + }); + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; + + // Output Uniform Constants (values, samplers, images, etc). + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular GL. + bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || + type.storage == StorageClassRayPayloadNV || type.storage == StorageClassHitAttributeNV || + type.storage == StorageClassIncomingRayPayloadNV) && + !is_hidden_variable(var)) + { + emit_uniform(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + // Output in/out interfaces. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + if (var.storage != StorageClassFunction && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var)) + { + emit_interface_block(var); + emitted = true; + } + else if (is_builtin_variable(var)) + { + // For gl_InstanceIndex emulation on GLES, the API user needs to + // supply this uniform. + if (options.vertex.support_nonzero_base_instance && + ir.meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics) + { + statement("uniform int SPIRV_Cross_BaseInstance;"); + emitted = true; + } + } + }); + + // Global variables. + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (var.storage != StorageClassOutput) + { + if (!variable_is_lut(var)) + { + add_resource_name(var.self); + statement(variable_decl(var), ";"); + emitted = true; + } + } + } + + if (emitted) + statement(""); + + declare_undefined_values(); +} + +// Returns a string representation of the ID, usable as a function arg. +// Default is to simply return the expression representation fo the arg ID. +// Subclasses may override to modify the return value. +string CompilerGLSL::to_func_call_arg(uint32_t id) +{ + // Make sure that we use the name of the original variable, and not the parameter alias. + uint32_t name_id = id; + auto *var = maybe_get<SPIRVariable>(id); + if (var && var->basevariable) + name_id = var->basevariable; + return to_expression(name_id); +} + +void CompilerGLSL::handle_invalid_expression(uint32_t id) +{ + // We tried to read an invalidated expression. + // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated. + forced_temporaries.insert(id); + force_recompile = true; +} + +// Converts the format of the current expression from packed to unpacked, +// by wrapping the expression in a constructor of the appropriate type. +// GLSL does not support packed formats, so simply return the expression. +// Subclasses that do will override +string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t) +{ + return expr_str; +} + +// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. +void CompilerGLSL::strip_enclosed_expression(string &expr) +{ + if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') + return; + + // Have to make sure that our first and last parens actually enclose everything inside it. + uint32_t paren_count = 0; + for (auto &c : expr) + { + if (c == '(') + paren_count++; + else if (c == ')') + { + paren_count--; + + // If we hit 0 and this is not the final char, our first and final parens actually don't + // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). + if (paren_count == 0 && &c != &expr.back()) + return; + } + } + expr.erase(expr.size() - 1, 1); + expr.erase(begin(expr)); +} + +string CompilerGLSL::enclose_expression(const string &expr) +{ + bool need_parens = false; + + // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back + // unary expressions. + if (!expr.empty()) + { + auto c = expr.front(); + if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') + need_parens = true; + } + + if (!need_parens) + { + uint32_t paren_count = 0; + for (auto c : expr) + { + if (c == '(' || c == '[') + paren_count++; + else if (c == ')' || c == ']') + { + assert(paren_count); + paren_count--; + } + else if (c == ' ' && paren_count == 0) + { + need_parens = true; + break; + } + } + assert(paren_count == 0); + } + + // If this expression contains any spaces which are not enclosed by parentheses, + // we need to enclose it so we can treat the whole string as an expression. + // This happens when two expressions have been part of a binary op earlier. + if (need_parens) + return join('(', expr, ')'); + else + return expr; +} + +string CompilerGLSL::dereference_expression(const std::string &expr) +{ + // If this expression starts with an address-of operator ('&'), then + // just return the part after the operator. + // TODO: Strip parens if unnecessary? + if (expr.front() == '&') + return expr.substr(1); + else + return join('*', expr); +} + +string CompilerGLSL::address_of_expression(const std::string &expr) +{ + // If this expression starts with a dereference operator ('*'), then + // just return the part after the operator. + // TODO: Strip parens if unnecessary? + if (expr.front() == '*') + return expr.substr(1); + else + return join('&', expr); +} + +// Just like to_expression except that we enclose the expression inside parentheses if needed. +string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_expression(id, register_expression_read)); +} + +string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) +{ + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get<SPIRExpression>(id); + bool need_transpose = e && e->need_transpose; + if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) + return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPackedType)); + else + return to_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) +{ + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get<SPIRExpression>(id); + bool need_transpose = e && e->need_transpose; + if (!need_transpose && has_extended_decoration(id, SPIRVCrossDecorationPacked)) + return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPackedType)); + else + return to_enclosed_expression(id, register_expression_read); +} + +string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && should_dereference(id)) + return dereference_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_expression(id, register_expression_read); +} + +string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_enclosed_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) +{ + auto expr = to_enclosed_expression(id); + if (has_extended_decoration(id, SPIRVCrossDecorationPacked)) + return join(expr, "[", index, "]"); + else + return join(expr, ".", index_to_swizzle(index)); +} + +string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) +{ + auto itr = invalid_expressions.find(id); + if (itr != end(invalid_expressions)) + handle_invalid_expression(id); + + if (ir.ids[id].get_type() == TypeExpression) + { + // We might have a more complex chain of dependencies. + // A possible scenario is that we + // + // %1 = OpLoad + // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. + // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. + // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. + // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. + // + // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, + // and see that we should not forward reads of the original variable. + auto &expr = get<SPIRExpression>(id); + for (uint32_t dep : expr.expression_dependencies) + if (invalid_expressions.find(dep) != end(invalid_expressions)) + handle_invalid_expression(dep); + } + + if (register_expression_read) + track_expression_read(id); + + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get<SPIRExpression>(id); + if (e.base_expression) + return to_enclosed_expression(e.base_expression) + e.expression; + else if (e.need_transpose) + { + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPacked); + return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed); + } + else + { + if (force_recompile) + { + // During first compilation phase, certain expression patterns can trigger exponential growth of memory. + // Avoid this by returning dummy expressions during this phase. + // Do not use empty expressions here, because those are sentinels for other cases. + return "_"; + } + else + return e.expression; + } + } + + case TypeConstant: + { + auto &c = get<SPIRConstant>(id); + auto &type = get<SPIRType>(c.constant_type); + + // WorkGroupSize may be a constant. + auto &dec = ir.meta[c.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); + else if (c.specialization) + return to_name(id); + else if (c.is_used_as_lut) + return to_name(id); + else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + return to_name(id); + else if (!type.array.empty() && !backend.can_declare_arrays_inline) + return to_name(id); + else + return constant_expression(c); + } + + case TypeConstantOp: + return to_name(id); + + case TypeVariable: + { + auto &var = get<SPIRVariable>(id); + // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, + // the variable has not been declared yet. + if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) + return to_expression(var.static_expression); + else if (var.deferred_declaration) + { + var.deferred_declaration = false; + return variable_decl(var); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(var); + } + else + { + auto &dec = ir.meta[var.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, var.storage); + else + return to_name(id); + } + } + + case TypeCombinedImageSampler: + // This type should never be taken the expression of directly. + // The intention is that texture sampling functions will extract the image and samplers + // separately and take their expressions as needed. + // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler + // expression ala sampler2D(texture, sampler). + SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); + + case TypeAccessChain: + // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. + SPIRV_CROSS_THROW("Access chains have no default expression representation."); + + default: + return to_name(id); + } +} + +string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) +{ + auto &type = get<SPIRType>(cop.basetype); + bool binary = false; + bool unary = false; + string op; + + if (is_legacy() && is_unsigned_opcode(cop.opcode)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + // TODO: Find a clean way to reuse emit_instruction. + switch (cop.opcode) + { + case OpSConvert: + case OpUConvert: + case OpFConvert: + op = type_to_glsl_constructor(type); + break; + +#define GLSL_BOP(opname, x) \ + case Op##opname: \ + binary = true; \ + op = x; \ + break + +#define GLSL_UOP(opname, x) \ + case Op##opname: \ + unary = true; \ + op = x; \ + break + + GLSL_UOP(SNegate, "-"); + GLSL_UOP(Not, "~"); + GLSL_BOP(IAdd, "+"); + GLSL_BOP(ISub, "-"); + GLSL_BOP(IMul, "*"); + GLSL_BOP(SDiv, "/"); + GLSL_BOP(UDiv, "/"); + GLSL_BOP(UMod, "%"); + GLSL_BOP(SMod, "%"); + GLSL_BOP(ShiftRightLogical, ">>"); + GLSL_BOP(ShiftRightArithmetic, ">>"); + GLSL_BOP(ShiftLeftLogical, "<<"); + GLSL_BOP(BitwiseOr, "|"); + GLSL_BOP(BitwiseXor, "^"); + GLSL_BOP(BitwiseAnd, "&"); + GLSL_BOP(LogicalOr, "||"); + GLSL_BOP(LogicalAnd, "&&"); + GLSL_UOP(LogicalNot, "!"); + GLSL_BOP(LogicalEqual, "=="); + GLSL_BOP(LogicalNotEqual, "!="); + GLSL_BOP(IEqual, "=="); + GLSL_BOP(INotEqual, "!="); + GLSL_BOP(ULessThan, "<"); + GLSL_BOP(SLessThan, "<"); + GLSL_BOP(ULessThanEqual, "<="); + GLSL_BOP(SLessThanEqual, "<="); + GLSL_BOP(UGreaterThan, ">"); + GLSL_BOP(SGreaterThan, ">"); + GLSL_BOP(UGreaterThanEqual, ">="); + GLSL_BOP(SGreaterThanEqual, ">="); + + case OpSelect: + { + if (cop.arguments.size() < 3) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // This one is pretty annoying. It's triggered from + // uint(bool), int(bool) from spec constants. + // In order to preserve its compile-time constness in Vulkan GLSL, + // we need to reduce the OpSelect expression back to this simplified model. + // If we cannot, fail. + if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) + { + // Implement as a simple cast down below. + } + else + { + // Implement a ternary and pray the compiler understands it :) + return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); + } + break; + } + + case OpVectorShuffle: + { + string expr = type_to_glsl_constructor(type); + expr += "("; + + uint32_t left_components = expression_type(cop.arguments[0]).vecsize; + string left_arg = to_enclosed_expression(cop.arguments[0]); + string right_arg = to_enclosed_expression(cop.arguments[1]); + + for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) + { + uint32_t index = cop.arguments[i]; + if (index >= left_components) + expr += right_arg + "." + "xyzw"[index - left_components]; + else + expr += left_arg + "." + "xyzw"[index]; + + if (i + 1 < uint32_t(cop.arguments.size())) + expr += ", "; + } + + expr += ")"; + return expr; + } + + case OpCompositeExtract: + { + auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + return expr; + } + + case OpCompositeInsert: + SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported."); + + default: + // Some opcodes are unimplemented here, these are currently not possible to test from glslang. + SPIRV_CROSS_THROW("Unimplemented spec constant op."); + } + + uint32_t bit_width = 0; + if (unary || binary) + bit_width = expression_type(cop.arguments[0]).width; + + SPIRType::BaseType input_type; + bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); + + switch (cop.opcode) + { + case OpIEqual: + case OpINotEqual: + input_type = to_signed_basetype(bit_width); + break; + + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpSMod: + case OpSDiv: + case OpShiftRightArithmetic: + input_type = to_signed_basetype(bit_width); + break; + + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpUMod: + case OpUDiv: + case OpShiftRightLogical: + input_type = to_unsigned_basetype(bit_width); + break; + + default: + input_type = type.basetype; + break; + } + +#undef GLSL_BOP +#undef GLSL_UOP + if (binary) + { + if (cop.arguments.size() < 2) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + string cast_op0; + string cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], + cop.arguments[1], skip_cast_if_equal_type); + + if (type.basetype != input_type && type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + auto expr = bitcast_glsl_op(type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + return expr; + } + else + return join("(", cast_op0, " ", op, " ", cast_op1, ")"); + } + else if (unary) + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // Auto-bitcast to result type as needed. + // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. + return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); + } + else + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + return join(op, "(", to_expression(cop.arguments[0]), ")"); + } +} + +string CompilerGLSL::constant_expression(const SPIRConstant &c) +{ + auto &type = get<SPIRType>(c.constant_type); + + if (type.pointer) + { + return backend.null_pointer_literal; + } + else if (!c.subconstants.empty()) + { + // Handles Arrays and structures. + string res; + if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && + type.array.empty()) + { + res = type_to_glsl_constructor(type) + "{ "; + } + else if (backend.use_initializer_list) + { + res = "{ "; + } + else + { + res = type_to_glsl_constructor(type) + "("; + } + + for (auto &elem : c.subconstants) + { + auto &subc = get<SPIRConstant>(elem); + if (subc.specialization) + res += to_name(elem); + else + res += constant_expression(subc); + + if (&elem != &c.subconstants.back()) + res += ", "; + } + + res += backend.use_initializer_list ? " }" : ")"; + return res; + } + else if (c.columns() == 1) + { + return constant_expression_vector(c, 0); + } + else + { + string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "("; + for (uint32_t col = 0; col < c.columns(); col++) + { + if (c.specialization_constant_id(col) != 0) + res += to_name(c.specialization_constant_id(col)); + else + res += constant_expression_vector(c, col); + + if (col + 1 < c.columns()) + res += ", "; + } + res += ")"; + return res; + } +} + +#ifdef _MSC_VER +// sprintf warning. +// We cannot rely on snprintf existing because, ..., MSVC. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f16(col, row); + + // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots + // of complicated workarounds, just value-cast to the half type always. + if (std::isnan(float_value) || std::isinf(float_value)) + { + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + + if (float_value == numeric_limits<float>::infinity()) + res = join(type_to_glsl(type), "(1.0 / 0.0)"); + else if (float_value == -numeric_limits<float>::infinity()) + res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + else if (std::isnan(float_value)) + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + else + { + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); + } + + return res; +} + +string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f32(col, row); + + if (std::isnan(float_value) || std::isinf(float_value)) + { + // Use special representation. + if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Float; + in_type.basetype = SPIRType::UInt; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 32; + in_type.width = 32; + + char print_buffer[32]; + sprintf(print_buffer, "0x%xu", c.scalar(col, row)); + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); + } + else + { + if (float_value == numeric_limits<float>::infinity()) + { + if (backend.float_literal_suffix) + res = "(1.0f / 0.0f)"; + else + res = "(1.0 / 0.0)"; + } + else if (float_value == -numeric_limits<float>::infinity()) + { + if (backend.float_literal_suffix) + res = "(-1.0f / 0.0f)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(float_value)) + { + if (backend.float_literal_suffix) + res = "(0.0f / 0.0f)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + } + else + { + res = convert_to_string(float_value, current_locale_radix_character); + if (backend.float_literal_suffix) + res += "f"; + } + + return res; +} + +std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + double double_value = c.scalar_f64(col, row); + + if (std::isnan(double_value) || std::isinf(double_value)) + { + // Use special representation. + if (!is_legacy()) + { + SPIRType out_type; + SPIRType in_type; + out_type.basetype = SPIRType::Double; + in_type.basetype = SPIRType::UInt64; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 64; + in_type.width = 64; + + uint64_t u64_value = c.scalar_u64(col, row); + + if (options.es) + SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile."); + require_extension_internal("GL_ARB_gpu_shader_int64"); + + char print_buffer[64]; + sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value), + backend.long_long_literal_suffix ? "ull" : "ul"); + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); + } + else + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + + if (double_value == numeric_limits<double>::infinity()) + { + if (backend.double_literal_suffix) + res = "(1.0lf / 0.0lf)"; + else + res = "(1.0 / 0.0)"; + } + else if (double_value == -numeric_limits<double>::infinity()) + { + if (backend.double_literal_suffix) + res = "(-1.0lf / 0.0lf)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(double_value)) + { + if (backend.double_literal_suffix) + res = "(0.0lf / 0.0lf)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + } + else + { + res = convert_to_string(double_value, current_locale_radix_character); + if (backend.double_literal_suffix) + res += "lf"; + } + + return res; +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) +{ + auto type = get<SPIRType>(c.constant_type); + type.columns = 1; + + auto scalar_type = type; + scalar_type.vecsize = 1; + + string res; + bool splat = backend.use_constructor_splatting && c.vector_size() > 1; + bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; + + if (!type_is_floating_point(type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } + + if (splat || swizzle_splat) + { + // Cannot use constant splatting if we have specialization constants somewhere in the vector. + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.specialization_constant_id(vector, i) != 0) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + + if (splat || swizzle_splat) + { + if (type.width == 64) + { + uint64_t ident = c.scalar_u64(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar_u64(vector, i)) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + else + { + uint32_t ident = c.scalar(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar(vector, i)) + { + splat = false; + swizzle_splat = false; + } + } + } + } + + if (c.vector_size() > 1 && !swizzle_splat) + res += type_to_glsl(type) + "("; + + switch (type.basetype) + { + case SPIRType::Half: + if (splat || swizzle_splat) + { + res += convert_half_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_half_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Float: + if (splat || swizzle_splat) + { + res += convert_float_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_float_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Double: + if (splat || swizzle_splat) + { + res += convert_double_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_double_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int64: + if (splat) + { + res += convert_to_string(c.scalar_i64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ll"; + else + res += "l"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_i64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ll"; + else + res += "l"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt64: + if (splat) + { + res += convert_to_string(c.scalar_u64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_u64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, 0) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar(vector, i)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, i) < 0) + SPIRV_CROSS_THROW( + "Tried to convert uint literal into int, but this made the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int: + if (splat) + res += convert_to_string(c.scalar_i32(vector, 0)); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i32(vector, i)); + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UShort: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i16(vector, 0) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + } + else + res += backend.uint16_t_literal_suffix; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar(vector, i)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i16(vector, i) < 0) + SPIRV_CROSS_THROW( + "Tried to convert uint literal into int, but this made the literal negative."); + } + else + res += backend.uint16_t_literal_suffix; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Short: + if (splat) + { + res += convert_to_string(c.scalar_i16(vector, 0)); + res += backend.int16_t_literal_suffix; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_i16(vector, i)); + res += backend.int16_t_literal_suffix; + } + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UByte: + if (splat) + { + res += convert_to_string(c.scalar_u8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u8(vector, i)); + res += ")"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::SByte: + if (splat) + { + res += convert_to_string(c.scalar_i8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i8(vector, i)); + res += ")"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Boolean: + if (splat) + res += c.scalar(vector, 0) ? "true" : "false"; + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_name(c.specialization_constant_id(vector, i)); + else + res += c.scalar(vector, i) ? "true" : "false"; + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + default: + SPIRV_CROSS_THROW("Invalid constant expression basetype."); + } + + if (c.vector_size() > 1 && !swizzle_splat) + res += ")"; + + return res; +} + +string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) +{ + auto &type = get<SPIRType>(result_type); + auto &flags = ir.meta[result_id].decoration.decoration_flags; + + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (current_continue_block && !hoisted_temporaries.count(result_id)) + { + auto &header = get<SPIRBlock>(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile = true; + } + + return join(to_name(result_id), " = "); + } + else if (hoisted_temporaries.count(result_id)) + { + // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. + return join(to_name(result_id), " = "); + } + else + { + // The result_id has not been made into an expression yet, so use flags interface. + add_local_variable_name(result_id); + return join(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + } +} + +bool CompilerGLSL::expression_is_forwarded(uint32_t id) +{ + return forwarded_temporaries.find(id) != end(forwarded_temporaries); +} + +SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, + bool suppress_usage_tracking) +{ + if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) + { + // Just forward it without temporary. + // If the forward is trivial, we do not force flushing to temporary for this expression. + if (!suppress_usage_tracking) + forwarded_temporaries.insert(result_id); + + return set<SPIRExpression>(result_id, rhs, result_type, true); + } + else + { + // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). + statement(declare_temporary(result_type, result_id), rhs, ";"); + return set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + } +} + +void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, + join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) +{ + auto &type = get<SPIRType>(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. + expr += op; + expr += to_extract_component_expression(operand, i); + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(operand)); + + inherit_expression_dependencies(result_id, operand); +} + +void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + auto &type = get<SPIRType>(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. + expr += to_extract_component_expression(op0, i); + expr += ' '; + expr += op; + expr += ' '; + expr += to_extract_component_expression(op1, i); + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. + // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected + // since equality test is exactly the same. + bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); + + // Create a fake type so we can bitcast to it. + // We only deal with regular arithmetic types here like int, uints and so on. + SPIRType expected_type; + expected_type.basetype = input_type; + expected_type.vecsize = type0.vecsize; + expected_type.columns = type0.columns; + expected_type.width = type0.width; + + if (cast) + { + cast_op0 = bitcast_glsl(expected_type, op0); + cast_op1 = bitcast_glsl(expected_type, op1); + } + else + { + // If we don't cast, our actual input type is that of the first (or second) argument. + cast_op0 = to_enclosed_unpacked_expression(op0); + cast_op1 = to_enclosed_unpacked_expression(op1); + input_type = type0.basetype; + } + + return expected_type; +} + +void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get<SPIRType>(result_type); + + // We might have casted away from the result type, so bitcast again. + // For example, arithmetic right shift with uint inputs. + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + } + else + expr += join(cast_op0, " ", op, " ", cast_op1); + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), + forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) +{ + auto &out_type = get<SPIRType>(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op = expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_expression(op0); + + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.basetype = expected_result_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, + uint32_t op0, uint32_t op1, uint32_t op2, + const char *op, + SPIRType::BaseType input_type) +{ + auto &out_type = get<SPIRType>(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_expression(op0); + string cast_op1 = expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_expression(op1); + string cast_op2 = expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_expression(op2); + + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get<SPIRType>(result_type); + + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +// EXT_shader_texture_lod only concerns fragment shaders so lod tex functions +// are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex +// functions in vertex shaders so we revert those back to plain calls when +// the lod is a constant value of zero. +bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod) +{ + auto &execution = get_entry_point(); + bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment; + if (!allowed && lod != 0) + { + auto *lod_constant = maybe_get<SPIRConstant>(lod); + if (!lod_constant || lod_constant->scalar_f32() != 0.0f) + { + SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders."); + } + } + return allowed; +} + +string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t tex) +{ + const char *type; + switch (imgtype.image.dim) + { + case spv::Dim1D: + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; + break; + case spv::Dim2D: + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + break; + case spv::Dim3D: + type = "3D"; + break; + case spv::DimCube: + type = "Cube"; + break; + case spv::DimRect: + type = "2DRect"; + break; + case spv::DimBuffer: + type = "Buffer"; + break; + case spv::DimSubpassData: + type = "2D"; + break; + default: + type = ""; + break; + } + + bool use_explicit_lod = check_explicit_lod_allowed(lod); + + if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad" || op == "textureProjGrad") + { + if (is_legacy_es()) + { + if (use_explicit_lod) + require_extension_internal("GL_EXT_shader_texture_lod"); + } + else if (is_legacy()) + require_extension_internal("GL_ARB_shader_texture_lod"); + } + + if (op == "textureLodOffset" || op == "textureProjLodOffset") + { + if (is_legacy_es()) + SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + + require_extension_internal("GL_EXT_gpu_shader4"); + } + + // GLES has very limited support for shadow samplers. + // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, + // everything else can just throw + if (image_is_comparison(imgtype, tex) && is_legacy_es()) + { + if (op == "texture" || op == "textureProj") + require_extension_internal("GL_EXT_shadow_samplers"); + else + SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); + } + + bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex); + std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture"; + + if (op == "texture") + return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); + else if (op == "textureLod") + { + if (use_explicit_lod) + return join(type_prefix, type, is_legacy_es() ? "LodEXT" : "Lod"); + else + return join(type_prefix, type); + } + else if (op == "textureProj") + return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj"); + else if (op == "textureGrad") + return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); + else if (op == "textureProjLod") + { + if (use_explicit_lod) + return join(type_prefix, type, is_legacy_es() ? "ProjLodEXT" : "ProjLod"); + else + return join(type_prefix, type, "Proj"); + } + else if (op == "textureLodOffset") + { + if (use_explicit_lod) + return join(type_prefix, type, "LodOffset"); + else + return join(type_prefix, type); + } + else if (op == "textureProjGrad") + return join(type_prefix, type, + is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); + else if (op == "textureProjLodOffset") + { + if (use_explicit_lod) + return join(type_prefix, type, "ProjLodOffset"); + else + return join(type_prefix, type, "ProjOffset"); + } + else + { + SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); + } +} + +bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto *cleft = maybe_get<SPIRConstant>(left); + auto *cright = maybe_get<SPIRConstant>(right); + auto &lerptype = expression_type(lerp); + + // If our targets aren't constants, we cannot use construction. + if (!cleft || !cright) + return false; + + // If our targets are spec constants, we cannot use construction. + if (cleft->specialization || cright->specialization) + return false; + + // We can only use trivial construction if we have a scalar + // (should be possible to do it for vectors as well, but that is overkill for now). + if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1) + return false; + + // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. + bool ret = false; + switch (type.basetype) + { + case SPIRType::Short: + case SPIRType::UShort: + ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1; + break; + + case SPIRType::Int: + case SPIRType::UInt: + ret = cleft->scalar() == 0 && cright->scalar() == 1; + break; + + case SPIRType::Half: + ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f; + break; + + case SPIRType::Float: + ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f; + break; + + case SPIRType::Double: + ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0; + break; + + case SPIRType::Int64: + case SPIRType::UInt64: + ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1; + break; + + default: + break; + } + + if (ret) + op = type_to_glsl_constructor(type); + return ret; +} + +string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, + uint32_t false_value) +{ + string expr; + auto &lerptype = expression_type(select); + + if (lerptype.vecsize == 1) + expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", + to_enclosed_pointer_expression(false_value)); + else + { + auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; + + expr = type_to_glsl_constructor(restype); + expr += "("; + for (uint32_t i = 0; i < restype.vecsize; i++) + { + expr += swiz(select, i); + expr += " ? "; + expr += swiz(true_value, i); + expr += " : "; + expr += swiz(false_value, i); + if (i + 1 < restype.vecsize) + expr += ", "; + } + expr += ")"; + } + + return expr; +} + +void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto &lerptype = expression_type(lerp); + auto &restype = get<SPIRType>(result_type); + + // If this results in a variable pointer, assume it may be written through. + if (restype.pointer) + { + register_write(left); + register_write(right); + } + + string mix_op; + bool has_boolean_mix = backend.boolean_mix_support && + ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); + bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); + + // Cannot use boolean mix when the lerp argument is just one boolean, + // fall back to regular trinary statements. + if (lerptype.vecsize == 1) + has_boolean_mix = false; + + // If we can reduce the mix to a simple cast, do so. + // This helps for cases like int(bool), uint(bool) which is implemented with + // OpSelect bool 1 0. + if (trivial_mix) + { + emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); + } + else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) + { + // Boolean mix not supported on desktop without extension. + // Was added in OpenGL 4.5 with ES 3.1 compat. + // + // Could use GL_EXT_shader_integer_mix on desktop at least, + // but Apple doesn't support it. :( + // Just implement it as ternary expressions. + auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left); + emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); + inherit_expression_dependencies(id, left); + inherit_expression_dependencies(id, right); + inherit_expression_dependencies(id, lerp); + } + else + emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); +} + +string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) +{ + // Keep track of the array indices we have used to load the image. + // We'll need to use the same array index into the combined image sampler array. + auto image_expr = to_expression(image_id); + string array_expr; + auto array_index = image_expr.find_first_of('['); + if (array_index != string::npos) + array_expr = image_expr.substr(array_index, string::npos); + + auto &args = current_function->arguments; + + // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect + // all possible combinations into new sampler2D uniforms. + auto *image = maybe_get_backing_variable(image_id); + auto *samp = maybe_get_backing_variable(samp_id); + if (image) + image_id = image->self; + if (samp) + samp_id = samp->self; + + auto image_itr = find_if(begin(args), end(args), + [image_id](const SPIRFunction::Parameter ¶m) { return param.id == image_id; }); + + auto sampler_itr = find_if(begin(args), end(args), + [samp_id](const SPIRFunction::Parameter ¶m) { return param.id == samp_id; }); + + if (image_itr != end(args) || sampler_itr != end(args)) + { + // If any parameter originates from a parameter, we will find it in our argument list. + bool global_image = image_itr == end(args); + bool global_sampler = sampler_itr == end(args); + uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); + uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args)); + + auto &combined = current_function->combined_parameters; + auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { + return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && + p.sampler_id == sid; + }); + + if (itr != end(combined)) + return to_expression(itr->id) + array_expr; + else + { + SPIRV_CROSS_THROW( + "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used " + "before compile() was called?"); + } + } + else + { + // For global sampler2D, look directly at the global remapping table. + auto &mapping = combined_image_samplers; + auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == samp_id; + }); + + if (itr != end(combined_image_samplers)) + return to_expression(itr->combined_id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " + "before compile() was called?"); + } + } +} + +void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + if (options.vulkan_semantics && combined_image_samplers.empty()) + { + emit_binary_func_op(result_type, result_id, image_id, samp_id, + type_to_glsl(get<SPIRType>(result_type), result_id).c_str()); + + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + forwarded_temporaries.erase(result_id); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } +} + +static inline bool image_opcode_is_sample_no_dref(Op op) +{ + switch (op) + { + case OpImageSampleExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageFetch: + case OpImageRead: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseFetch: + case OpImageSparseRead: + return true; + + default: + return false; + } +} + +void CompilerGLSL::emit_texture_op(const Instruction &i) +{ + auto *ops = stream(i); + auto op = static_cast<Op>(i.op); + uint32_t length = i.length; + + vector<uint32_t> inherited_expressions; + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + bool fetch = false; + const uint32_t *opt = nullptr; + + inherited_expressions.push_back(coord); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + break; + + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + proj = true; + break; + + case OpImageDrefGather: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + break; + + case OpImageGather: + comp = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + break; + + case OpImageFetch: + case OpImageRead: // Reads == fetches in Metal (other langs will not get here) + opt = &ops[4]; + length -= 4; + fetch = true; + break; + + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; + break; + + default: + opt = &ops[4]; + length -= 4; + break; + } + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get<SPIRType>(type.self); + + uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } + + if (dref) + inherited_expressions.push_back(dref); + + if (proj) + coord_components++; + if (imgtype.image.arrayed) + coord_components++; + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t flags = 0; + + if (length) + { + flags = *opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + + string expr; + bool forward = false; + expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), + (!!grad_x || !!grad_y), !!dref, lod); + expr += "("; + expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, + coffset, offset, bias, comp, sample, &forward); + expr += ")"; + + // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. + if (is_legacy() && image_is_comparison(imgtype, img)) + expr += ".r"; + + // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. + // Remap back to 4 components as sampling opcodes expect. + bool image_is_depth; + const auto *combined = maybe_get<SPIRCombinedImageSampler>(img); + if (combined) + image_is_depth = image_is_comparison(imgtype, combined->image); + else + image_is_depth = image_is_comparison(imgtype, img); + + if (image_is_depth && backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) + { + expr = remap_swizzle(get<SPIRType>(result_type), 1, expr); + } + + // Deals with reads from MSL. We might need to downconvert to fewer components. + if (op == OpImageRead) + expr = remap_swizzle(get<SPIRType>(result_type), 4, expr); + + emit_op(result_type, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +{ + auto *c = maybe_get<SPIRConstant>(id); + if (!c) + return false; + return c->constant_is_null(); +} + +// Returns the function name for a texture sampling function for the specified image and sampling characteristics. +// For some subclasses, the function is a method on the specified image. +string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, + uint32_t lod) +{ + string fname; + + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = false; + if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + image_is_comparison(imgtype, tex) && lod) + { + if (!expression_is_constant_null(lod)) + { + SPIRV_CROSS_THROW( + "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL."); + } + workaround_lod_array_shadow_as_grad = true; + } + + if (is_fetch) + fname += "texelFetch"; + else + { + fname += "texture"; + + if (is_gather) + fname += "Gather"; + if (has_array_offsets) + fname += "Offsets"; + if (is_proj) + fname += "Proj"; + if (has_grad || workaround_lod_array_shadow_as_grad) + fname += "Grad"; + if (!!lod && !workaround_lod_array_shadow_as_grad) + fname += "Lod"; + } + + if (has_offset) + fname += "Offset"; + + return is_legacy() ? legacy_tex_op(fname, imgtype, lod, tex) : fname; +} + +std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +{ + auto *var = maybe_get_backing_variable(id); + + // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. + // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. + if (var) + { + auto &type = get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (options.vulkan_semantics) + { + // Newer glslang supports this extension to deal with texture2D as argument to texture functions. + if (dummy_sampler_id) + SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining."); + require_extension_internal("GL_EXT_samplerless_texture_functions"); + } + else + { + if (!dummy_sampler_id) + SPIRV_CROSS_THROW( + "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?"); + + return to_combined_image_sampler(id, dummy_sampler_id); + } + } + } + + return to_expression(id); +} + +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. +string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, + uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, + uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward) +{ + string farg_str; + if (is_fetch) + farg_str = convert_separate_image_to_expression(img); + else + farg_str = to_expression(img); + + bool swizz_func = backend.swizzle_is_function; + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; + + switch (comps) + { + case 1: + return ".x"; + case 2: + return swizz_func ? ".xy()" : ".xy"; + case 3: + return swizz_func ? ".xyz()" : ".xyz"; + default: + return ""; + } + }; + + bool forward = should_forward(coord); + + // The IR can give us more components than we need, so chop them off as needed. + auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize); + // Only enclose the UV expression if needed. + auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr); + + // texelFetch only takes int, not uint. + auto &coord_type = expression_type(coord); + if (coord_type.basetype == SPIRType::UInt) + { + auto expected_type = coord_type; + expected_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); + } + + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = + ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + image_is_comparison(imgtype, img) && lod; + + if (dref) + { + forward = forward && should_forward(dref); + + // SPIR-V splits dref and coordinate. + if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. + { + farg_str += ", "; + farg_str += to_expression(coord); + farg_str += ", "; + farg_str += to_expression(dref); + } + else if (is_proj) + { + // Have to reshuffle so we get vec4(coord, dref, proj), special case. + // Other shading languages splits up the arguments for coord and compare value like SPIR-V. + // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. + farg_str += ", vec4("; + + if (imgtype.image.dim == Dim1D) + { + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(coord) + ".x"; + farg_str += ", "; + farg_str += "0.0, "; + farg_str += to_expression(dref); + farg_str += ", "; + farg_str += to_enclosed_expression(coord) + ".y)"; + } + else if (imgtype.image.dim == Dim2D) + { + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy"); + farg_str += ", "; + farg_str += to_expression(dref); + farg_str += ", "; + farg_str += to_enclosed_expression(coord) + ".z)"; + } + else + SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); + } + else + { + // Create a composite which merges coord/dref into a single vector. + auto type = expression_type(coord); + type.vecsize = coord_components + 1; + farg_str += ", "; + farg_str += type_to_glsl_constructor(type); + farg_str += "("; + farg_str += coord_expr; + farg_str += ", "; + farg_str += to_expression(dref); + farg_str += ")"; + } + } + else + { + farg_str += ", "; + farg_str += coord_expr; + } + + if (grad_x || grad_y) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + farg_str += ", "; + farg_str += to_expression(grad_x); + farg_str += ", "; + farg_str += to_expression(grad_y); + } + + if (lod) + { + if (workaround_lod_array_shadow_as_grad) + { + // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. + // Implementing this as plain texture() is not safe on some implementations. + if (imgtype.image.dim == Dim2D) + farg_str += ", vec2(0.0), vec2(0.0)"; + else if (imgtype.image.dim == DimCube) + farg_str += ", vec3(0.0), vec3(0.0)"; + } + else + { + if (check_explicit_lod_allowed(lod)) + { + forward = forward && should_forward(lod); + farg_str += ", "; + farg_str += to_expression(lod); + } + } + } + else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. + farg_str += ", 0"; + } + + if (coffset) + { + forward = forward && should_forward(coffset); + farg_str += ", "; + farg_str += to_expression(coffset); + } + else if (offset) + { + forward = forward && should_forward(offset); + farg_str += ", "; + farg_str += to_expression(offset); + } + + if (bias) + { + forward = forward && should_forward(bias); + farg_str += ", "; + farg_str += to_expression(bias); + } + + if (comp) + { + forward = forward && should_forward(comp); + farg_str += ", "; + farg_str += to_expression(comp); + } + + if (sample) + { + farg_str += ", "; + farg_str += to_expression(sample); + } + + *p_forward = forward; + + return farg_str; +} + +void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) +{ + auto op = static_cast<GLSLstd450>(eop); + + if (is_legacy() && is_unsigned_glsl_opcode(op)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + // FP fiddling + case GLSLstd450Round: + emit_unary_func_op(result_type, id, args[0], "round"); + break; + + case GLSLstd450RoundEven: + if ((options.es && options.version >= 300) || (!options.es && options.version >= 130)) + emit_unary_func_op(result_type, id, args[0], "roundEven"); + else + SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up."); + break; + + case GLSLstd450Trunc: + emit_unary_func_op(result_type, id, args[0], "trunc"); + break; + case GLSLstd450SAbs: + emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); + break; + case GLSLstd450FAbs: + emit_unary_func_op(result_type, id, args[0], "abs"); + break; + case GLSLstd450SSign: + emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); + break; + case GLSLstd450FSign: + emit_unary_func_op(result_type, id, args[0], "sign"); + break; + case GLSLstd450Floor: + emit_unary_func_op(result_type, id, args[0], "floor"); + break; + case GLSLstd450Ceil: + emit_unary_func_op(result_type, id, args[0], "ceil"); + break; + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "fract"); + break; + case GLSLstd450Radians: + emit_unary_func_op(result_type, id, args[0], "radians"); + break; + case GLSLstd450Degrees: + emit_unary_func_op(result_type, id, args[0], "degrees"); + break; + case GLSLstd450Fma: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); + break; + case GLSLstd450Modf: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "modf"); + break; + + case GLSLstd450ModfStruct: + { + forced_temporaries.insert(id); + auto &type = get<SPIRType>(result_type); + auto &flags = ir.meta[id].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + // Minmax + case GLSLstd450UMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); + break; + + case GLSLstd450SMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); + break; + + case GLSLstd450FMin: + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + break; + + case GLSLstd450FMax: + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + break; + + case GLSLstd450UMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); + break; + + case GLSLstd450SMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); + break; + + case GLSLstd450FClamp: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + break; + + case GLSLstd450UClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); + break; + + case GLSLstd450SClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); + break; + + // Trig + case GLSLstd450Sin: + emit_unary_func_op(result_type, id, args[0], "sin"); + break; + case GLSLstd450Cos: + emit_unary_func_op(result_type, id, args[0], "cos"); + break; + case GLSLstd450Tan: + emit_unary_func_op(result_type, id, args[0], "tan"); + break; + case GLSLstd450Asin: + emit_unary_func_op(result_type, id, args[0], "asin"); + break; + case GLSLstd450Acos: + emit_unary_func_op(result_type, id, args[0], "acos"); + break; + case GLSLstd450Atan: + emit_unary_func_op(result_type, id, args[0], "atan"); + break; + case GLSLstd450Sinh: + emit_unary_func_op(result_type, id, args[0], "sinh"); + break; + case GLSLstd450Cosh: + emit_unary_func_op(result_type, id, args[0], "cosh"); + break; + case GLSLstd450Tanh: + emit_unary_func_op(result_type, id, args[0], "tanh"); + break; + case GLSLstd450Asinh: + emit_unary_func_op(result_type, id, args[0], "asinh"); + break; + case GLSLstd450Acosh: + emit_unary_func_op(result_type, id, args[0], "acosh"); + break; + case GLSLstd450Atanh: + emit_unary_func_op(result_type, id, args[0], "atanh"); + break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan"); + break; + + // Exponentials + case GLSLstd450Pow: + emit_binary_func_op(result_type, id, args[0], args[1], "pow"); + break; + case GLSLstd450Exp: + emit_unary_func_op(result_type, id, args[0], "exp"); + break; + case GLSLstd450Log: + emit_unary_func_op(result_type, id, args[0], "log"); + break; + case GLSLstd450Exp2: + emit_unary_func_op(result_type, id, args[0], "exp2"); + break; + case GLSLstd450Log2: + emit_unary_func_op(result_type, id, args[0], "log2"); + break; + case GLSLstd450Sqrt: + emit_unary_func_op(result_type, id, args[0], "sqrt"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + break; + + // Matrix math + case GLSLstd450Determinant: + emit_unary_func_op(result_type, id, args[0], "determinant"); + break; + case GLSLstd450MatrixInverse: + emit_unary_func_op(result_type, id, args[0], "inverse"); + break; + + // Lerping + case GLSLstd450FMix: + case GLSLstd450IMix: + { + emit_mix_op(result_type, id, args[0], args[1], args[2]); + break; + } + case GLSLstd450Step: + emit_binary_func_op(result_type, id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); + break; + + // Packing + case GLSLstd450Frexp: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); + break; + + case GLSLstd450FrexpStruct: + { + forced_temporaries.insert(id); + auto &type = get<SPIRType>(result_type); + auto &flags = ir.meta[id].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + case GLSLstd450Ldexp: + emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); + break; + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); + break; + case GLSLstd450PackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); + break; + case GLSLstd450UnpackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); + break; + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); + break; + + // Vector math + case GLSLstd450Length: + emit_unary_func_op(result_type, id, args[0], "length"); + break; + case GLSLstd450Distance: + emit_binary_func_op(result_type, id, args[0], args[1], "distance"); + break; + case GLSLstd450Cross: + emit_binary_func_op(result_type, id, args[0], args[1], "cross"); + break; + case GLSLstd450Normalize: + emit_unary_func_op(result_type, id, args[0], "normalize"); + break; + case GLSLstd450FaceForward: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); + break; + case GLSLstd450Reflect: + emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); + break; + case GLSLstd450Refract: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); + break; + + // Bit-fiddling + case GLSLstd450FindILsb: + emit_unary_func_op(result_type, id, args[0], "findLSB"); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, int_type); // findMSB always returns int. + break; + + // Multisampled varying + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); + break; + + case GLSLstd450NMin: + case GLSLstd450NMax: + { + emit_nminmax_op(result_type, id, args[0], args[1], op); + break; + } + + case GLSLstd450NClamp: + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | 0x80000000u]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. + ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + break; + } + + default: + statement("// unimplemented GLSL op ", eop); + break; + } +} + +void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +{ + // Need to emulate this call. + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(5); + auto btype = get<SPIRType>(result_type); + btype.basetype = SPIRType::Boolean; + set<SPIRType>(ids, btype); + } + + uint32_t btype_id = ids + 0; + uint32_t left_nan_id = ids + 1; + uint32_t right_nan_id = ids + 2; + uint32_t tmp_id = ids + 3; + uint32_t mixed_first_id = ids + 4; + + // Inherit precision qualifiers. + ir.meta[tmp_id] = ir.meta[id]; + ir.meta[mixed_first_id] = ir.meta[id]; + + emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); + emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); + emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); + emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); + emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); +} + +void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_shader_ballot"); + + enum AMDShaderBallot + { + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4 + }; + + auto op = static_cast<AMDShaderBallot>(eop); + + switch (op) + { + case SwizzleInvocationsAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); + register_control_dependent_expression(id); + break; + + case SwizzleInvocationsMaskedAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); + register_control_dependent_expression(id); + break; + + case WriteInvocationAMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); + register_control_dependent_expression(id); + break; + + case MbcntAMD: + emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); + register_control_dependent_expression(id); + break; + + default: + statement("// unimplemented SPV AMD shader ballot op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + + enum AMDShaderExplicitVertexParameter + { + InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast<AMDShaderExplicitVertexParameter>(eop); + + switch (op) + { + case InterpolateAtVertexAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); + break; + + default: + statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_trinary_minmax"); + + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + auto op = static_cast<AMDShaderTrinaryMinMax>(eop); + + switch (op) + { + case FMin3AMD: + case UMin3AMD: + case SMin3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); + break; + + case FMax3AMD: + case UMax3AMD: + case SMax3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); + break; + + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); + break; + + default: + statement("// unimplemented SPV AMD shader trinary minmax op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_gcn_shader"); + + enum AMDGCNShader + { + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3 + }; + + auto op = static_cast<AMDGCNShader>(eop); + + switch (op) + { + case CubeFaceIndexAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); + break; + case CubeFaceCoordAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); + break; + case TimeAMD: + { + string expr = "timeAMD()"; + emit_op(result_type, id, expr, true); + register_control_dependent_expression(id); + break; + } + + default: + statement("// unimplemented SPV AMD gcn shader op ", eop); + break; + } +} + +void CompilerGLSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast<Op>(i.op); + + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); + + switch (op) + { + case OpGroupNonUniformElect: + require_extension_internal("GL_KHR_shader_subgroup_basic"); + break; + + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + require_extension_internal("GL_KHR_shader_subgroup_ballot"); + break; + + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + require_extension_internal("GL_KHR_shader_subgroup_shuffle"); + break; + + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); + break; + + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + require_extension_internal("GL_KHR_shader_subgroup_vote"); + break; + + case OpGroupNonUniformFAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + { + auto operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationClusteredReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_clustered"); + } + else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || + operation == GroupOperationReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); + } + else + SPIRV_CROSS_THROW("Invalid group operation."); + break; + } + + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + require_extension_internal("GL_KHR_shader_subgroup_quad"); + break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar()); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "subgroupElect()", true); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + break; + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); + break; + + // clang-format off +#define GLSL_GROUP_OP(op, glsl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast<GroupOperation>(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + GLSL_GROUP_OP(FAdd, Add) + GLSL_GROUP_OP(FMul, Mul) + GLSL_GROUP_OP(FMin, Min) + GLSL_GROUP_OP(FMax, Max) + GLSL_GROUP_OP(IAdd, Add) + GLSL_GROUP_OP(IMul, Mul) + GLSL_GROUP_OP(SMin, Min) + GLSL_GROUP_OP(SMax, Max) + GLSL_GROUP_OP(UMin, Min) + GLSL_GROUP_OP(UMax, Max) + GLSL_GROUP_OP(BitwiseAnd, And) + GLSL_GROUP_OP(BitwiseOr, Or) + GLSL_GROUP_OP(BitwiseXor, Xor) +#undef GLSL_GROUP_OP + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = get<SPIRConstant>(ops[4]).scalar(); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + // Trivial bitcast case, casts between integers. + if (integral_cast && same_size_cast) + return type_to_glsl(out_type); + + // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). + if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) + return "unpack8"; + else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) + return "pack16"; + else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) + return "pack32"; + + // Floating <-> Integer special casts. Just have to enumerate all cases. :( + // 16-bit, 32-bit and 64-bit floats. + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + return "floatBitsToUint"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + return "floatBitsToInt"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + return "uintBitsToFloat"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + return "intBitsToFloat"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToInt64"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToUint64"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "int64BitsToDouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "uint64BitsToDouble"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) + return "float16BitsToInt16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + return "float16BitsToUint16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) + return "int16BitsToFloat16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + return "uint16BitsToFloat16"; + + // And finally, some even more special purpose casts. + if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) + return "packUint2x32"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackFloat2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + return "packFloat2x16"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) + return "packInt2x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) + return "unpackInt2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) + return "packUint2x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackUint2x16"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) + return "packInt4x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) + return "unpackInt4x16"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) + return "packUint4x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) + return "unpackUint4x16"; + + return ""; +} + +string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +{ + auto op = bitcast_glsl_op(result_type, expression_type(argument)); + if (op.empty()) + return to_enclosed_expression(argument); + else + return join(op, "(", to_expression(argument), ")"); +} + +std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +{ + auto expr = to_expression(arg); + auto &src_type = expression_type(arg); + if (src_type.basetype != target_type) + { + auto target = src_type; + target.basetype = target_type; + expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + } + + return expr; +} + +std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, + const std::string &expr) +{ + if (target_type.basetype == expr_type) + return expr; + + auto src_type = target_type; + src_type.basetype = expr_type; + return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); +} + +string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + case BuiltInPosition: + return "gl_Position"; + case BuiltInPointSize: + return "gl_PointSize"; + case BuiltInClipDistance: + return "gl_ClipDistance"; + case BuiltInCullDistance: + return "gl_CullDistance"; + case BuiltInVertexId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW( + "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); + return "gl_VertexID"; + case BuiltInInstanceId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW( + "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); + return "gl_InstanceID"; + case BuiltInVertexIndex: + if (options.vulkan_semantics) + return "gl_VertexIndex"; + else + return "gl_VertexID"; // gl_VertexID already has the base offset applied. + case BuiltInInstanceIndex: + if (options.vulkan_semantics) + return "gl_InstanceIndex"; + else if (options.vertex.support_nonzero_base_instance) + return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. + else + return "gl_InstanceID"; + case BuiltInPrimitiveId: + if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) + return "gl_PrimitiveIDIn"; + else + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInViewportIndex: + return "gl_ViewportIndex"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return "gl_LocalInvocationIndex"; + case BuiltInHelperInvocation: + return "gl_HelperInvocation"; + case BuiltInBaseVertex: + if (options.es) + SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseVertexARB"; + } + return "gl_BaseVertex"; + case BuiltInBaseInstance: + if (options.es) + SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseInstanceARB"; + } + return "gl_BaseInstance"; + case BuiltInDrawIndex: + if (options.es) + SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + } + return "gl_DrawID"; + + case BuiltInSampleId: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); + return "gl_SampleID"; + + case BuiltInSampleMask: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); + + if (storage == StorageClassInput) + return "gl_SampleMaskIn"; + else + return "gl_SampleMask"; + + case BuiltInSamplePosition: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); + return "gl_SamplePosition"; + + case BuiltInViewIndex: + if (options.vulkan_semantics) + { + require_extension_internal("GL_EXT_multiview"); + return "gl_ViewIndex"; + } + else + { + require_extension_internal("GL_OVR_multiview2"); + return "gl_ViewID_OVR"; + } + + case BuiltInNumSubgroups: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_basic"); + return "gl_NumSubgroups"; + + case BuiltInSubgroupId: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_basic"); + return "gl_SubgroupID"; + + case BuiltInSubgroupSize: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_basic"); + return "gl_SubgroupSize"; + + case BuiltInSubgroupLocalInvocationId: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_basic"); + return "gl_SubgroupInvocationID"; + + case BuiltInSubgroupEqMask: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_ballot"); + return "gl_SubgroupEqMask"; + + case BuiltInSubgroupGeMask: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_ballot"); + return "gl_SubgroupGeMask"; + + case BuiltInSubgroupGtMask: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_ballot"); + return "gl_SubgroupGtMask"; + + case BuiltInSubgroupLeMask: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_ballot"); + return "gl_SubgroupLeMask"; + + case BuiltInSubgroupLtMask: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); + require_extension_internal("GL_KHR_shader_subgroup_ballot"); + return "gl_SubgroupLtMask"; + + case BuiltInLaunchIdNV: + return "gl_LaunchIDNV"; + case BuiltInLaunchSizeNV: + return "gl_LaunchSizeNV"; + case BuiltInWorldRayOriginNV: + return "gl_WorldRayOriginNV"; + case BuiltInWorldRayDirectionNV: + return "gl_WorldRayDirectionNV"; + case BuiltInObjectRayOriginNV: + return "gl_ObjectRayOriginNV"; + case BuiltInObjectRayDirectionNV: + return "gl_ObjectRayDirectionNV"; + case BuiltInRayTminNV: + return "gl_RayTminNV"; + case BuiltInRayTmaxNV: + return "gl_RayTmaxNV"; + case BuiltInInstanceCustomIndexNV: + return "gl_InstanceCustomIndexNV"; + case BuiltInObjectToWorldNV: + return "gl_ObjectToWorldNV"; + case BuiltInWorldToObjectNV: + return "gl_WorldToObjectNV"; + case BuiltInHitTNV: + return "gl_HitTNV"; + case BuiltInHitKindNV: + return "gl_HitKindNV"; + case BuiltInIncomingRayFlagsNV: + return "gl_IncomingRayFlagsNV"; + + default: + return join("gl_BuiltIn_", convert_to_string(builtin)); + } +} + +const char *CompilerGLSL::index_to_swizzle(uint32_t index) +{ + switch (index) + { + case 0: + return "x"; + case 1: + return "y"; + case 2: + return "z"; + case 3: + return "w"; + default: + SPIRV_CROSS_THROW("Swizzle index out of range"); + } +} + +string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, + AccessChainFlags flags, AccessChainMeta *meta) +{ + string expr; + + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + + if (!chain_only) + expr = to_enclosed_expression(base, register_expression_read); + + // Start traversing type hierarchy at the proper non-pointer types, + // but keep type_id referencing the original pointer for use below. + uint32_t type_id = expression_type_id(base); + const auto *type = &get_pointee_type(type_id); + + bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; + bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); + bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPacked); + uint32_t packed_type = get_extended_decoration(base, SPIRVCrossDecorationPackedType); + bool is_invariant = has_decoration(base, DecorationInvariant); + bool pending_array_enclose = false; + bool dimension_flatten = false; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + const auto append_index = [&]() { + expr += "["; + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index, register_expression_read); + expr += "]"; + }; + + // Pointer chains + if (ptr_chain && i == 0) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays) + { + dimension_flatten = type->array.size() >= 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(type->array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(*type, j - 1)); + } + + if (type->array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else + { + append_index(); + } + + if (type->basetype == SPIRType::ControlPointArray) + { + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + } + + access_chain_is_arrayed = true; + } + // Arrays + else if (!type->array.empty()) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays && !pending_array_enclose) + { + dimension_flatten = type->array.size() > 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + assert(type->parent_type); + + auto *var = maybe_get<SPIRVariable>(base); + if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && + !has_decoration(type->self, DecorationBlock)) + { + // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. + // Normally, these variables live in blocks when compiled from GLSL, + // but HLSL seems to just emit straight arrays here. + // We must pretend this access goes through gl_in/gl_out arrays + // to be able to access certain builtins as arrays. + auto builtin = ir.meta[base].decoration.builtin_type; + switch (builtin) + { + // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. + // case BuiltInClipDistance: + case BuiltInPosition: + case BuiltInPointSize: + if (var->storage == StorageClassInput) + expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassOutput) + expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); + else + append_index(); + break; + + default: + append_index(); + break; + } + } + else if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + auto &parent_type = get<SPIRType>(type->parent_type); + + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(parent_type.array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(parent_type, j - 1)); + } + + if (parent_type.array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else + { + append_index(); + } + + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + + access_chain_is_arrayed = true; + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. + else if (type->basetype == SPIRType::Struct) + { + if (!index_is_literal) + index = get<SPIRConstant>(index).scalar(); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + BuiltIn builtin; + if (is_member_builtin(*type, index, &builtin)) + { + // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly. + // To make this properly work by omitting all OpName opcodes, + // we need to infer gl_in or gl_out based on the builtin, and stage. + if (access_chain_is_arrayed) + { + expr += "."; + expr += builtin_to_glsl(builtin, type->storage); + } + else + expr = builtin_to_glsl(builtin, type->storage); + } + else + { + // If the member has a qualified name, use it as the entire chain + string qual_mbr_name = get_member_qualified_name(type_id, index); + if (!qual_mbr_name.empty()) + expr = qual_mbr_name; + else + expr += to_member_reference(base, *type, index, ptr_chain); + } + + if (has_member_decoration(type->self, index, DecorationInvariant)) + is_invariant = true; + + is_packed = member_is_packed_type(*type, index); + if (is_packed) + packed_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPackedType); + else + packed_type = 0; + + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); + type = &get<SPIRType>(type->member_types[index]); + } + // Matrix -> Vector + else if (type->columns > 1) + { + if (row_major_matrix_needs_conversion) + { + expr = convert_row_major_matrix(expr, *type, is_packed); + row_major_matrix_needs_conversion = false; + is_packed = false; + packed_type = 0; + } + + expr += "["; + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index, register_expression_read); + expr += "]"; + + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + if (index_is_literal && !is_packed) + { + expr += "."; + expr += index_to_swizzle(index); + } + else if (ir.ids[index].get_type() == TypeConstant && !is_packed) + { + auto &c = get<SPIRConstant>(index); + expr += "."; + expr += index_to_swizzle(c.scalar()); + } + else if (index_is_literal) + { + // For packed vectors, we can only access them as an array, not by swizzle. + expr += join("[", index, "]"); + } + else + { + expr += "["; + expr += to_expression(index, register_expression_read); + expr += "]"; + } + + is_packed = false; + packed_type = 0; + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + } + else if (!backend.allow_truncated_access_chain) + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (pending_array_enclose) + { + SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " + "but the access chain was terminated in the middle of a multidimensional array. " + "This is not supported."); + } + + if (meta) + { + meta->need_transpose = row_major_matrix_needs_conversion; + meta->storage_is_packed = is_packed; + meta->storage_is_invariant = is_invariant; + meta->storage_packed_type = packed_type; + } + + return expr; +} + +string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index) +{ + auto &type = get<SPIRType>(var.basetype); + return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index))); +} + +string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta, bool ptr_chain) +{ + if (flattened_buffer_blocks.count(base)) + { + uint32_t matrix_stride = 0; + bool need_transpose = false; + flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, + ptr_chain); + + if (meta) + { + meta->need_transpose = target_type.columns > 1 && need_transpose; + meta->storage_is_packed = false; + } + + return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose); + } + else if (flattened_structs.count(base) && count > 0) + { + AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + + auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); + if (meta) + { + meta->need_transpose = false; + meta->storage_is_packed = false; + } + return sanitize_underscores(join(to_name(base), "_", chain)); + } + else + { + AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + return access_chain_internal(base, indices, count, flags, meta); + } +} + +string CompilerGLSL::load_flattened_struct(SPIRVariable &var) +{ + auto expr = type_to_glsl_constructor(get<SPIRType>(var.basetype)); + expr += '('; + + auto &type = get<SPIRType>(var.basetype); + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (i) + expr += ", "; + + // Flatten the varyings. + // Apply name transformation for flattened I/O blocks. + expr += to_flattened_struct_member(var, i); + } + expr += ')'; + return expr; +} + +void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value) +{ + // We're trying to store a structure which has been flattened. + // Need to copy members one by one. + auto rhs = to_expression(value); + + // Store result locally. + // Since we're declaring a variable potentially multiple times here, + // store the variable in an isolated scope. + begin_scope(); + statement(variable_decl_function_local(var), " = ", rhs, ";"); + + auto &type = get<SPIRType>(var.basetype); + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + // Flatten the varyings. + // Apply name transformation for flattened I/O blocks. + + auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i))); + rhs = join(to_name(var.self), ".", to_member_name(type, i)); + statement(lhs, " = ", rhs, ";"); + } + end_scope(); +} + +std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + bool need_transpose) +{ + if (!target_type.array.empty()) + SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); + else if (target_type.basetype == SPIRType::Struct) + return flattened_access_chain_struct(base, indices, count, target_type, offset); + else if (target_type.columns > 1) + return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); + else + return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); +} + +std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset) +{ + std::string expr; + + expr += type_to_glsl_constructor(target_type); + expr += "("; + + for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + { + if (i != 0) + expr += ", "; + + const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]); + uint32_t member_offset = type_struct_member_offset(target_type, i); + + // The access chain terminates at the struct, so we need to find matrix strides and row-major information + // ahead of time. + bool need_transpose = false; + uint32_t matrix_stride = 0; + if (member_type.columns > 1) + { + need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); + matrix_stride = type_struct_member_matrix_stride(target_type, i); + } + + auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, + need_transpose); + + // Cannot forward transpositions, so resolve them here. + if (need_transpose) + expr += convert_row_major_matrix(tmp, member_type, false); + else + expr += tmp; + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + assert(matrix_stride); + SPIRType tmp_type = target_type; + if (need_transpose) + swap(tmp_type.vecsize, tmp_type.columns); + + std::string expr; + + expr += type_to_glsl_constructor(tmp_type); + expr += "("; + + for (uint32_t i = 0; i < tmp_type.columns; i++) + { + if (i != 0) + expr += ", "; + + expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, + /* need_transpose= */ false); + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + + auto buffer_name = to_name(expression_type(base).self); + + if (need_transpose) + { + std::string expr; + + if (target_type.vecsize > 1) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + + for (uint32_t i = 0; i < target_type.vecsize; ++i) + { + if (i != 0) + expr += ", "; + + uint32_t component_offset = result.second + i * matrix_stride; + + assert(component_offset % (target_type.width / 8) == 0); + uint32_t index = component_offset / (target_type.width / 8); + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(1, index % 4); + } + + if (target_type.vecsize > 1) + { + expr += ")"; + } + + return expr; + } + else + { + assert(result.second % (target_type.width / 8) == 0); + uint32_t index = result.second / (target_type.width / 8); + + std::string expr; + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(target_type.vecsize, index % 4); + + return expr; + } +} + +std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset( + const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, + bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain) +{ + // Start traversing type hierarchy at the proper non-pointer types. + const auto *type = &get_pointee_type(basetype); + + // This holds the type of the current pointer which we are traversing through. + // We always start out from a struct type which is the block. + // This is primarily used to reflect the array strides and matrix strides later. + // For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set + // accordingly as members of structs are accessed. + assert(type->basetype == SPIRType::Struct); + uint32_t type_id = 0; + + std::string expr; + + // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. + bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; + uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + // Pointers + if (ptr_chain && i == 0) + { + // Here, the pointer type will be decorated with an array stride. + uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride); + if (!array_stride) + SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW( + "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + // Type ID is unchanged. + } + // Arrays + else if (!type->array.empty()) + { + // Here, the type_id will be a type ID for the array type itself. + uint32_t array_stride = get_decoration(type_id, DecorationArrayStride); + if (!array_stride) + SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW( + "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get<SPIRType>(parent_type); + type_id = parent_type; + + // Type ID now refers to the array type with one less dimension. + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. + else if (type->basetype == SPIRType::Struct) + { + index = get<SPIRConstant>(index).scalar(); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + offset += type_struct_member_offset(*type, index); + type_id = type->member_types[index]; + + auto &struct_type = *type; + type = &get<SPIRType>(type->member_types[index]); + + if (type->columns > 1) + { + matrix_stride = type_struct_member_matrix_stride(struct_type, index); + row_major_matrix_needs_conversion = + combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); + } + else + row_major_matrix_needs_conversion = false; + } + // Matrix -> Vector + else if (type->columns > 1) + { + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + index = get<SPIRConstant>(index).scalar(); + offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW( + "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get<SPIRType>(type->parent_type); + type_id = parent_type; + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + index = get<SPIRConstant>(index).scalar(); + offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); + + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW( + "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " + "This cannot be flattened in legacy targets."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get<SPIRType>(type->parent_type); + type_id = parent_type; + } + else + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (need_transpose) + *need_transpose = row_major_matrix_needs_conversion; + if (out_matrix_stride) + *out_matrix_stride = matrix_stride; + + return std::make_pair(expr, offset); +} + +bool CompilerGLSL::should_dereference(uint32_t id) +{ + const auto &type = expression_type(id); + // Non-pointer expressions don't need to be dereferenced. + if (!type.pointer) + return false; + + // Handles shouldn't be dereferenced either. + if (!expression_is_lvalue(id)) + return false; + + // If id is a variable but not a phi variable, we should not dereference it. + if (auto *var = maybe_get<SPIRVariable>(id)) + return var->phi_variable; + + // If id is an access chain, we should not dereference it. + if (auto *expr = maybe_get<SPIRExpression>(id)) + return !expr->access_chain; + + // Otherwise, we should dereference this pointer expression. + return true; +} + +bool CompilerGLSL::should_forward(uint32_t id) +{ + // If id is a variable we will try to forward it regardless of force_temporary check below + // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL + auto *var = maybe_get<SPIRVariable>(id); + if (var && var->forwardable) + return true; + + // For debugging emit temporary variables for all expressions + if (options.force_temporary) + return false; + + // Immutable expression can always be forwarded. + if (is_immutable(id)) + return true; + + return false; +} + +void CompilerGLSL::track_expression_read(uint32_t id) +{ + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get<SPIRExpression>(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + case TypeAccessChain: + { + auto &e = get<SPIRAccessChain>(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + default: + break; + } + + // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. + // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. + if (expression_is_forwarded(id)) + { + auto &v = expression_usage_counts[id]; + v++; + + if (v >= 2) + { + //if (v == 2) + // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + + forced_temporaries.insert(id); + // Force a recompile after this pass to avoid forwarding this variable. + force_recompile = true; + } + } +} + +bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) +{ + if (forced_temporaries.find(id) != end(forced_temporaries)) + return false; + + for (uint32_t i = 0; i < num_args; i++) + if (!should_forward(args[i])) + return false; + + // We need to forward globals as well. + if (!pure) + { + for (auto global : global_variables) + if (!should_forward(global)) + return false; + for (auto aliased : aliased_variables) + if (!should_forward(aliased)) + return false; + } + + return true; +} + +void CompilerGLSL::register_impure_function_call() +{ + // Impure functions can modify globals and aliased variables, so invalidate them as well. + for (auto global : global_variables) + flush_dependees(get<SPIRVariable>(global)); + for (auto aliased : aliased_variables) + flush_dependees(get<SPIRVariable>(aliased)); +} + +void CompilerGLSL::register_call_out_argument(uint32_t id) +{ + register_write(id); + + auto *var = maybe_get<SPIRVariable>(id); + if (var) + flush_variable_declaration(var->self); +} + +string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) +{ + // These variables are always function local, + // so make sure we emit the variable without storage qualifiers. + // Some backends will inject custom variables locally in a function + // with a storage qualifier which is not function-local. + auto old_storage = var.storage; + var.storage = StorageClassFunction; + auto expr = variable_decl(var); + var.storage = old_storage; + return expr; +} + +void CompilerGLSL::flush_variable_declaration(uint32_t id) +{ + auto *var = maybe_get<SPIRVariable>(id); + if (var && var->deferred_declaration) + { + statement(variable_decl_function_local(*var), ";"); + if (var->allocate_temporary_copy) + { + auto &type = get<SPIRType>(var->basetype); + auto &flags = ir.meta[id].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, join("_", id, "_copy")), + ";"); + } + var->deferred_declaration = false; + } +} + +bool CompilerGLSL::remove_duplicate_swizzle(string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto prevpos = op.find_last_of('.', pos - 1); + if (prevpos == string::npos) + return false; + + prevpos++; + + // Make sure there are only swizzles here ... + for (auto i = prevpos; i < pos; i++) + { + if (op[i] < 'w' || op[i] > 'z') + { + // If swizzles are foo.xyz() like in C++ backend for example, check for that. + if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') + break; + return false; + } + } + + // If original swizzle is large enough, just carve out the components we need. + // E.g. foobar.wyx.xy will turn into foobar.wy. + if (pos - prevpos >= final_swiz.size()) + { + op.erase(prevpos + final_swiz.size(), string::npos); + + // Add back the function call ... + if (backend.swizzle_is_function) + op += "()"; + } + return true; +} + +// Optimizes away vector swizzles where we have something like +// vec3 foo; +// foo.xyz <-- swizzle expression does nothing. +// This is a very common pattern after OpCompositeCombine. +bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto &type = expression_type(base); + + // Sanity checking ... + assert(type.columns == 1 && type.array.empty()); + + if (type.vecsize == final_swiz.size()) + op.erase(pos, string::npos); + return true; +} + +string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) +{ + uint32_t base = 0; + string op; + string subop; + + // Can only merge swizzles for vectors. + auto &type = get<SPIRType>(return_type); + bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; + bool swizzle_optimization = false; + + for (uint32_t i = 0; i < length; i++) + { + auto *e = maybe_get<SPIRExpression>(elems[i]); + + // If we're merging another scalar which belongs to the same base + // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! + if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + { + // Only supposed to be used for vector swizzle -> scalar. + assert(!e->expression.empty() && e->expression.front() == '.'); + subop += e->expression.substr(1, string::npos); + swizzle_optimization = true; + } + else + { + // We'll likely end up with duplicated swizzles, e.g. + // foobar.xyz.xyz from patterns like + // OpVectorShuffle + // OpCompositeExtract x 3 + // OpCompositeConstruct 3x + other scalar. + // Just modify op in-place. + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. + // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. + // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. + // Essentially, we can only remove one set of swizzles, since that's what we have control over ... + // Case 1: + // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. + // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. + // Case 2: + // foo.xyz: Duplicate swizzle won't kick in. + // If foo is vec3, we can remove xyz, giving just foo. + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + swizzle_optimization = false; + op += subop; + } + else + op += subop; + + if (i) + op += ", "; + subop = to_expression(elems[i]); + } + + base = e ? e->base_expression : 0; + } + + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + } + + op += subop; + return op; +} + +bool CompilerGLSL::skip_argument(uint32_t id) const +{ + if (!combined_image_samplers.empty() || !options.vulkan_semantics) + { + auto &type = expression_type(id); + if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) + return true; + } + return false; +} + +bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) +{ + // Do this with strings because we have a very clear pattern we can check for and it avoids + // adding lots of special cases to the code emission. + if (rhs.size() < lhs.size() + 3) + return false; + + // Do not optimize matrices. They are a bit awkward to reason about in general + // (in which order does operation happen?), and it does not work on MSL anyways. + if (type.vecsize > 1 && type.columns > 1) + return false; + + auto index = rhs.find(lhs); + if (index != 0) + return false; + + // TODO: Shift operators, but it's not important for now. + auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); + if (op != lhs.size() + 1) + return false; + + // Check that the op is followed by space. This excludes && and ||. + if (rhs[op + 1] != ' ') + return false; + + char bop = rhs[op]; + auto expr = rhs.substr(lhs.size() + 3); + // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. + // Find some common patterns which are equivalent. + if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) + statement(lhs, bop, bop, ";"); + else + statement(lhs, " ", bop, "= ", expr, ";"); + return true; +} + +void CompilerGLSL::register_control_dependent_expression(uint32_t expr) +{ + if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) + return; + + assert(current_emitting_block); + current_emitting_block->invalidate_expressions.push_back(expr); +} + +void CompilerGLSL::emit_block_instructions(SPIRBlock &block) +{ + current_emitting_block = █ + for (auto &op : block.ops) + emit_instruction(op); + current_emitting_block = nullptr; +} + +void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) +{ + if (forwarded_temporaries.count(expr.self)) + { + forced_temporaries.insert(expr.self); + force_recompile = true; + } + + for (auto &dependent : expr.expression_dependencies) + disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent)); +} + +void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) +{ + // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to + // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary + // in one translation unit, but not another, e.g. due to multiple use of an expression. + // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent + // expressions to be temporaries. + // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough + // for all reasonable uses of invariant. + if (!has_decoration(store_id, DecorationInvariant)) + return; + + auto *expr = maybe_get<SPIRExpression>(value_id); + if (!expr) + return; + + disallow_forwarding_in_expression_chain(*expr); +} + +void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto rhs = to_pointer_expression(rhs_expression); + + // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. + if (!rhs.empty()) + { + handle_store_to_invariant_variable(lhs_expression, rhs_expression); + + auto lhs = to_dereferenced_expression(lhs_expression); + + // We might need to bitcast in order to store to a builtin. + bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression)); + + // Tries to optimize assignments like "<lhs> = <lhs> op expr". + // While this is purely cosmetic, this is important for legacy ESSL where loop + // variable increments must be in either i++ or i += const-expr. + // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + register_write(lhs_expression); + } +} + +uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const +{ + if (instr.length < 3) + return 32; + + auto *ops = stream(instr); + + switch (instr.op) + { + case OpIEqual: + case OpINotEqual: + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + return expression_type(ops[2]).width; + + default: + { + // We can look at result type which is more robust. + auto *type = maybe_get<SPIRType>(ops[0]); + if (type && type_is_integral(*type)) + return type->width; + else + return 32; + } + } +} + +uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const +{ + if (length < 1) + return 32; + + switch (op) + { + case GLSLstd450SAbs: + case GLSLstd450SSign: + case GLSLstd450UMin: + case GLSLstd450SMin: + case GLSLstd450UMax: + case GLSLstd450SMax: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + return expression_type(ops[0]).width; + + default: + { + // We don't need to care about other opcodes, just return 32. + return 32; + } + } +} + +void CompilerGLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast<Op>(instruction.op); + uint32_t length = instruction.length; + +#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (opcode) + { + // Dealing with memory + case OpLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + flush_variable_declaration(ptr); + + // If we're loading from memory that cannot be changed by the shader, + // just forward the expression directly to avoid needless temporaries. + // If an expression is mutable and forwardable, we speculate that it is immutable. + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If loading a non-native row-major matrix, mark the expression as need_transpose. + bool need_transpose = false; + bool old_need_transpose = false; + + auto *ptr_expression = maybe_get<SPIRExpression>(ptr); + if (ptr_expression && ptr_expression->need_transpose) + { + old_need_transpose = true; + ptr_expression->need_transpose = false; + need_transpose = true; + } + else if (is_non_native_row_major_matrix(ptr)) + need_transpose = true; + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + auto expr = to_dereferenced_expression(ptr, !forward); + + // We might need to bitcast in order to load from a builtin. + bitcast_from_builtin_load(ptr, expr, get<SPIRType>(result_type)); + + // We might be trying to load a gl_Position[N], where we should be + // doing float4[](gl_in[i].gl_Position, ...) instead. + // Similar workarounds are required for input arrays in tessellation. + unroll_array_from_complex_load(id, ptr, expr); + + if (ptr_expression) + ptr_expression->need_transpose = old_need_transpose; + + // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. + // However, if we try to load a complex, composite object from a flattened buffer, + // we should avoid emitting the same code over and over and lower the result to a temporary. + auto &type = get<SPIRType>(result_type); + bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 && + (type.basetype == SPIRType::Struct || (type.columns > 1)); + + auto &e = emit_op(result_type, id, expr, forward, !usage_tracking); + e.need_transpose = need_transpose; + register_read(id, ptr, forward); + + // Pass through whether the result is of a packed type. + if (has_extended_decoration(ptr, SPIRVCrossDecorationPacked)) + { + set_extended_decoration(id, SPIRVCrossDecorationPacked); + set_extended_decoration(id, SPIRVCrossDecorationPackedType, + get_extended_decoration(ptr, SPIRVCrossDecorationPackedType)); + } + + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(e, ptr); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + auto *var = maybe_get<SPIRVariable>(ops[2]); + if (var) + flush_variable_declaration(var->self); + + // If the base is immutable, the access chain pointer must also be. + // If an expression is mutable and forwardable, we speculate that it is immutable. + AccessChainMeta meta; + bool ptr_chain = opcode == OpPtrAccessChain; + auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain); + + auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2])); + + auto *backing_variable = maybe_get_backing_variable(ops[2]); + expr.loaded_from = backing_variable ? backing_variable->self : ops[2]; + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; + + // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. + if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); + if (meta.storage_packed_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } + + break; + } + + case OpStore: + { + auto *var = maybe_get<SPIRVariable>(ops[0]); + + if (var && var->statically_assigned) + var->static_expression = ops[1]; + else if (var && var->loop_variable && !var->loop_variable_enable) + var->static_expression = ops[1]; + else if (var && var->remapped_variable) + { + // Skip the write. + } + else if (var && flattened_structs.count(ops[0])) + { + store_flattened_struct(*var, ops[1]); + register_write(ops[0]); + } + else + { + emit_store_statement(ops[0], ops[1]); + } + + // Storing a pointer results in a variable pointer, so we must conservatively assume + // we can write through it. + if (expression_type(ops[1]).pointer) + register_write(ops[1]); + break; + } + + case OpArrayLength: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + set<SPIRExpression>(id, e + ".length()", result_type, true); + break; + } + + // Function calls + case OpFunctionCall: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t func = ops[2]; + const auto *arg = &ops[3]; + length -= 3; + + auto &callee = get<SPIRFunction>(func); + auto &return_type = get<SPIRType>(callee.return_type); + bool pure = function_is_pure(callee); + + bool callee_has_out_variables = false; + bool emit_return_value_as_argument = false; + + // Invalidate out variables passed to functions since they can be OpStore'd to. + for (uint32_t i = 0; i < length; i++) + { + if (callee.arguments[i].write_count) + { + register_call_out_argument(arg[i]); + callee_has_out_variables = true; + } + + flush_variable_declaration(arg[i]); + } + + if (!return_type.array.empty() && !backend.can_return_array) + { + callee_has_out_variables = true; + emit_return_value_as_argument = true; + } + + if (!pure) + register_impure_function_call(); + + string funexpr; + vector<string> arglist; + funexpr += to_name(func) + "("; + + if (emit_return_value_as_argument) + { + statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); + arglist.push_back(to_name(id)); + } + + for (uint32_t i = 0; i < length; i++) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg[i])) + continue; + + arglist.push_back(to_func_call_arg(arg[i])); + } + + for (auto &combined : callee.combined_parameters) + { + uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; + uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id]; + arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); + } + + append_global_func_args(callee, length, arglist); + + funexpr += merge(arglist); + funexpr += ")"; + + // Check for function call constraints. + check_function_call_constraints(arg, length); + + if (return_type.basetype != SPIRType::Void) + { + // If the function actually writes to an out variable, + // take the conservative route and do not forward. + // The problem is that we might not read the function + // result (and emit the function) before an out variable + // is read (common case when return value is ignored! + // In order to avoid start tracking invalid variables, + // just avoid the forwarding problem altogether. + bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && + (forced_temporaries.find(id) == end(forced_temporaries)); + + if (emit_return_value_as_argument) + { + statement(funexpr, ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + } + else + emit_op(result_type, id, funexpr, forward); + + // Function calls are implicit loads from all variables in question. + // Set dependencies for them. + for (uint32_t i = 0; i < length; i++) + register_read(id, arg[i], forward); + + // If we're going to forward the temporary result, + // put dependencies on every variable that must not change. + if (forward) + register_global_read_dependencies(callee, id); + } + else + statement(funexpr, ";"); + + break; + } + + // Composite munging + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + const auto *const elems = &ops[2]; + length -= 2; + + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); + + auto &out_type = get<SPIRType>(result_type); + auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; + + // Only splat if we have vector constructors. + // Arrays and structs must be initialized properly in full. + bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; + + bool splat = false; + bool swizzle_splat = false; + + if (in_type) + { + splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; + swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + + if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } + } + + if (splat || swizzle_splat) + { + uint32_t input = elems[0]; + for (uint32_t i = 0; i < length; i++) + { + if (input != elems[i]) + { + splat = false; + swizzle_splat = false; + } + } + } + + if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + forward = false; + if (!out_type.array.empty() && !backend.can_declare_arrays_inline) + forward = false; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + forward = false; + + string constructor_op; + if (!backend.array_is_value_type && out_type.array.size() > 1) + { + // We cannot construct array of arrays because we cannot treat the inputs + // as value types. Need to declare the array-of-arrays, and copy in elements one by one. + forced_temporaries.insert(id); + auto &flags = ir.meta[id].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(out_type, flags), variable_decl(out_type, to_name(id)), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + for (uint32_t i = 0; i < length; i++) + emit_array_copy(join(to_expression(id), "[", i, "]"), elems[i]); + } + else if (backend.use_initializer_list && composite) + { + // Only use this path if we are building composites. + // This path cannot be used for arithmetic. + if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) + constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)); + constructor_op += "{ "; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += " }"; + } + else if (swizzle_splat && !composite) + { + constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_expression(elems[0])); + } + else + { + constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "("; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += ")"; + } + + if (!constructor_op.empty()) + { + emit_op(result_type, id, constructor_op, forward); + for (uint32_t i = 0; i < length; i++) + inherit_expression_dependencies(id, elems[i]); + } + break; + } + + case OpVectorInsertDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec = ops[2]; + uint32_t comp = ops[3]; + uint32_t index = ops[4]; + + flush_variable_declaration(vec); + + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(vec), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, &index, 1, 0, nullptr); + statement(chain, " = ", to_expression(comp), ";"); + break; + } + + case OpVectorExtractDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); + emit_op(result_type, id, expr, should_forward(ops[2])); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpCompositeExtract: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + length -= 3; + + auto &type = get<SPIRType>(result_type); + + // We can only split the expression here if our expression is forwarded as a temporary. + bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); + + // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. + auto &composite_type = expression_type(ops[2]); + if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty()) + allow_base_expression = false; + + // Packed expressions cannot be split up. + if (has_extended_decoration(ops[2], SPIRVCrossDecorationPacked)) + allow_base_expression = false; + + AccessChainMeta meta; + SPIRExpression *e = nullptr; + + // Only apply this optimization if result is scalar. + if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + { + // We want to split the access chain from the base. + // This is so we can later combine different CompositeExtract results + // with CompositeConstruct without emitting code like + // + // vec3 temp = texture(...).xyz + // vec4(temp.x, temp.y, temp.z, 1.0). + // + // when we actually wanted to emit this + // vec4(texture(...).xyz, 1.0). + // + // Including the base will prevent this and would trigger multiple reads + // from expression causing it to be forced to an actual temporary in GLSL. + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); + e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); + inherit_expression_dependencies(id, ops[2]); + e->base_expression = ops[2]; + } + else + { + auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); + inherit_expression_dependencies(id, ops[2]); + } + + // Pass through some meta information to the loaded expression. + // We can still end up loading a buffer type to a variable, then CompositeExtract from it + // instead of loading everything through an access chain. + e->need_transpose = meta.need_transpose; + if (meta.storage_is_packed) + set_extended_decoration(id, SPIRVCrossDecorationPacked); + if (meta.storage_packed_type != 0) + set_extended_decoration(id, SPIRVCrossDecorationPackedType, meta.storage_packed_type); + if (meta.storage_is_invariant) + set_decoration(id, DecorationInvariant); + + break; + } + + case OpCompositeInsert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t obj = ops[2]; + uint32_t composite = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + flush_variable_declaration(composite); + + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(composite), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_expression(obj), ";"); + + break; + } + + case OpCopyMemory: + { + uint32_t lhs = ops[0]; + uint32_t rhs = ops[1]; + if (lhs != rhs) + { + flush_variable_declaration(lhs); + flush_variable_declaration(rhs); + statement(to_expression(lhs), " = ", to_expression(rhs), ";"); + register_write(lhs); + } + break; + } + + case OpCopyObject: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + bool pointer = get<SPIRType>(result_type).pointer; + + if (expression_is_lvalue(rhs) && !pointer) + { + // Need a copy. + // For pointer types, we copy the pointer itself. + statement(declare_temporary(result_type, id), to_expression(rhs), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + inherit_expression_dependencies(id, rhs); + } + else + { + // RHS expression is immutable, so just forward it. + // Copying these things really make no sense, but + // seems to be allowed anyways. + auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true); + if (pointer) + { + auto *var = maybe_get_backing_variable(rhs); + e.loaded_from = var ? var->self : 0; + } + } + break; + } + + case OpVectorShuffle: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec0 = ops[2]; + uint32_t vec1 = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + auto &type0 = expression_type(vec0); + + // If we have the undefined swizzle index -1, we need to swizzle in undefined data, + // or in our case, T(0). + bool shuffle = false; + for (uint32_t i = 0; i < length; i++) + if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) + shuffle = true; + + // Cannot use swizzles with packed expressions, force shuffle path. + if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPacked)) + shuffle = true; + + string expr; + bool should_fwd, trivial_forward; + + if (shuffle) + { + should_fwd = should_forward(vec0) && should_forward(vec1); + trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); + + // Constructor style and shuffling from two different vectors. + vector<string> args; + for (uint32_t i = 0; i < length; i++) + { + if (elems[i] == 0xffffffffu) + { + // Use a constant 0 here. + // We could use the first component or similar, but then we risk propagating + // a value we might not need, and bog down codegen. + SPIRConstant c; + c.constant_type = type0.parent_type; + assert(type0.parent_type != 0); + args.push_back(constant_expression(c)); + } + else if (elems[i] >= type0.vecsize) + args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); + else + args.push_back(to_extract_component_expression(vec0, elems[i])); + } + expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")"); + } + else + { + should_fwd = should_forward(vec0); + trivial_forward = !expression_is_forwarded(vec0); + + // We only source from first vector, so can use swizzle. + // If the vector is packed, unpack it before applying a swizzle (needed for MSL) + expr += to_enclosed_unpacked_expression(vec0); + expr += "."; + for (uint32_t i = 0; i < length; i++) + { + assert(elems[i] != 0xffffffffu); + expr += index_to_swizzle(elems[i]); + } + + if (backend.swizzle_is_function && length > 1) + expr += "()"; + } + + // A shuffle is trivial in that it doesn't actually *do* anything. + // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. + + emit_op(result_type, id, expr, should_fwd, trivial_forward); + inherit_expression_dependencies(id, vec0); + inherit_expression_dependencies(id, vec1); + break; + } + + // ALU + case OpIsNan: + GLSL_UFOP(isnan); + break; + + case OpIsInf: + GLSL_UFOP(isinf); + break; + + case OpSNegate: + case OpFNegate: + GLSL_UOP(-); + break; + + case OpIAdd: + { + // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(+, type); + break; + } + + case OpFAdd: + GLSL_BOP(+); + break; + + case OpISub: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(-, type); + break; + } + + case OpFSub: + GLSL_BOP(-); + break; + + case OpIMul: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(*, type); + break; + } + + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); + e->need_transpose = true; + } + else + GLSL_BOP(*); + break; + } + + case OpFMul: + case OpMatrixTimesScalar: + case OpVectorTimesScalar: + case OpMatrixTimesMatrix: + GLSL_BOP(*); + break; + + case OpOuterProduct: + GLSL_BFOP(outerProduct); + break; + + case OpDot: + GLSL_BFOP(dot); + break; + + case OpTranspose: + GLSL_UFOP(transpose); + break; + + case OpSRem: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + + case OpSDiv: + GLSL_BOP_CAST(/, int_type); + break; + + case OpUDiv: + GLSL_BOP_CAST(/, uint_type); + break; + + case OpIAddCarry: + case OpISubBorrow: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + forced_temporaries.insert(result_id); + auto &type = get<SPIRType>(result_type); + auto &flags = ir.meta[result_id].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); + set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + + const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; + + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", + to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } + + case OpUMulExtended: + case OpSMulExtended: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + forced_temporaries.insert(result_id); + auto &type = get<SPIRType>(result_type); + auto &flags = ir.meta[result_id].decoration.decoration_flags; + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); + set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + + const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; + + statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", + to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + break; + } + + case OpFDiv: + GLSL_BOP(/); + break; + + case OpShiftRightLogical: + GLSL_BOP_CAST(>>, uint_type); + break; + + case OpShiftRightArithmetic: + GLSL_BOP_CAST(>>, int_type); + break; + + case OpShiftLeftLogical: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(<<, type); + break; + } + + case OpBitwiseOr: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(|, type); + break; + } + + case OpBitwiseXor: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(^, type); + break; + } + + case OpBitwiseAnd: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(&, type); + break; + } + + case OpNot: + GLSL_UOP(~); + break; + + case OpUMod: + GLSL_BOP_CAST(%, uint_type); + break; + + case OpSMod: + GLSL_BOP_CAST(%, int_type); + break; + + case OpFMod: + GLSL_BFOP(mod); + break; + + case OpFRem: + { + if (is_legacy()) + SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is " + "needed for legacy."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + + // Relational + case OpAny: + GLSL_UFOP(any); + break; + + case OpAll: + GLSL_UFOP(all); + break; + + case OpSelect: + emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + break; + + case OpLogicalOr: + { + // No vector variant in GLSL for logical OR. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get<SPIRType>(result_type); + + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); + else + GLSL_BOP(||); + break; + } + + case OpLogicalAnd: + { + // No vector variant in GLSL for logical AND. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get<SPIRType>(result_type); + + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); + else + GLSL_BOP(&&); + break; + } + + case OpLogicalNot: + { + auto &type = get<SPIRType>(ops[0]); + if (type.vecsize > 1) + GLSL_UFOP(not); + else + GLSL_UOP(!); + break; + } + + case OpIEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(equal, int_type); + else + GLSL_BOP_CAST(==, int_type); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(equal); + else + GLSL_BOP(==); + break; + } + + case OpINotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(notEqual, int_type); + else + GLSL_BOP_CAST(!=, int_type); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(notEqual); + else + GLSL_BOP(!=); + break; + } + + case OpUGreaterThan: + case OpSGreaterThan: + { + auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThan, type); + else + GLSL_BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThan); + else + GLSL_BOP(>); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThanEqual, type); + else + GLSL_BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThanEqual); + else + GLSL_BOP(>=); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThan, type); + else + GLSL_BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThan); + else + GLSL_BOP(<); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThanEqual, type); + else + GLSL_BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThanEqual); + else + GLSL_BOP(<=); + break; + } + + // Conversion + case OpConvertFToU: + case OpConvertFToS: + case OpConvertSToF: + case OpConvertUToF: + case OpUConvert: + case OpSConvert: + case OpFConvert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto func = type_to_glsl_constructor(get<SPIRType>(result_type)); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + break; + } + + case OpBitcast: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg)); + emit_unary_func_op(result_type, id, arg, op.c_str()); + break; + } + + case OpQuantizeToF16: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + string op; + auto &type = get<SPIRType>(result_type); + + switch (type.vecsize) + { + case 1: + op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); + break; + case 2: + op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); + break; + case 3: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); + op = join("vec3(", op0, ", ", op1, ")"); + break; + } + case 4: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); + op = join("vec4(", op0, ", ", op1, ")"); + break; + } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, op, should_forward(arg)); + inherit_expression_dependencies(id, arg); + break; + } + + // Derivatives + case OpDPdx: + GLSL_UFOP(dFdx); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + GLSL_UFOP(dFdy); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxFine: + GLSL_UFOP(dFdxFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyFine: + GLSL_UFOP(dFdyFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxCoarse: + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + GLSL_UFOP(dFdxCoarse); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyCoarse: + GLSL_UFOP(dFdyCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + GLSL_UFOP(fwidth); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidthCoarse: + GLSL_UFOP(fwidthCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidthFine: + GLSL_UFOP(fwidthFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + // Bitfield + case OpBitFieldInsert: + // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. + GLSL_QFOP(bitfieldInsert); + break; + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. + GLSL_TFOP(bitfieldExtract); + break; + + case OpBitReverse: + GLSL_UFOP(bitfieldReverse); + break; + + case OpBitCount: + GLSL_UFOP(bitCount); + break; + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, ptr, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; + + forced_temporaries.insert(id); + emit_trinary_func_op(result_type, id, ptr, comp, val, op); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicLoad: + flush_all_atomic_capable_variables(); + // FIXME: Image? + // OpAtomicLoad seems to only be relevant for atomic counters. + GLSL_UFOP(atomicCounter); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + + case OpAtomicStore: + SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore."); + + case OpAtomicIIncrement: + case OpAtomicIDecrement: + { + forced_temporaries.insert(ops[1]); + auto &type = expression_type(ops[2]); + if (type.storage == StorageClassAtomicCounter) + { + // Legacy GLSL stuff, not sure if this is relevant to support. + if (opcode == OpAtomicIIncrement) + GLSL_UFOP(atomicCounterIncrement); + else + GLSL_UFOP(atomicCounterDecrement); + } + else + { + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + + const char *increment = nullptr; + if (opcode == OpAtomicIIncrement && unsigned_type) + increment = "1u"; + else if (opcode == OpAtomicIIncrement) + increment = "1"; + else if (unsigned_type) + increment = "uint(-1)"; + else + increment = "-1"; + + emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); + } + + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicIAdd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicSMax: + case OpAtomicUMax: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicAnd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicOr: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + case OpAtomicXor: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor"; + forced_temporaries.insert(ops[1]); + emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); + break; + } + + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); + break; + + case OpEndPrimitive: + statement("EndPrimitive();"); + break; + + case OpEmitStreamVertex: + statement("EmitStreamVertex();"); + break; + + case OpEndStreamPrimitive: + statement("EndStreamPrimitive();"); + break; + + // Textures + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Suppress usage tracking. + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + + // When using the image, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpImageQueryLod: + { + if (!options.es && options.version < 400) + { + require_extension_internal("GL_ARB_texture_query_lod"); + // For some reason, the ARB spec is all-caps. + GLSL_BFOP(textureQueryLOD); + } + else if (options.es) + SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); + else + GLSL_BFOP(textureQueryLod); + register_control_dependent_expression(ops[1]); + break; + } + + case OpImageQueryLevels: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_texture_query_levels"); + if (options.es) + SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + + auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySamples: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + string expr; + if (type.image.sampled == 2) + expr = join("imageSamples(", to_expression(ops[2]), ")"); + else + expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpSampledImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_sampled_image_op(result_type, id, ops[2], ops[3]); + break; + } + + case OpImageQuerySizeLod: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ", + bitcast_expression(SPIRType::Int, ops[3]), ")"); + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + // Image load/store + case OpImageRead: + { + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = ir.meta[var->self].decoration.decoration_flags; + if (flags.get(DecorationNonReadable)) + { + flags.clear(DecorationNonReadable); + force_recompile = true; + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + { + if (type.image.ms) + SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); + + auto itr = + find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + { + // For non-PLS inputs, we rely on subpass type remapping information to get it right + // since ImageRead always returns 4-component vectors and the backing type is opaque. + if (!var->remapped_components) + SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); + imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2])); + } + else + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. + uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2])); + } + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + if (options.vulkan_semantics) + { + // With Vulkan semantics, use the proper Vulkan GLSL construct. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW( + "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); + } + else + imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW( + "Multisampled image used in OpImageRead, but unexpected operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + to_expression(samples), ")"); + } + else + { + // Implement subpass loads via texture barrier style sampling. + imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + } + } + imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr); + pure = true; + } + else + { + // imageLoad only accepts int coords, not uint. + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + // Plain image load/store. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = + join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + } + else + imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); + + imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr); + pure = false; + } + + if (var && var->forwardable) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + // We only need to track dependencies if we're reading from image load/store. + if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpImageWrite: + { + // We added Nonwritable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to write to the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[0]); + if (var) + { + auto &flags = ir.meta[var->self].decoration.decoration_flags; + if (flags.get(DecorationNonWritable)) + { + flags.clear(DecorationNonWritable); + force_recompile = true; + } + } + + auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + + // imageStore only accepts int coords, not uint. + auto coord_expr = to_expression(ops[1]); + auto target_coord_type = expression_type(ops[1]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + + if (type.image.ms) + { + uint32_t operands = ops[3]; + if (operands != ImageOperandsSampleMask || length != 5) + SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); + uint32_t samples = ops[4]; + statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + } + else + statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageQuerySize: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.basetype == SPIRType::Image) + { + string expr; + if (type.image.sampled == 2) + { + // The size of an image is always constant. + expr = join("imageSize(", to_expression(ops[2]), ")"); + } + else + { + // This path is hit for samplerBuffers and multisampled images which do not have LOD. + expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")"); + } + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + } + else + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + break; + } + + // Compute + case OpControlBarrier: + case OpMemoryBarrier: + { + uint32_t execution_scope = 0; + uint32_t memory; + uint32_t semantics; + + if (opcode == OpMemoryBarrier) + { + memory = get<SPIRConstant>(ops[0]).scalar(); + semantics = get<SPIRConstant>(ops[1]).scalar(); + } + else + { + execution_scope = get<SPIRConstant>(ops[0]).scalar(); + memory = get<SPIRConstant>(ops[1]).scalar(); + semantics = get<SPIRConstant>(ops[2]).scalar(); + } + + if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); + require_extension_internal("GL_KHR_shader_subgroup_basic"); + } + + if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) + { + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar(); + uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar(); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else if (memory == ScopeSubgroup) + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("subgroupMemoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 3 barriers. + statement("subgroupMemoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("subgroupMemoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("subgroupMemoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("subgroupMemoryBarrierImage();"); + } + } + else + { + const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + if (semantics & MemorySemanticsAtomicCounterMemoryMask) + statement("memoryBarrierAtomicCounter();"); + } + } + + if (opcode == OpControlBarrier) + { + if (execution_scope == ScopeSubgroup) + statement("subgroupBarrier();"); + else + statement("barrier();"); + } + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + + if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL) + { + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot) + { + emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + { + emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) + { + emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader) + { + emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else + { + statement("// unimplemented ext op ", instruction.op); + break; + } + + break; + } + + // Legacy sub-group stuff ... + case OpSubgroupBallotKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + string expr; + expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); + emit_op(result_type, id, expr, should_forward(ops[2])); + + require_extension_internal("GL_ARB_shader_ballot"); + inherit_expression_dependencies(id, ops[2]); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupFirstInvocationKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupReadInvocationKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAllKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAnyKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAllEqualKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupIAddNonUniformAMD: + case OpGroupFAddNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupFMinNonUniformAMD: + case OpGroupUMinNonUniformAMD: + case OpGroupSMinNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupFMaxNonUniformAMD: + case OpGroupUMaxNonUniformAMD: + case OpGroupSMaxNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpFragmentMaskFetchAMD: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + } + else + { + emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); + break; + } + + case OpFragmentFetchAMD: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + } + else + { + emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); + break; + } + + // Vulkan 1.1 sub-group stuff ... + case OpGroupNonUniformElect: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + emit_subgroup_op(instruction); + break; + + case OpFUnordEqual: + GLSL_BFOP(unsupported_FUnordEqual); + break; + + case OpFUnordNotEqual: + GLSL_BFOP(unsupported_FUnordNotEqual); + break; + + case OpFUnordLessThan: + GLSL_BFOP(unsupported_FUnordLessThan); + break; + + case OpFUnordGreaterThan: + GLSL_BFOP(unsupported_FUnordGreaterThan); + break; + + case OpFUnordLessThanEqual: + GLSL_BFOP(unsupported_FUnordLessThanEqual); + break; + + case OpFUnordGreaterThanEqual: + GLSL_BFOP(unsupported_FUnordGreaterThanEqual); + break; + + case OpReportIntersectionNV: + statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + break; + case OpIgnoreIntersectionNV: + statement("ignoreIntersectionNV();"); + break; + case OpTerminateRayNV: + statement("terminateRayNV();"); + break; + case OpTraceNV: + statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); + break; + case OpExecuteCallableNV: + statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + break; + + default: + statement("// unimplemented op ", instruction.op); + break; + } +} + +// Appends function arguments, mapped from global variables, beyond the specified arg index. +// This is used when a function call uses fewer arguments than the function defines. +// This situation may occur if the function signature has been dynamically modified to +// extract global variables referenced from within the function, and convert them to +// function arguments. This is necessary for shader languages that do not support global +// access to shader input content from within a function (eg. Metal). Each additional +// function args uses the name of the global variable. Function nesting will modify the +// functions and function calls all the way up the nesting chain. +void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, vector<string> &arglist) +{ + auto &args = func.arguments; + uint32_t arg_cnt = uint32_t(args.size()); + for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + { + auto &arg = args[arg_idx]; + assert(arg.alias_global_variable); + + // If the underlying variable needs to be declared + // (ie. a local variable with deferred declaration), do so now. + uint32_t var_id = get<SPIRVariable>(arg.id).basevariable; + if (var_id) + flush_variable_declaration(var_id); + + arglist.push_back(to_func_call_arg(arg.id)); + } +} + +string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +{ + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); +} + +string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +{ + return join(".", to_member_name(type, index)); +} + +void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +{ + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + { + auto &name = memb[index].alias; + if (name.empty()) + return; + + // Reserved for temporaries. + if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + { + name.clear(); + return; + } + + update_name_cache(type.member_name_cache, name); + } +} + +// Checks whether the ID is a row_major matrix that requires conversion before use +bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +{ + // Natively supported row-major matrices do not need to be converted. + // Legacy targets do not support row major. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_decoration(id, DecorationRowMajor)) + return false; + + // Only square row-major matrices can be converted at this time. + // Converting non-square matrices will require defining custom GLSL function that + // swaps matrix elements while retaining the original dimensional form of the matrix. + const auto type = expression_type(id); + if (type.columns != type.vecsize) + SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + + return true; +} + +// Checks whether the member is a row_major matrix that requires conversion before use +bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + // Only square row-major matrices can be converted at this time. + // Converting non-square matrices will require defining custom GLSL function that + // swaps matrix elements while retaining the original dimensional form of the matrix. + const auto mbr_type = get<SPIRType>(type.member_types[index]); + if (mbr_type.columns != mbr_type.vecsize) + SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + + return true; +} + +// Checks whether the member is in packed data type, that might need to be unpacked. +// GLSL does not define packed data types, but certain subclasses do. +bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked); +} + +// Wraps the expression string in a function call that converts the +// row_major matrix result of the expression to a column_major matrix. +// Base implementation uses the standard library transpose() function. +// Subclasses may override to use a different function. +string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/) +{ + strip_enclosed_expression(exp_str); + return join("transpose(", exp_str, ")"); +} + +string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) +{ + string type_name = type_to_glsl(type, id); + remap_variable_type_name(type, name, type_name); + return join(type_name, " ", name, type_to_array_glsl(type)); +} + +// Emit a structure member. Subclasses may override to modify output, +// or to dynamically add a padding member if needed. +void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + auto &membertype = get<SPIRType>(member_type_id); + + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + statement(layout_for_member(type, index), qualifiers, qualifier, + flags_to_precision_qualifiers_glsl(membertype, memberflags), + variable_decl(membertype, to_member_name(type, index)), ";"); +} + +const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +{ + // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && + type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && + type.basetype != SPIRType::Sampler) + return ""; + + if (options.es) + { + auto &execution = get_entry_point(); + + if (flags.get(DecorationRelaxedPrecision)) + { + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + return implied_fmediump || implied_imediump ? "" : "mediump "; + } + else + { + bool implied_fhighp = + type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + return implied_fhighp || implied_ihighp ? "" : "highp "; + } + } + else if (backend.allow_precision_qualifiers) + { + // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. + // The default is highp however, so only emit mediump in the rare case that a shader has these. + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return ""; + } + else + return ""; +} + +const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + return flags_to_precision_qualifiers_glsl(expression_type(id), ir.meta[id].decoration.decoration_flags); +} + +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto &flags = ir.meta[id].decoration.decoration_flags; + string res; + + auto *var = maybe_get<SPIRVariable>(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + + res += to_interpolation_qualifiers(flags); + if (var) + res += to_storage_qualifiers_glsl(*var); + + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData && type.image.sampled == 2) + { + if (flags.get(DecorationCoherent)) + res += "coherent "; + if (flags.get(DecorationRestrict)) + res += "restrict "; + if (flags.get(DecorationNonWritable)) + res += "readonly "; + if (flags.get(DecorationNonReadable)) + res += "writeonly "; + } + + res += to_precision_qualifiers_glsl(id); + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... + auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); +} + +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_expression(var.initializer); +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + auto &type = get_variable_data_type(variable); + + if (type.pointer_depth > 1) + SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + + auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + + if (variable.loop_variable && variable.static_expression) + { + uint32_t expr = variable.static_expression; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_expression(variable.static_expression)); + } + else if (variable.initializer) + { + uint32_t expr = variable.initializer; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_initializer_expression(variable)); + } + return res; +} + +const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto &flags = ir.meta[variable.self].decoration.decoration_flags; + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get<SPIRVariable>(var.id); + + SPIRType type; + type.vecsize = pls_format_to_components(var.format); + type.basetype = pls_format_to_basetype(var.format); + + return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", + to_name(variable.self)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +{ + return to_array_size_literal(type, uint32_t(type.array.size() - 1)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const +{ + assert(type.array.size() == type.array_size_literal.size()); + + if (type.array_size_literal[index]) + { + return type.array[index]; + } + else + { + // Use the default spec constant value. + // This is the best we can do. + uint32_t array_size_id = type.array[index]; + + // Explicitly check for this case. The error message you would get (bad cast) makes no sense otherwise. + if (ir.ids[array_size_id].get_type() == TypeConstantOp) + SPIRV_CROSS_THROW("An array size was found to be an OpSpecConstantOp. This is not supported since " + "SPIRV-Cross cannot deduce the actual size here."); + + uint32_t array_size = get<SPIRConstant>(array_size_id).scalar(); + return array_size; + } +} + +string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +{ + assert(type.array.size() == type.array_size_literal.size()); + + // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. + // Opt for unsized as it's the more "correct" variant to use. + if (type.storage == StorageClassInput && (get_entry_point().model == ExecutionModelTessellationControl || + get_entry_point().model == ExecutionModelTessellationEvaluation)) + return ""; + + auto &size = type.array[index]; + if (!type.array_size_literal[index]) + return to_expression(size); + else if (size) + return convert_to_string(size); + else if (!backend.flexible_member_array_supported) + { + // For runtime-sized arrays, we can work around + // lack of standard support for this by simply having + // a single element array. + // + // Runtime length arrays must always be the last element + // in an interface block. + return "1"; + } + else + return ""; +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type) +{ + if (type.array.empty()) + return ""; + + if (options.flatten_multidimensional_arrays) + { + string res; + res += "["; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += enclose_expression(to_array_size(type, i - 1)); + if (i > 1) + res += " * "; + } + res += "]"; + return res; + } + else + { + if (type.array.size() > 1) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " + "Try using --flatten-multidimensional-arrays or set " + "options.flatten_multidimensional_arrays to true."); + } + + string res; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += "["; + res += to_array_size(type, i - 1); + res += "]"; + } + return res; + } +} + +string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) +{ + auto &imagetype = get<SPIRType>(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + res = "i"; + break; + case SPIRType::UInt: + res = "u"; + break; + default: + break; + } + + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) + { + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? "image" : "texture"; + } + else + res += "sampler"; + + switch (type.image.dim) + { + case Dim1D: + res += "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "Cube"; + break; + case DimRect: + if (options.es) + SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); + + if (is_legacy_desktop()) + require_extension_internal("GL_ARB_texture_rectangle"); + + res += "2DRect"; + break; + + case DimBuffer: + if (options.es && options.version < 320) + require_extension_internal("GL_OES_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension_internal("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; + + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + { + if (is_legacy_desktop()) + require_extension_internal("GL_EXT_texture_array"); + res += "Array"; + } + + // "Shadow" state in GLSL only exists for samplers and combined image samplers. + if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && + image_is_comparison(type, id)) + { + res += "Shadow"; + } + + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + if (type.array.size() > 1) + { + if (options.flatten_multidimensional_arrays) + SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]()."); + else if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); + } + + auto e = type_to_glsl(type); + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + return e; +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing + // this distinction into the type system. + return comparison_ids.count(id) ? "samplerShadow" : "sampler"; + + case SPIRType::AccelerationStructureNV: + return "accelerationStructureNV"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.basetype == SPIRType::UInt && is_legacy()) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::SByte: + return backend.basic_int8_type; + case SPIRType::UByte: + return backend.basic_uint8_type; + case SPIRType::Short: + return backend.basic_int16_type; + case SPIRType::UShort: + return backend.basic_uint16_type; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + return "float16_t"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bvec", type.vecsize); + case SPIRType::SByte: + return join("i8vec", type.vecsize); + case SPIRType::UByte: + return join("u8vec", type.vecsize); + case SPIRType::Short: + return join("i16vec", type.vecsize); + case SPIRType::UShort: + return join("u16vec", type.vecsize); + case SPIRType::Int: + return join("ivec", type.vecsize); + case SPIRType::UInt: + return join("uvec", type.vecsize); + case SPIRType::Half: + return join("f16vec", type.vecsize); + case SPIRType::Float: + return join("vec", type.vecsize); + case SPIRType::Double: + return join("dvec", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; + } + } + else if (type.vecsize == type.columns) // Simple Matrix builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.vecsize); + case SPIRType::Int: + return join("imat", type.vecsize); + case SPIRType::UInt: + return join("umat", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.vecsize); + case SPIRType::Float: + return join("mat", type.vecsize); + case SPIRType::Double: + return join("dmat", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("mat", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("dmat", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } +} + +void CompilerGLSL::add_variable(unordered_set<string> &variables_primary, + const unordered_set<string> &variables_secondary, string &name) +{ + if (name.empty()) + return; + + // Reserved for temporaries. + if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) + { + name.clear(); + return; + } + + // Avoid double underscores. + name = sanitize_underscores(name); + + update_name_cache(variables_primary, variables_secondary, name); +} + +void CompilerGLSL::add_local_variable_name(uint32_t id) +{ + add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); +} + +void CompilerGLSL::add_resource_name(uint32_t id) +{ + add_variable(resource_names, block_names, ir.meta[id].decoration.alias); +} + +void CompilerGLSL::add_header_line(const std::string &line) +{ + header_lines.push_back(line); +} + +bool CompilerGLSL::has_extension(const std::string &ext) const +{ + auto itr = find(begin(forced_extensions), end(forced_extensions), ext); + return itr != end(forced_extensions); +} + +void CompilerGLSL::require_extension(const std::string &ext) +{ + if (!has_extension(ext)) + forced_extensions.push_back(ext); +} + +void CompilerGLSL::require_extension_internal(const string &ext) +{ + if (backend.supports_extensions && !has_extension(ext)) + { + forced_extensions.push_back(ext); + force_recompile = true; + } +} + +void CompilerGLSL::flatten_buffer_block(uint32_t id) +{ + auto &var = get<SPIRVariable>(id); + auto &type = get<SPIRType>(var.basetype); + auto name = to_name(type.self, false); + auto &flags = ir.meta[type.self].decoration.decoration_flags; + + if (!type.array.empty()) + SPIRV_CROSS_THROW(name + " is an array of UBOs."); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW(name + " is not a struct."); + if (!flags.get(DecorationBlock)) + SPIRV_CROSS_THROW(name + " is not a block."); + if (type.member_types.empty()) + SPIRV_CROSS_THROW(name + " is an empty struct."); + + flattened_buffer_blocks.insert(id); +} + +bool CompilerGLSL::check_atomic_image(uint32_t id) +{ + auto &type = expression_type(id); + if (type.storage == StorageClassImage) + { + if (options.es && options.version < 320) + require_extension_internal("GL_OES_shader_image_atomic"); + + auto *var = maybe_get_backing_variable(id); + if (var) + { + auto &flags = ir.meta[var->self].decoration.decoration_flags; + if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable)) + { + flags.clear(DecorationNonWritable); + flags.clear(DecorationNonReadable); + force_recompile = true; + } + } + return true; + } + else + return false; +} + +void CompilerGLSL::add_function_overload(const SPIRFunction &func) +{ + Hasher hasher; + for (auto &arg : func.arguments) + { + // Parameters can vary with pointer type or not, + // but that will not change the signature in GLSL/HLSL, + // so strip the pointer type before hashing. + uint32_t type_id = get_pointee_type_id(arg.type); + auto &type = get<SPIRType>(type_id); + + if (!combined_image_samplers.empty()) + { + // If we have combined image samplers, we cannot really trust the image and sampler arguments + // we pass down to callees, because they may be shuffled around. + // Ignore these arguments, to make sure that functions need to differ in some other way + // to be considered different overloads. + if (type.basetype == SPIRType::SampledImage || + (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) + { + continue; + } + } + + hasher.u32(type_id); + } + uint64_t types_hash = hasher.get(); + + auto function_name = to_name(func.self); + auto itr = function_overloads.find(function_name); + if (itr != end(function_overloads)) + { + // There exists a function with this name already. + auto &overloads = itr->second; + if (overloads.count(types_hash) != 0) + { + // Overload conflict, assign a new name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } + else + { + // Can reuse the name. + overloads.insert(types_hash); + } + } + else + { + // First time we see this function name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } +} + +void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + // Avoid shadow declarations. + local_variable_names = resource_names; + + string decl; + + auto &type = get<SPIRType>(func.return_type); + decl += flags_to_precision_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += type_to_array_glsl(type); + decl += " "; + + if (func.self == ir.default_entry_point) + { + decl += "main"; + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + vector<string> arglist; + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg.id)) + continue; + + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); +} + +void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) +{ + // Avoid potential cycles. + if (func.active) + return; + func.active = true; + + // If we depend on a function, emit that function before we emit our own function. + for (auto block : func.blocks) + { + auto &b = get<SPIRBlock>(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + if (op == OpFunctionCall) + { + // Recursively emit functions which are called. + uint32_t id = ops[2]; + emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags); + } + } + } + + emit_function_prototype(func, return_flags); + begin_scope(); + + if (func.self == ir.default_entry_point) + emit_entry_point_declarations(); + + current_function = &func; + auto &entry_block = get<SPIRBlock>(func.entry_block); + + sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); + for (auto &array : func.constant_arrays_needed_on_stack) + { + auto &c = get<SPIRConstant>(array); + auto &type = get<SPIRType>(c.constant_type); + statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); + } + + for (auto &v : func.local_variables) + { + auto &var = get<SPIRVariable>(v); + if (var.storage == StorageClassWorkgroup) + { + // Special variable type which cannot have initializer, + // need to be declared as standalone variables. + // Comes from MSL which can push global variables as local variables in main function. + add_local_variable_name(var.self); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else if (var.storage == StorageClassPrivate) + { + // These variables will not have had their CFG usage analyzed, so move it to the entry block. + // Comes from MSL which can push global variables as local variables in main function. + // We could just declare them right now, but we would miss out on an important initialization case which is + // LUT declaration in MSL. + // If we don't declare the variable when it is assigned we're forced to go through a helper function + // which copies elements one by one. + add_local_variable_name(var.self); + auto &dominated = entry_block.dominated_variables; + if (find(begin(dominated), end(dominated), var.self) == end(dominated)) + entry_block.dominated_variables.push_back(var.self); + var.deferred_declaration = true; + } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } + else if (expression_is_lvalue(v)) + { + add_local_variable_name(var.self); + + if (var.initializer) + statement(variable_decl_function_local(var), ";"); + else + { + // Don't declare variable until first use to declutter the GLSL output quite a lot. + // If we don't touch the variable before first branch, + // declare it then since we need variable declaration to be in top scope. + var.deferred_declaration = true; + } + } + else + { + // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. + // For these types (non-lvalue), we enforce forwarding through a shadowed variable. + // This means that when we OpStore to these variables, we just write in the expression ID directly. + // This breaks any kind of branching, since the variable must be statically assigned. + // Branching on samplers and images would be pretty much impossible to fake in GLSL. + var.statically_assigned = true; + } + + var.loop_variable_enable = false; + + // Loop variables are never declared outside their for-loop, so block any implicit declaration. + if (var.loop_variable) + var.deferred_declaration = false; + } + + for (auto &line : current_function->fixup_hooks_in) + line(); + + entry_block.loop_dominator = SPIRBlock::NoDominator; + emit_block_chain(entry_block); + + end_scope(); + processing_entry_point = false; + statement(""); +} + +void CompilerGLSL::emit_fixup() +{ + auto &execution = get_entry_point(); + if (execution.model == ExecutionModelVertex) + { + if (options.vertex.fixup_clipspace) + { + const char *suffix = backend.float_literal_suffix ? "f" : ""; + statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); + } +} + +bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to) +{ + auto &child = get<SPIRBlock>(to); + for (auto &phi : child.phi_variables) + if (phi.parent == from) + return true; + return false; +} + +void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) +{ + auto &child = get<SPIRBlock>(to); + + unordered_set<uint32_t> temporary_phi_variables; + + for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) + { + auto &phi = *itr; + + if (phi.parent == from) + { + auto &var = get<SPIRVariable>(phi.function_variable); + + // A Phi variable might be a loop variable, so flush to static expression. + if (var.loop_variable && !var.loop_variable_enable) + var.static_expression = phi.local_variable; + else + { + flush_variable_declaration(phi.function_variable); + + // Check if we are going to write to a Phi variable that another statement will read from + // as part of another Phi node in our target block. + // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. + // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. + bool need_saved_temporary = + find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { + return future_phi.local_variable == phi.function_variable && future_phi.parent == from; + }) != end(child.phi_variables); + + if (need_saved_temporary) + { + // Need to make sure we declare the phi variable with a copy at the right scope. + // We cannot safely declare a temporary here since we might be inside a continue block. + if (!var.allocate_temporary_copy) + { + var.allocate_temporary_copy = true; + force_recompile = true; + } + statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); + temporary_phi_variables.insert(phi.function_variable); + } + + // This might be called in continue block, so make sure we + // use this to emit ESSL 1.0 compliant increments/decrements. + auto lhs = to_expression(phi.function_variable); + + string rhs; + if (temporary_phi_variables.count(phi.local_variable)) + rhs = join("_", phi.local_variable, "_copy"); + else + rhs = to_pointer_expression(phi.local_variable); + + if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + + register_write(phi.function_variable); + } + } +} + +void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) +{ + auto &to_block = get<SPIRBlock>(to); + if (from == to) + return; + + assert(is_continue(to)); + if (to_block.complex_continue) + { + // Just emit the whole block chain as is. + auto usage_counts = expression_usage_counts; + auto invalid = invalid_expressions; + + emit_block_chain(to_block); + + // Expression usage counts and invalid expressions + // are moot after returning from the continue block. + // Since we emit the same block multiple times, + // we don't want to invalidate ourselves. + expression_usage_counts = usage_counts; + invalid_expressions = invalid; + } + else + { + auto &from_block = get<SPIRBlock>(from); + bool outside_control_flow = false; + uint32_t loop_dominator = 0; + + // FIXME: Refactor this to not use the old loop_dominator tracking. + if (from_block.merge_block) + { + // If we are a loop header, we don't set the loop dominator, + // so just use "self" here. + loop_dominator = from; + } + else if (from_block.loop_dominator != SPIRBlock::NoDominator) + { + loop_dominator = from_block.loop_dominator; + } + + if (loop_dominator != 0) + { + auto &dominator = get<SPIRBlock>(loop_dominator); + + // For non-complex continue blocks, we implicitly branch to the continue block + // by having the continue block be part of the loop header in for (; ; continue-block). + outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); + } + + // Some simplification for for-loops. We always end up with a useless continue; + // statement since we branch to a loop block. + // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, + // we can avoid writing out an explicit continue statement. + // Similar optimization to return statements if we know we're outside flow control. + if (!outside_control_flow) + statement("continue;"); + } +} + +void CompilerGLSL::branch(uint32_t from, uint32_t to) +{ + flush_phi(from, to); + flush_control_dependent_expressions(from); + flush_all_active_variables(); + + // This is only a continue if we branch to our loop dominator. + if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to) + { + // This can happen if we had a complex continue block which was emitted. + // Once the continue block tries to branch to the loop header, just emit continue; + // and end the chain here. + statement("continue;"); + } + else if (is_break(to)) + { + // Very dirty workaround. + // Switch constructs are able to break, but they cannot break out of a loop at the same time. + // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, + // write to the ladder here, and defer the break. + // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. + if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u && + get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to) + { + if (!current_emitting_switch->need_ladder_break) + { + force_recompile = true; + current_emitting_switch->need_ladder_break = true; + } + + statement("_", current_emitting_switch->self, "_ladder_break = true;"); + } + statement("break;"); + } + else if (is_continue(to) || (from == to)) + { + // For from == to case can happen for a do-while loop which branches into itself. + // We don't mark these cases as continue blocks, but the only possible way to branch into + // ourselves is through means of continue blocks. + branch_to_continue(from, to); + } + else if (!is_conditional(to)) + emit_block_chain(get<SPIRBlock>(to)); + + // It is important that we check for break before continue. + // A block might serve two purposes, a break block for the inner scope, and + // a continue block in the outer scope. + // Inner scope always takes precedence. +} + +void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) +{ + // If we branch directly to a selection merge target, we don't really need a code path. + bool true_sub = !is_conditional(true_block); + bool false_sub = !is_conditional(false_block); + + if (true_sub) + { + emit_block_hints(get<SPIRBlock>(from)); + statement("if (", to_expression(cond), ")"); + begin_scope(); + branch(from, true_block); + end_scope(); + + if (false_sub || is_continue(false_block) || is_break(false_block)) + { + statement("else"); + begin_scope(); + branch(from, false_block); + end_scope(); + } + else if (flush_phi_required(from, false_block)) + { + statement("else"); + begin_scope(); + flush_phi(from, false_block); + end_scope(); + } + } + else if (false_sub && !true_sub) + { + // Only need false path, use negative conditional. + emit_block_hints(get<SPIRBlock>(from)); + statement("if (!", to_enclosed_expression(cond), ")"); + begin_scope(); + branch(from, false_block); + end_scope(); + + if (is_continue(true_block) || is_break(true_block)) + { + statement("else"); + begin_scope(); + branch(from, true_block); + end_scope(); + } + else if (flush_phi_required(from, true_block)) + { + statement("else"); + begin_scope(); + flush_phi(from, true_block); + end_scope(); + } + } +} + +void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block) +{ + // Propagate down the loop dominator block, so that dominated blocks can back trace. + if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator) + { + uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator; + + auto set_dominator = [this](uint32_t self, uint32_t new_dominator) { + auto &dominated_block = this->get<SPIRBlock>(self); + + // If we already have a loop dominator, we're trying to break out to merge targets + // which should not update the loop dominator. + if (!dominated_block.loop_dominator) + dominated_block.loop_dominator = new_dominator; + }; + + // After merging a loop, we inherit the loop dominator always. + if (block.merge_block) + set_dominator(block.merge_block, block.loop_dominator); + + if (block.true_block) + set_dominator(block.true_block, dominator); + if (block.false_block) + set_dominator(block.false_block, dominator); + if (block.next_block) + set_dominator(block.next_block, dominator); + if (block.default_block) + set_dominator(block.default_block, dominator); + + for (auto &c : block.cases) + set_dominator(c.block, dominator); + + // In older glslang output continue_block can be == loop header. + if (block.continue_block && block.continue_block != block.self) + set_dominator(block.continue_block, dominator); + } +} + +// FIXME: This currently cannot handle complex continue blocks +// as in do-while. +// This should be seen as a "trivial" continue block. +string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) +{ + auto *block = &get<SPIRBlock>(continue_block); + + // While emitting the continue block, declare_temporary will check this + // if we have to emit temporaries. + current_continue_block = block; + + vector<string> statements; + + // Capture all statements into our list. + auto *old = redirect_statement; + redirect_statement = &statements; + + // Stamp out all blocks one after each other. + while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) + { + propagate_loop_dominators(*block); + // Write out all instructions we have in this block. + emit_block_instructions(*block); + + // For plain branchless for/while continue blocks. + if (block->next_block) + { + flush_phi(continue_block, block->next_block); + block = &get<SPIRBlock>(block->next_block); + } + // For do while blocks. The last block will be a select block. + else if (block->true_block && follow_true_block) + { + flush_phi(continue_block, block->true_block); + block = &get<SPIRBlock>(block->true_block); + } + else if (block->false_block && follow_false_block) + { + flush_phi(continue_block, block->false_block); + block = &get<SPIRBlock>(block->false_block); + } + else + { + SPIRV_CROSS_THROW("Invalid continue block detected!"); + } + } + + // Restore old pointer. + redirect_statement = old; + + // Somewhat ugly, strip off the last ';' since we use ',' instead. + // Ideally, we should select this behavior in statement(). + for (auto &s : statements) + { + if (!s.empty() && s.back() == ';') + s.erase(s.size() - 1, 1); + } + + current_continue_block = nullptr; + return merge(statements); +} + +void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) +{ + // While loops do not take initializers, so declare all of them outside. + for (auto &loop_var : block.loop_variables) + { + auto &var = get<SPIRVariable>(loop_var); + statement(variable_decl(var), ";"); + } +} + +string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) +{ + if (block.loop_variables.empty()) + return ""; + + bool same_types = for_loop_initializers_are_same_type(block); + // We can only declare for loop initializers if all variables are of same type. + // If we cannot do this, declare individual variables before the loop header. + + // We might have a loop variable candidate which was not assigned to for some reason. + uint32_t missing_initializers = 0; + for (auto &variable : block.loop_variables) + { + uint32_t expr = get<SPIRVariable>(variable).static_expression; + + // Sometimes loop variables are initialized with OpUndef, but we can just declare + // a plain variable without initializer in this case. + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + missing_initializers++; + } + + if (block.loop_variables.size() == 1 && missing_initializers == 0) + { + return variable_decl(get<SPIRVariable>(block.loop_variables.front())); + } + else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) + { + for (auto &loop_var : block.loop_variables) + statement(variable_decl(get<SPIRVariable>(loop_var)), ";"); + return ""; + } + else + { + // We have a mix of loop variables, either ones with a clear initializer, or ones without. + // Separate the two streams. + string expr; + + for (auto &loop_var : block.loop_variables) + { + uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression; + if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) + { + statement(variable_decl(get<SPIRVariable>(loop_var)), ";"); + } + else + { + auto &var = get<SPIRVariable>(loop_var); + auto &type = get_variable_data_type(var); + if (expr.empty()) + { + // For loop initializers are of the form <type id = value, id = value, id = value, etc ... + expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " "); + } + else + { + expr += ", "; + // In MSL, being based on C++, the asterisk marking a pointer + // binds to the identifier, not the type. + if (type.pointer) + expr += "* "; + } + + expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression)); + } + } + return expr; + } +} + +bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block) +{ + if (block.loop_variables.size() <= 1) + return true; + + uint32_t expected = 0; + Bitset expected_flags; + for (auto &var : block.loop_variables) + { + // Don't care about uninitialized variables as they will not be part of the initializers. + uint32_t expr = get<SPIRVariable>(var).static_expression; + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + continue; + + if (expected == 0) + { + expected = get<SPIRVariable>(var).basetype; + expected_flags = get_decoration_bitset(var); + } + else if (expected != get<SPIRVariable>(var).basetype) + return false; + + // Precision flags and things like that must also match. + if (expected_flags != get_decoration_bitset(var)) + return false; + } + + return true; +} + +bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) +{ + SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block)); + + if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) + { + uint32_t current_count = statement_count; + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions(block); + + bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + + // This can work! We only did trivial things which could be forwarded in block body! + if (current_count == statement_count && condition_is_temporary) + { + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(block); + + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(block.condition); + + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + condition = join("!", enclose_expression(condition)); + + emit_block_hints(block); + if (method != SPIRBlock::MergeToSelectContinueForLoop) + { + auto continue_block = emit_continue_block(block.continue_block, false, false); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + } + else + statement("for (", initializer, "; ", condition, "; )"); + break; + } + + case SPIRBlock::WhileLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(block.condition); + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + condition = join("!", enclose_expression(condition)); + + statement("while (", condition, ")"); + break; + } + + default: + SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + } + + begin_scope(); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile = true; + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + auto &child = get<SPIRBlock>(block.next_block); + + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(child); + + uint32_t current_count = statement_count; + + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions(child); + + bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); + + if (current_count == statement_count && condition_is_temporary) + { + propagate_loop_dominators(child); + uint32_t target_block = child.true_block; + + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(child.condition); + + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + auto continue_block = emit_continue_block(block.continue_block, false, false); + emit_block_hints(block); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + break; + } + + case SPIRBlock::WhileLoop: + { + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(child.condition); + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + statement("while (", condition, ")"); + break; + } + + default: + SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); + } + + begin_scope(); + branch(child.self, target_block); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile = true; + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else + return false; +} + +void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) +{ + // Enforce declaration order for regression testing purposes. + sort(begin(block.dominated_variables), end(block.dominated_variables)); + for (auto &v : block.dominated_variables) + flush_variable_declaration(v); +} + +void CompilerGLSL::emit_hoisted_temporaries(vector<pair<uint32_t, uint32_t>> &temporaries) +{ + // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. + // Need to sort these to ensure that reference output is stable. + sort(begin(temporaries), end(temporaries), + [](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; }); + + for (auto &tmp : temporaries) + { + add_local_variable_name(tmp.second); + auto &flags = ir.meta[tmp.second].decoration.decoration_flags; + auto &type = get<SPIRType>(tmp.first); + statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";"); + + hoisted_temporaries.insert(tmp.second); + forced_temporaries.insert(tmp.second); + + // The temporary might be read from before it's assigned, set up the expression now. + set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true); + } +} + +void CompilerGLSL::emit_block_chain(SPIRBlock &block) +{ + propagate_loop_dominators(block); + + bool select_branch_to_true_block = false; + bool select_branch_to_false_block = false; + bool skip_direct_branch = false; + bool emitted_loop_header_variables = false; + bool force_complex_continue_block = false; + + emit_hoisted_temporaries(block.declare_temporary); + + SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; + if (block.continue_block) + continue_type = continue_block_type(get<SPIRBlock>(block.continue_block)); + + // If we have loop variables, stop masking out access to the variable now. + for (auto var : block.loop_variables) + get<SPIRVariable>(var).loop_variable_enable = true; + + // This is the method often used by spirv-opt to implement loops. + // The loop header goes straight into the continue block. + // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, + // it *MUST* be used in the continue block. This loop method will not work. + if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) + { + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + force_complex_continue_block = true; + } + } + // This is the older loop behavior in glslang which branches to loop body directly from the loop header. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) + { + // The body of while, is actually just the true (or false) block, so always branch there unconditionally. + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + } + } + // This is the newer loop behavior in glslang which branches from Loop header directly to + // a new block, which in turn has a OpBranchSelection without a selection merge. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) + { + skip_direct_branch = true; + emitted_loop_header_variables = true; + } + } + else if (continue_type == SPIRBlock::DoWhileLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + statement("do"); + begin_scope(); + + emit_block_instructions(block); + } + else if (block.merge == SPIRBlock::MergeLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + + // We have a generic loop without any distinguishable pattern like for, while or do while. + get<SPIRBlock>(block.continue_block).complex_continue = true; + continue_type = SPIRBlock::ComplexLoop; + + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + statement("for (;;)"); + begin_scope(); + + emit_block_instructions(block); + } + else + { + emit_block_instructions(block); + } + + // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem + // as writes to said loop variables might have been masked out, we need a recompile. + if (!emitted_loop_header_variables && !block.loop_variables.empty()) + { + force_recompile = true; + for (auto var : block.loop_variables) + get<SPIRVariable>(var).loop_variable = false; + block.loop_variables.clear(); + } + + flush_undeclared_variables(block); + bool emit_next_block = true; + + // Handle end of block. + switch (block.terminator) + { + case SPIRBlock::Direct: + // True when emitting complex continue block. + if (block.loop_dominator == block.next_block) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + // True if MergeToDirectForLoop succeeded. + else if (skip_direct_branch) + emit_next_block = false; + else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + break; + + case SPIRBlock::Select: + // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. + if (select_branch_to_true_block) + { + if (force_complex_continue_block) + { + assert(block.true_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.true_block); + complex_continue = old_complex; + } + else + branch(block.self, block.true_block); + } + else if (select_branch_to_false_block) + { + if (force_complex_continue_block) + { + assert(block.false_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.false_block); + complex_continue = old_complex; + } + else + branch(block.self, block.false_block); + } + else + branch(block.self, block.condition, block.true_block, block.false_block); + break; + + case SPIRBlock::MultiSelect: + { + auto &type = expression_type(block.condition); + bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort; + + if (block.merge == SPIRBlock::MergeNone) + SPIRV_CROSS_THROW("Switch statement is not structured"); + + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + { + // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. + SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); + } + + const char *label_suffix = ""; + if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) + label_suffix = "u"; + else if (type.basetype == SPIRType::UShort) + label_suffix = backend.uint16_t_literal_suffix; + else if (type.basetype == SPIRType::Short) + label_suffix = backend.int16_t_literal_suffix; + + SPIRBlock *old_emitting_switch = current_emitting_switch; + current_emitting_switch = █ + + if (block.need_ladder_break) + statement("bool _", block.self, "_ladder_break = false;"); + + emit_block_hints(block); + statement("switch (", to_expression(block.condition), ")"); + begin_scope(); + + // Multiple case labels can branch to same block, so find all unique blocks. + bool emitted_default = false; + unordered_set<uint32_t> emitted_blocks; + + for (auto &c : block.cases) + { + if (emitted_blocks.count(c.block) != 0) + continue; + + // Emit all case labels which branch to our target. + // FIXME: O(n^2), revisit if we hit shaders with 100++ case labels ... + for (auto &other_case : block.cases) + { + if (other_case.block == c.block) + { + // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. + auto case_value = unsigned_case ? convert_to_string(uint32_t(other_case.value)) : + convert_to_string(int32_t(other_case.value)); + statement("case ", case_value, label_suffix, ":"); + } + } + + // Maybe we share with default block? + if (block.default_block == c.block) + { + statement("default:"); + emitted_default = true; + } + + // Complete the target. + emitted_blocks.insert(c.block); + + begin_scope(); + branch(block.self, c.block); + end_scope(); + } + + if (!emitted_default) + { + if (block.default_block != block.next_block) + { + statement("default:"); + begin_scope(); + if (is_break(block.default_block)) + SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ..."); + branch(block.self, block.default_block); + end_scope(); + } + else if (flush_phi_required(block.self, block.next_block)) + { + statement("default:"); + begin_scope(); + flush_phi(block.self, block.next_block); + statement("break;"); + end_scope(); + } + } + + end_scope(); + + if (block.need_ladder_break) + { + statement("if (_", block.self, "_ladder_break)"); + begin_scope(); + statement("break;"); + end_scope(); + } + + current_emitting_switch = old_emitting_switch; + break; + } + + case SPIRBlock::Return: + for (auto &line : current_function->fixup_hooks_out) + line(); + + if (processing_entry_point) + emit_fixup(); + + if (block.return_value) + { + auto &type = expression_type(block.return_value); + if (!type.array.empty() && !backend.can_return_array) + { + // If we cannot return arrays, we will have a special out argument we can write to instead. + // The backend is responsible for setting this up, and redirection the return values as appropriate. + if (ir.ids[block.return_value].get_type() != TypeUndef) + emit_array_copy("SPIRV_Cross_return_value", block.return_value); + + if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) || + block.loop_dominator != SPIRBlock::NoDominator) + { + statement("return;"); + } + } + else + { + // OpReturnValue can return Undef, so don't emit anything for this case. + if (ir.ids[block.return_value].get_type() != TypeUndef) + statement("return ", to_expression(block.return_value), ";"); + } + } + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... + else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) || + block.loop_dominator != SPIRBlock::NoDominator) + { + statement("return;"); + } + break; + + case SPIRBlock::Kill: + statement(backend.discard_literal, ";"); + break; + + case SPIRBlock::Unreachable: + emit_next_block = false; + break; + + default: + SPIRV_CROSS_THROW("Unimplemented block terminator."); + } + + if (block.next_block && emit_next_block) + { + // If we hit this case, we're dealing with an unconditional branch, which means we will output + // that block after this. If we had selection merge, we already flushed phi variables. + if (block.merge != SPIRBlock::MergeSelection) + flush_phi(block.self, block.next_block); + + // For merge selects we might have ignored the fact that a merge target + // could have been a break; or continue; + // We will need to deal with it here. + if (is_loop_break(block.next_block)) + { + // Cannot check for just break, because switch statements will also use break. + assert(block.merge == SPIRBlock::MergeSelection); + statement("break;"); + } + else if (is_continue(block.next_block)) + { + assert(block.merge == SPIRBlock::MergeSelection); + branch_to_continue(block.self, block.next_block); + } + else + emit_block_chain(get<SPIRBlock>(block.next_block)); + } + + if (block.merge == SPIRBlock::MergeLoop) + { + if (continue_type == SPIRBlock::DoWhileLoop) + { + // Make sure that we run the continue block to get the expressions set, but this + // should become an empty string. + // We have no fallbacks if we cannot forward everything to temporaries ... + const auto &continue_block = get<SPIRBlock>(block.continue_block); + bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block), + get<SPIRBlock>(continue_block.loop_dominator)); + + auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); + if (!statements.empty()) + { + // The DoWhile block has side effects, force ComplexLoop pattern next pass. + get<SPIRBlock>(block.continue_block).complex_continue = true; + force_recompile = true; + } + + // Might have to invert the do-while test here. + auto condition = to_expression(continue_block.condition); + if (!positive_test) + condition = join("!", enclose_expression(condition)); + + end_scope_decl(join("while (", condition, ")")); + } + else + end_scope(); + + // We cannot break out of two loops at once, so don't check for break; here. + // Using block.self as the "from" block isn't quite right, but it has the same scope + // and dominance structure, so it's fine. + if (is_continue(block.merge_block)) + branch_to_continue(block.self, block.merge_block); + else + emit_block_chain(get<SPIRBlock>(block.merge_block)); + } + + // Forget about control dependent expressions now. + block.invalidate_expressions.clear(); +} + +void CompilerGLSL::begin_scope() +{ + statement("{"); + indent++; +} + +void CompilerGLSL::end_scope() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}"); +} + +void CompilerGLSL::end_scope_decl() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("};"); +} + +void CompilerGLSL::end_scope_decl(const string &decl) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("} ", decl, ";"); +} + +void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +{ + // If our variable is remapped, and we rely on type-remapping information as + // well, then we cannot pass the variable as a function parameter. + // Fixing this is non-trivial without stamping out variants of the same function, + // so for now warn about this and suggest workarounds instead. + for (uint32_t i = 0; i < length; i++) + { + auto *var = maybe_get<SPIRVariable>(args[i]); + if (!var || !var->remapped_variable) + continue; + + auto &type = get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + { + SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " + "This will not work correctly because type-remapping information is lost. " + "To workaround, please consider not passing the subpass input as a function parameter, " + "or use in/out variables instead which do not need type remapping information."); + } + } +} + +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. + auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return ¤t_emitting_block->ops[offset + 1]; + else + return nullptr; +} + +uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); +} + +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +{ + statement(lhs, " = ", to_expression(rhs_id), ";"); +} + +void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +{ + if (!backend.force_gl_in_out_block) + return; + // This path is only relevant for GL backends. + + auto *var = maybe_get<SPIRVariable>(source_id); + if (!var) + return; + + if (var->storage != StorageClassInput) + return; + + auto &type = get_variable_data_type(*var); + if (type.array.empty()) + return; + + auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition); + bool is_tess = is_tessellation_shader(); + + // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. + // We must unroll the array load. + // For builtins, we couldn't catch this case normally, + // because this is resolved in the OpAccessChain in most cases. + // If we load the entire array, we have no choice but to unroll here. + if (is_builtin || is_tess) + { + auto new_expr = join("_", target_id, "_unrolled"); + statement(variable_decl(type, new_expr, target_id), ";"); + string array_expr; + if (type.array_size_literal.front()) + { + array_expr = convert_to_string(type.array.front()); + if (type.array.front() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); + } + else + array_expr = to_expression(type.array.front()); + + // The array size might be a specialization constant, so use a for-loop instead. + statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + if (is_builtin) + statement(new_expr, "[i] = gl_in[i].", expr, ";"); + else + statement(new_expr, "[i] = ", expr, "[i];"); + end_scope(); + + expr = move(new_expr); + } +} + +void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, + const spirv_cross::SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(source_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. + switch (builtin) + { + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInSampleId: + case BuiltInBaseVertex: + case BuiltInBaseInstance: + case BuiltInDrawIndex: + expected_type = SPIRType::Int; + break; + + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + expected_type = SPIRType::UInt; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + expr = bitcast_expression(expr_type, expected_type, expr); +} + +void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, + const spirv_cross::SPIRType &expr_type) +{ + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. + switch (builtin) + { + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + expected_type = SPIRType::Int; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + { + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); + } +} + +void CompilerGLSL::emit_block_hints(const SPIRBlock &) +{ +} + +void CompilerGLSL::preserve_alias_on_reset(uint32_t id) +{ + preserved_aliases[id] = get_name(id); +} + +void CompilerGLSL::reset_name_caches() +{ + for (auto &preserved : preserved_aliases) + set_name(preserved.first, preserved.second); + + preserved_aliases.clear(); + resource_names.clear(); + block_input_names.clear(); + block_output_names.clear(); + block_ubo_names.clear(); + block_ssbo_names.clear(); + block_names.clear(); + function_overloads.clear(); +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp new file mode 100644 index 0000000..33e3547 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp @@ -0,0 +1,655 @@ +/* + * Copyright 2015-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_GLSL_HPP +#define SPIRV_CROSS_GLSL_HPP + +#include "spirv_cross.hpp" +#include "GLSL.std.450.h" +#include <sstream> +#include <unordered_map> +#include <unordered_set> +#include <utility> + +namespace spirv_cross +{ +enum PlsFormat +{ + PlsNone = 0, + + PlsR11FG11FB10F, + PlsR32F, + PlsRG16F, + PlsRGB10A2, + PlsRGBA8, + PlsRG16, + + PlsRGBA8I, + PlsRG16I, + + PlsRGB10A2UI, + PlsRGBA8UI, + PlsRG16UI, + PlsR32UI +}; + +struct PlsRemap +{ + uint32_t id; + PlsFormat format; +}; + +enum AccessChainFlagBits +{ + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT = 1 << 0, + ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1, + ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2, + ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3 +}; +typedef uint32_t AccessChainFlags; + +class CompilerGLSL : public Compiler +{ +public: + struct Options + { + // The shading language version. Corresponds to #version $VALUE. + uint32_t version = 450; + + // Emit the OpenGL ES shading language instead of desktop OpenGL. + bool es = false; + + // Debug option to always emit temporary variables for all expressions. + bool force_temporary = false; + + // If true, Vulkan GLSL features are used instead of GL-compatible features. + // Mostly useful for debugging SPIR-V files. + bool vulkan_semantics = false; + + // If true, gl_PerVertex is explicitly redeclared in vertex, geometry and tessellation shaders. + // The members of gl_PerVertex is determined by which built-ins are declared by the shader. + // This option is ignored in ES versions, as redeclaration in ES is not required, and it depends on a different extension + // (EXT_shader_io_blocks) which makes things a bit more fuzzy. + bool separate_shader_objects = false; + + // Flattens multidimensional arrays, e.g. float foo[a][b][c] into single-dimensional arrays, + // e.g. float foo[a * b * c]. + // This function does not change the actual SPIRType of any object. + // Only the generated code, including declarations of interface variables are changed to be single array dimension. + bool flatten_multidimensional_arrays = false; + + // For older desktop GLSL targets than version 420, the + // GL_ARB_shading_language_420pack extensions is used to be able to support + // layout(binding) on UBOs and samplers. + // If disabled on older targets, binding decorations will be stripped. + bool enable_420pack_extension = true; + + // In non-Vulkan GLSL, emit push constant blocks as UBOs rather than plain uniforms. + bool emit_push_constant_as_uniform_buffer = false; + + enum Precision + { + DontCare, + Lowp, + Mediump, + Highp + }; + + struct + { + // GLSL: In vertex shaders, rewrite [0, w] depth (Vulkan/D3D style) to [-w, w] depth (GL style). + // MSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + // HLSL: In vertex shaders, rewrite [-w, w] depth (GL style) to [0, w] depth. + bool fixup_clipspace = false; + + // Inverts gl_Position.y or equivalent. + bool flip_vert_y = false; + + // GLSL only, for HLSL version of this option, see CompilerHLSL. + // If true, the backend will assume that InstanceIndex will need to apply + // a base instance offset. Set to false if you know you will never use base instance + // functionality as it might remove some internal uniforms. + bool support_nonzero_base_instance = true; + } vertex; + + struct + { + // Add precision mediump float in ES targets when emitting GLES source. + // Add precision highp int in ES targets when emitting GLES source. + Precision default_float_precision = Mediump; + Precision default_int_precision = Highp; + } fragment; + }; + + void remap_pixel_local_storage(std::vector<PlsRemap> inputs, std::vector<PlsRemap> outputs) + { + pls_inputs = std::move(inputs); + pls_outputs = std::move(outputs); + remap_pls_variables(); + } + + explicit CompilerGLSL(std::vector<uint32_t> spirv_) + : Compiler(move(spirv_)) + { + init(); + } + + CompilerGLSL(const uint32_t *ir_, size_t word_count) + : Compiler(ir_, word_count) + { + init(); + } + + explicit CompilerGLSL(const ParsedIR &ir_) + : Compiler(ir_) + { + init(); + } + + explicit CompilerGLSL(ParsedIR &&ir_) + : Compiler(std::move(ir_)) + { + init(); + } + + const Options &get_common_options() const + { + return options; + } + + void set_common_options(const Options &opts) + { + options = opts; + } + + std::string compile() override; + + // Returns the current string held in the conversion buffer. Useful for + // capturing what has been converted so far when compile() throws an error. + std::string get_partial_source(); + + // Adds a line to be added right after #version in GLSL backend. + // This is useful for enabling custom extensions which are outside the scope of SPIRV-Cross. + // This can be combined with variable remapping. + // A new-line will be added. + // + // While add_header_line() is a more generic way of adding arbitrary text to the header + // of a GLSL file, require_extension() should be used when adding extensions since it will + // avoid creating collisions with SPIRV-Cross generated extensions. + // + // Code added via add_header_line() is typically backend-specific. + void add_header_line(const std::string &str); + + // Adds an extension which is required to run this shader, e.g. + // require_extension("GL_KHR_my_extension"); + void require_extension(const std::string &ext); + + // Legacy GLSL compatibility method. + // Takes a uniform or push constant variable and flattens it into a (i|u)vec4 array[N]; array instead. + // For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but + // mixing int and float is not. + // The name of the uniform array will be the same as the interface block name. + void flatten_buffer_block(uint32_t id); + +protected: + void reset(); + void emit_function(SPIRFunction &func, const Bitset &return_flags); + + bool has_extension(const std::string &ext) const; + void require_extension_internal(const std::string &ext); + + // Virtualize methods which need to be overridden by subclass targets like C++ and such. + virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags); + + SPIRBlock *current_emitting_block = nullptr; + SPIRBlock *current_emitting_switch = nullptr; + + virtual void emit_instruction(const Instruction &instr); + void emit_block_instructions(SPIRBlock &block); + virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count); + virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); + virtual void emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); + virtual void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, + const uint32_t *args, uint32_t count); + virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count); + virtual void emit_header(); + void build_workgroup_size(std::vector<std::string> &arguments, const SpecializationConstant &x, + const SpecializationConstant &y, const SpecializationConstant &z); + + virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); + virtual void emit_texture_op(const Instruction &i); + virtual void emit_subgroup_op(const Instruction &i); + virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); + virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage); + virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const std::string &qualifier = "", uint32_t base_offset = 0); + virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0); + std::string constant_expression(const SPIRConstant &c); + std::string constant_op_expression(const SPIRConstantOp &cop); + virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); + virtual void emit_fixup(); + virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); + virtual std::string to_func_call_arg(uint32_t id); + virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, + bool has_dref, uint32_t lod); + virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, + uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, + uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, + bool *p_forward); + virtual void emit_buffer_block(const SPIRVariable &type); + virtual void emit_push_constant_block(const SPIRVariable &var); + virtual void emit_uniform(const SPIRVariable &var); + virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id); + + std::unique_ptr<std::ostringstream> buffer; + + template <typename T> + inline void statement_inner(T &&t) + { + (*buffer) << std::forward<T>(t); + statement_count++; + } + + template <typename T, typename... Ts> + inline void statement_inner(T &&t, Ts &&... ts) + { + (*buffer) << std::forward<T>(t); + statement_count++; + statement_inner(std::forward<Ts>(ts)...); + } + + template <typename... Ts> + inline void statement(Ts &&... ts) + { + if (force_recompile) + { + // Do not bother emitting code while force_recompile is active. + // We will compile again. + statement_count++; + return; + } + + if (redirect_statement) + redirect_statement->push_back(join(std::forward<Ts>(ts)...)); + else + { + for (uint32_t i = 0; i < indent; i++) + (*buffer) << " "; + statement_inner(std::forward<Ts>(ts)...); + (*buffer) << '\n'; + } + } + + template <typename... Ts> + inline void statement_no_indent(Ts &&... ts) + { + auto old_indent = indent; + indent = 0; + statement(std::forward<Ts>(ts)...); + indent = old_indent; + } + + // Used for implementing continue blocks where + // we want to obtain a list of statements we can merge + // on a single line separated by comma. + std::vector<std::string> *redirect_statement = nullptr; + const SPIRBlock *current_continue_block = nullptr; + + void begin_scope(); + void end_scope(); + void end_scope_decl(); + void end_scope_decl(const std::string &decl); + + Options options; + + std::string type_to_array_glsl(const SPIRType &type); + std::string to_array_size(const SPIRType &type, uint32_t index); + uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const; + uint32_t to_array_size_literal(const SPIRType &type) const; + std::string variable_decl(const SPIRVariable &variable); + std::string variable_decl_function_local(SPIRVariable &variable); + + void add_local_variable_name(uint32_t id); + void add_resource_name(uint32_t id); + void add_member_name(SPIRType &type, uint32_t name); + void add_function_overload(const SPIRFunction &func); + + virtual bool is_non_native_row_major_matrix(uint32_t id); + virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index); + bool member_is_packed_type(const SPIRType &type, uint32_t index) const; + virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed); + + std::unordered_set<std::string> local_variable_names; + std::unordered_set<std::string> resource_names; + std::unordered_set<std::string> block_input_names; + std::unordered_set<std::string> block_output_names; + std::unordered_set<std::string> block_ubo_names; + std::unordered_set<std::string> block_ssbo_names; + std::unordered_set<std::string> block_names; // A union of all block_*_names. + std::unordered_map<std::string, std::unordered_set<uint64_t>> function_overloads; + std::unordered_map<uint32_t, std::string> preserved_aliases; + void preserve_alias_on_reset(uint32_t id); + void reset_name_caches(); + + bool processing_entry_point = false; + + // Can be overriden by subclass backends for trivial things which + // shouldn't need polymorphism. + struct BackendVariations + { + std::string discard_literal = "discard"; + std::string null_pointer_literal = ""; + bool float_literal_suffix = false; + bool double_literal_suffix = true; + bool uint32_t_literal_suffix = true; + bool long_long_literal_suffix = false; + const char *basic_int_type = "int"; + const char *basic_uint_type = "uint"; + const char *basic_int8_type = "int8_t"; + const char *basic_uint8_type = "uint8_t"; + const char *basic_int16_type = "int16_t"; + const char *basic_uint16_type = "uint16_t"; + const char *int16_t_literal_suffix = "s"; + const char *uint16_t_literal_suffix = "us"; + bool swizzle_is_function = false; + bool shared_is_implied = false; + bool flexible_member_array_supported = true; + bool explicit_struct_type = false; + bool use_initializer_list = false; + bool use_typed_initializer_list = false; + bool can_declare_struct_inline = true; + bool can_declare_arrays_inline = true; + bool native_row_major_matrix = true; + bool use_constructor_splatting = true; + bool boolean_mix_support = true; + bool allow_precision_qualifiers = false; + bool can_swizzle_scalar = false; + bool force_gl_in_out_block = false; + bool can_return_array = true; + bool allow_truncated_access_chain = false; + bool supports_extensions = false; + bool supports_empty_struct = false; + bool array_is_value_type = true; + bool comparison_image_samples_scalar = false; + } backend; + + void emit_struct(SPIRType &type); + void emit_resources(); + void emit_buffer_block_native(const SPIRVariable &var); + void emit_buffer_block_legacy(const SPIRVariable &var); + void emit_buffer_block_flattened(const SPIRVariable &type); + void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); + void emit_push_constant_block_vulkan(const SPIRVariable &var); + void emit_push_constant_block_glsl(const SPIRVariable &var); + void emit_interface_block(const SPIRVariable &type); + void emit_flattened_io_block(const SPIRVariable &var, const char *qual); + void emit_block_chain(SPIRBlock &block); + void emit_hoisted_temporaries(std::vector<std::pair<uint32_t, uint32_t>> &temporaries); + std::string constant_value_macro_name(uint32_t id); + void emit_constant(const SPIRConstant &constant); + void emit_specialization_constant_op(const SPIRConstantOp &constant); + std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block); + bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); + void propagate_loop_dominators(const SPIRBlock &block); + + void branch(uint32_t from, uint32_t to); + void branch_to_continue(uint32_t from, uint32_t to); + void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block); + void flush_phi(uint32_t from, uint32_t to); + bool flush_phi_required(uint32_t from, uint32_t to); + void flush_variable_declaration(uint32_t id); + void flush_undeclared_variables(SPIRBlock &block); + + bool should_dereference(uint32_t id); + bool should_forward(uint32_t id); + void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); + void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op); + bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp); + void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + uint32_t op3, const char *op); + void emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + const char *op); + void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + + void emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type); + void emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + void emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, + SPIRType::BaseType input_type); + + void emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + void emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op); + void emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, + SPIRType::BaseType input_type, bool skip_cast_if_equal_type); + + SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type); + + std::string to_ternary_expression(const SPIRType &result_type, uint32_t select, uint32_t true_value, + uint32_t false_value); + + void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); + bool expression_is_forwarded(uint32_t id); + SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, + bool suppress_usage_tracking = false); + + std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, + AccessChainMeta *meta); + + std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta = nullptr, bool ptr_chain = false); + + std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + bool need_transpose); + std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset); + std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + bool need_transpose); + std::string flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + bool need_transpose); + std::pair<std::string, uint32_t> flattened_access_chain_offset(const SPIRType &basetype, const uint32_t *indices, + uint32_t count, uint32_t offset, + uint32_t word_stride, bool *need_transpose = nullptr, + uint32_t *matrix_stride = nullptr, + bool ptr_chain = false); + + const char *index_to_swizzle(uint32_t index); + std::string remap_swizzle(const SPIRType &result_type, uint32_t input_components, const std::string &expr); + std::string declare_temporary(uint32_t type, uint32_t id); + void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector<std::string> &arglist); + std::string to_expression(uint32_t id, bool register_expression_read = true); + std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); + std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true); + std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true); + std::string to_pointer_expression(uint32_t id, bool register_expression_read = true); + std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true); + std::string to_extract_component_expression(uint32_t id, uint32_t index); + std::string enclose_expression(const std::string &expr); + std::string dereference_expression(const std::string &expr); + std::string address_of_expression(const std::string &expr); + void strip_enclosed_expression(std::string &expr); + std::string to_member_name(const SPIRType &type, uint32_t index); + virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain); + std::string type_to_glsl_constructor(const SPIRType &type); + std::string argument_decl(const SPIRFunction::Parameter &arg); + virtual std::string to_qualifiers_glsl(uint32_t id); + const char *to_precision_qualifiers_glsl(uint32_t id); + virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); + const char *flags_to_precision_qualifiers_glsl(const SPIRType &type, const Bitset &flags); + const char *format_to_glsl(spv::ImageFormat format); + virtual std::string layout_for_member(const SPIRType &type, uint32_t index); + virtual std::string to_interpolation_qualifiers(const Bitset &flags); + std::string layout_for_variable(const SPIRVariable &variable); + std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); + virtual bool skip_argument(uint32_t id) const; + virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); + virtual void emit_block_hints(const SPIRBlock &block); + virtual std::string to_initializer_expression(const SPIRVariable &var); + + bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, + uint32_t end_offset = ~(0u)); + uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing); + uint32_t type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + uint32_t type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing); + + std::string bitcast_glsl(const SPIRType &result_type, uint32_t arg); + virtual std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type); + + std::string bitcast_expression(SPIRType::BaseType target_type, uint32_t arg); + std::string bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, const std::string &expr); + + std::string build_composite_combiner(uint32_t result_type, const uint32_t *elems, uint32_t length); + bool remove_duplicate_swizzle(std::string &op); + bool remove_unity_swizzle(uint32_t base, std::string &op); + + // Can modify flags to remote readonly/writeonly if image type + // and force recompile. + bool check_atomic_image(uint32_t id); + + virtual void replace_illegal_names(); + virtual void emit_entry_point_declarations(); + + void replace_fragment_output(SPIRVariable &var); + void replace_fragment_outputs(); + bool check_explicit_lod_allowed(uint32_t lod); + std::string legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t id); + + uint32_t indent = 0; + + std::unordered_set<uint32_t> emitted_functions; + + std::unordered_set<uint32_t> flattened_buffer_blocks; + std::unordered_set<uint32_t> flattened_structs; + + std::string load_flattened_struct(SPIRVariable &var); + std::string to_flattened_struct_member(const SPIRVariable &var, uint32_t index); + void store_flattened_struct(SPIRVariable &var, uint32_t value); + + // Usage tracking. If a temporary is used more than once, use the temporary instead to + // avoid AST explosion when SPIRV is generated with pure SSA and doesn't write stuff to variables. + std::unordered_map<uint32_t, uint32_t> expression_usage_counts; + void track_expression_read(uint32_t id); + + std::vector<std::string> forced_extensions; + std::vector<std::string> header_lines; + + // Used when expressions emit extra opcodes with their own unique IDs, + // and we need to reuse the IDs across recompilation loops. + // Currently used by NMin/Max/Clamp implementations. + std::unordered_map<uint32_t, uint32_t> extra_sub_expressions; + + uint32_t statement_count; + + inline bool is_legacy() const + { + return (options.es && options.version < 300) || (!options.es && options.version < 130); + } + + inline bool is_legacy_es() const + { + return options.es && options.version < 300; + } + + inline bool is_legacy_desktop() const + { + return !options.es && options.version < 130; + } + + bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure); + void register_call_out_argument(uint32_t id); + void register_impure_function_call(); + void register_control_dependent_expression(uint32_t expr); + + // GL_EXT_shader_pixel_local_storage support. + std::vector<PlsRemap> pls_inputs; + std::vector<PlsRemap> pls_outputs; + std::string pls_decl(const PlsRemap &variable); + const char *to_pls_qualifiers_glsl(const SPIRVariable &variable); + void emit_pls(); + void remap_pls_variables(); + + // A variant which takes two sets of name. The secondary is only used to verify there are no collisions, + // but the set is not updated when we have found a new name. + // Used primarily when adding block interface names. + void add_variable(std::unordered_set<std::string> &variables_primary, + const std::unordered_set<std::string> &variables_secondary, std::string &name); + + void check_function_call_constraints(const uint32_t *args, uint32_t length); + void handle_invalid_expression(uint32_t id); + void find_static_extensions(); + + std::string emit_for_loop_initializers(const SPIRBlock &block); + void emit_while_loop_initializers(const SPIRBlock &block); + bool for_loop_initializers_are_same_type(const SPIRBlock &block); + bool optimize_read_modify_write(const SPIRType &type, const std::string &lhs, const std::string &rhs); + void fixup_image_load_store_access(); + + bool type_is_empty(const SPIRType &type); + + virtual void declare_undefined_values(); + + static std::string sanitize_underscores(const std::string &str); + + bool can_use_io_location(spv::StorageClass storage, bool block); + const Instruction *get_next_instruction_in_block(const Instruction &instr); + static uint32_t mask_relevant_memory_semantics(uint32_t semantics); + + std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + + std::string convert_separate_image_to_expression(uint32_t id); + + // Builtins in GLSL are always specific signedness, but the SPIR-V can declare them + // as either unsigned or signed. + // Sometimes we will need to automatically perform bitcasts on load and store to make this work. + virtual void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type); + virtual void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type); + void unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr); + + void handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id); + void disallow_forwarding_in_expression_chain(const SPIRExpression &expr); + + bool expression_is_constant_null(uint32_t id) const; + virtual void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression); + + uint32_t get_integer_width_for_instruction(const Instruction &instr) const; + uint32_t get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *arguments, uint32_t length) const; + + bool variable_is_lut(const SPIRVariable &var) const; + + char current_locale_radix_character = '.'; + +private: + void init(); +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp new file mode 100644 index 0000000..3f6b627 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp @@ -0,0 +1,4705 @@ +/* + * Copyright 2016-2019 Robert Konrad + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_hlsl.hpp" +#include "GLSL.std.450.h" +#include <algorithm> +#include <assert.h> + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +static unsigned image_format_to_components(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatR16f: + case ImageFormatR32f: + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + return 1; + + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRg16f: + case ImageFormatRg32f: + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + return 2; + + case ImageFormatR11fG11fB10f: + return 3; + + case ImageFormatRgba8: + case ImageFormatRgba16: + case ImageFormatRgb10A2: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + case ImageFormatRgba16f: + case ImageFormatRgba32f: + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + case ImageFormatRgb10a2ui: + return 4; + + case ImageFormatUnknown: + return 4; // Assume 4. + + default: + SPIRV_CROSS_THROW("Unrecognized typed image format."); + } +} + +static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float"; + case ImageFormatRg8: + case ImageFormatRg16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float2"; + case ImageFormatRgba8: + case ImageFormatRgba16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float4"; + case ImageFormatRgb10A2: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "unorm float4"; + + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "snorm float"; + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "snorm float2"; + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "snorm float4"; + + case ImageFormatR16f: + case ImageFormatR32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float"; + case ImageFormatRg16f: + case ImageFormatRg32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float2"; + case ImageFormatRgba16f: + case ImageFormatRgba32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float4"; + + case ImageFormatR11fG11fB10f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "float3"; + + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "int"; + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "int2"; + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "int4"; + + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint"; + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint2"; + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint4"; + case ImageFormatRgb10a2ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint4"; + + case ImageFormatUnknown: + switch (basetype) + { + case SPIRType::Float: + return "float4"; + case SPIRType::Int: + return "int4"; + case SPIRType::UInt: + return "uint4"; + default: + SPIRV_CROSS_THROW("Unsupported base type for image."); + } + + default: + SPIRV_CROSS_THROW("Unrecognized typed image format."); + } +} + +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) +{ + auto &imagetype = get<SPIRType>(type.image.type); + const char *dim = nullptr; + bool typed_load = false; + uint32_t components = 4; + + switch (type.image.dim) + { + case Dim1D: + typed_load = type.image.sampled == 2; + dim = "1D"; + break; + case Dim2D: + typed_load = type.image.sampled == 2; + dim = "2D"; + break; + case Dim3D: + typed_load = type.image.sampled == 2; + dim = "3D"; + break; + case DimCube: + if (type.image.sampled == 2) + SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL."); + dim = "Cube"; + break; + case DimRect: + SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO + case DimBuffer: + if (type.image.sampled == 1) + return join("Buffer<", type_to_glsl(imagetype), components, ">"); + else if (type.image.sampled == 2) + return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); + else + SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); + case DimSubpassData: + dim = "2D"; + typed_load = false; + break; + default: + SPIRV_CROSS_THROW("Invalid dimension."); + } + const char *arrayed = type.image.arrayed ? "Array" : ""; + const char *ms = type.image.ms ? "MS" : ""; + const char *rw = typed_load ? "RW" : ""; + return join(rw, "Texture", dim, ms, arrayed, "<", + typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : + join(type_to_glsl(imagetype), components), + ">"); +} + +string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type, uint32_t id) +{ + auto &imagetype = get<SPIRType>(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int: + res = "i"; + break; + case SPIRType::UInt: + res = "u"; + break; + default: + break; + } + + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) + { + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? "image" : "texture"; + } + else + res += "sampler"; + + switch (type.image.dim) + { + case Dim1D: + res += "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "CUBE"; + break; + + case DimBuffer: + res += "Buffer"; + break; + + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + res += "Array"; + if (image_is_comparison(type, id)) + res += "Shadow"; + + return res; +} + +string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id) +{ + if (hlsl_options.shader_model <= 30) + return image_type_hlsl_legacy(type, id); + else + return image_type_hlsl_modern(type, id); +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_hlsl(type, id); + + case SPIRType::Sampler: + return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + return "min16float"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bool", type.vecsize); + case SPIRType::Int: + return join("int", type.vecsize); + case SPIRType::UInt: + return join("uint", type.vecsize); + case SPIRType::Half: + return join("min16float", type.vecsize); + case SPIRType::Float: + return join("float", type.vecsize); + case SPIRType::Double: + return join("double", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bool", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("int", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("uint", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("min16float", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("float", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("double", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } +} + +void CompilerHLSL::emit_header() +{ + for (auto &header : header_lines) + statement(header); + + if (header_lines.size() > 0) + { + statement(""); + } +} + +void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var) +{ + add_resource_name(var.self); + + // The global copies of I/O variables should not contain interpolation qualifiers. + // These are emitted inside the interface structs. + auto &flags = ir.meta[var.self].decoration.decoration_flags; + auto old_flags = flags; + flags.reset(); + statement("static ", variable_decl(var), ";"); + flags = old_flags; +} + +const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) +{ + // Input and output variables are handled specially in HLSL backend. + // The variables are declared as global, private variables, and do not need any qualifiers. + if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || + var.storage == StorageClassPushConstant) + { + return "uniform "; + } + + return ""; +} + +void CompilerHLSL::emit_builtin_outputs_in_struct() +{ + auto &execution = get_entry_point(); + + bool legacy = hlsl_options.shader_model <= 30; + active_output_builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast<BuiltIn>(i); + switch (builtin) + { + case BuiltInPosition: + type = "float4"; + semantic = legacy ? "POSITION" : "SV_Position"; + break; + + case BuiltInFragDepth: + type = "float"; + if (legacy) + { + semantic = "DEPTH"; + } + else + { + if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater)) + semantic = "SV_DepthGreaterEqual"; + else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess)) + semantic = "SV_DepthLessEqual"; + else + semantic = "SV_Depth"; + } + break; + + case BuiltInClipDistance: + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = clip / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } + break; + + case BuiltInCullDistance: + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = cull / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } + break; + + case BuiltInPointSize: + // If point_size_compat is enabled, just ignore PointSize. + // PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders, + // even if it means working around the missing feature. + if (hlsl_options.point_size_compat) + break; + else + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + + default: + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + break; + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";"); + }); +} + +void CompilerHLSL::emit_builtin_inputs_in_struct() +{ + bool legacy = hlsl_options.shader_model <= 30; + active_input_builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + const char *semantic = nullptr; + auto builtin = static_cast<BuiltIn>(i); + switch (builtin) + { + case BuiltInFragCoord: + type = "float4"; + semantic = legacy ? "VPOS" : "SV_Position"; + break; + + case BuiltInVertexId: + case BuiltInVertexIndex: + if (legacy) + SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower."); + type = "uint"; + semantic = "SV_VertexID"; + break; + + case BuiltInInstanceId: + case BuiltInInstanceIndex: + if (legacy) + SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower."); + type = "uint"; + semantic = "SV_InstanceID"; + break; + + case BuiltInSampleId: + if (legacy) + SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower."); + type = "uint"; + semantic = "SV_SampleIndex"; + break; + + case BuiltInGlobalInvocationId: + type = "uint3"; + semantic = "SV_DispatchThreadID"; + break; + + case BuiltInLocalInvocationId: + type = "uint3"; + semantic = "SV_GroupThreadID"; + break; + + case BuiltInLocalInvocationIndex: + type = "uint"; + semantic = "SV_GroupIndex"; + break; + + case BuiltInWorkgroupId: + type = "uint3"; + semantic = "SV_GroupID"; + break; + + case BuiltInFrontFacing: + type = "bool"; + semantic = "SV_IsFrontFace"; + break; + + case BuiltInNumWorkgroups: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInSubgroupEqMask: + case BuiltInSubgroupLtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupGeMask: + // Handled specially. + break; + + case BuiltInClipDistance: + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = clip / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } + break; + + case BuiltInCullDistance: + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. + for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) + { + uint32_t to_declare = cull_distance_count - cull; + if (to_declare > 4) + to_declare = 4; + + uint32_t semantic_index = cull / 4; + + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, + " : SV_CullDistance", semantic_index, ";"); + } + break; + + case BuiltInPointCoord: + // PointCoord is not supported, but provide a way to just ignore that, similar to PointSize. + if (hlsl_options.point_coord_compat) + break; + else + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + + default: + SPIRV_CROSS_THROW("Unsupported builtin in HLSL."); + break; + } + + if (type && semantic) + statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";"); + }); +} + +uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const +{ + // TODO: Need to verify correctness. + uint32_t elements = 0; + + if (type.basetype == SPIRType::Struct) + { + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i])); + } + else + { + uint32_t array_multiplier = 1; + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + { + if (type.array_size_literal[i]) + array_multiplier *= type.array[i]; + else + array_multiplier *= get<SPIRConstant>(type.array[i]).scalar(); + } + elements += array_multiplier * type.columns; + } + return elements; +} + +string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags) +{ + string res; + //if (flags & (1ull << DecorationSmooth)) + // res += "linear "; + if (flags.get(DecorationFlat)) + res += "nointerpolation "; + if (flags.get(DecorationNoPerspective)) + res += "noperspective "; + if (flags.get(DecorationCentroid)) + res += "centroid "; + if (flags.get(DecorationPatch)) + res += "patch "; // Seems to be different in actual HLSL. + if (flags.get(DecorationSample)) + res += "sample "; + if (flags.get(DecorationInvariant)) + res += "invariant "; // Not supported? + + return res; +} + +std::string CompilerHLSL::to_semantic(uint32_t vertex_location) +{ + for (auto &attribute : remap_vertex_attributes) + if (attribute.location == vertex_location) + return attribute.semantic; + + return join("TEXCOORD", vertex_location); +} + +void CompilerHLSL::emit_io_block(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + add_resource_name(type.self); + + statement("struct ", to_name(type.self)); + begin_scope(); + type.member_name_cache.clear(); + + uint32_t base_location = get_decoration(var.self, DecorationLocation); + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + string semantic; + if (has_member_decoration(type.self, i, DecorationLocation)) + { + uint32_t location = get_member_decoration(type.self, i, DecorationLocation); + semantic = join(" : ", to_semantic(location)); + } + else + { + // If the block itself has a location, but not its members, use the implicit location. + // There could be a conflict if the block members partially specialize the locations. + // It is unclear how SPIR-V deals with this. Assume this does not happen for now. + uint32_t location = base_location + i; + semantic = join(" : ", to_semantic(location)); + } + + add_member_name(type, i); + + auto &membertype = get<SPIRType>(type.member_types[i]); + statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, i)), + variable_decl(membertype, to_member_name(type, i)), semantic, ";"); + } + + end_scope_decl(); + statement(""); + + statement("static ", variable_decl(var), ";"); + statement(""); +} + +void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations) +{ + auto &execution = get_entry_point(); + auto type = get<SPIRType>(var.basetype); + + string binding; + bool use_location_number = true; + bool legacy = hlsl_options.shader_model <= 30; + if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + // Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1. + uint32_t index = get_decoration(var.self, DecorationIndex); + uint32_t location = get_decoration(var.self, DecorationLocation); + + if (index != 0 && location != 0) + SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL."); + + binding = join(legacy ? "COLOR" : "SV_Target", location + index); + use_location_number = false; + if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP) + type.vecsize = 4; + } + + const auto get_vacant_location = [&]() -> uint32_t { + for (uint32_t i = 0; i < 64; i++) + if (!active_locations.count(i)) + return i; + SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted."); + }; + + bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; + + auto &m = ir.meta[var.self].decoration; + auto name = to_name(var.self); + if (use_location_number) + { + uint32_t location_number; + + // If an explicit location exists, use it with TEXCOORD[N] semantic. + // Otherwise, pick a vacant location. + if (m.decoration_flags.get(DecorationLocation)) + location_number = m.location; + else + location_number = get_vacant_location(); + + // Allow semantic remap if specified. + auto semantic = to_semantic(location_number); + + if (need_matrix_unroll && type.columns > 1) + { + if (!type.array.empty()) + SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported."); + + // Unroll matrices. + for (uint32_t i = 0; i < type.columns; i++) + { + SPIRType newtype = type; + newtype.columns = 1; + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), + variable_decl(newtype, join(name, "_", i)), " : ", semantic, "_", i, ";"); + active_locations.insert(location_number++); + } + } + else + { + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(type, name), " : ", + semantic, ";"); + + // Structs and arrays should consume more locations. + uint32_t consumed_locations = type_to_consumed_locations(type); + for (uint32_t i = 0; i < consumed_locations; i++) + active_locations.insert(location_number + i); + } + } + else + statement(variable_decl(type, name), " : ", binding, ";"); +} + +std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) +{ + switch (builtin) + { + case BuiltInVertexId: + return "gl_VertexID"; + case BuiltInInstanceId: + return "gl_InstanceID"; + case BuiltInNumWorkgroups: + { + if (!num_workgroups_builtin) + SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. " + "Cannot emit code for this builtin."); + + auto &var = get<SPIRVariable>(num_workgroups_builtin); + auto &type = get<SPIRType>(var.basetype); + return sanitize_underscores(join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0))); + } + case BuiltInPointCoord: + // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. + return "float2(0.5f, 0.5f)"; + case BuiltInSubgroupLocalInvocationId: + return "WaveGetLaneIndex()"; + case BuiltInSubgroupSize: + return "WaveGetLaneCount()"; + + default: + return CompilerGLSL::builtin_to_glsl(builtin, storage); + } +} + +void CompilerHLSL::emit_builtin_variables() +{ + Bitset builtins = active_input_builtins; + builtins.merge_or(active_output_builtins); + + bool need_base_vertex_info = false; + + // Emit global variables for the interface variables which are statically used by the shader. + builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + auto builtin = static_cast<BuiltIn>(i); + uint32_t array_size = 0; + + switch (builtin) + { + case BuiltInFragCoord: + case BuiltInPosition: + type = "float4"; + break; + + case BuiltInFragDepth: + type = "float"; + break; + + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + type = "int"; + if (hlsl_options.support_nonzero_base_vertex_base_instance) + need_base_vertex_info = true; + break; + + case BuiltInInstanceId: + case BuiltInSampleId: + type = "int"; + break; + + case BuiltInPointSize: + if (hlsl_options.point_size_compat) + { + // Just emit the global variable, it will be ignored. + type = "float"; + break; + } + else + SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); + + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + type = "uint3"; + break; + + case BuiltInLocalInvocationIndex: + type = "uint"; + break; + + case BuiltInFrontFacing: + type = "bool"; + break; + + case BuiltInNumWorkgroups: + case BuiltInPointCoord: + // Handled specially. + break; + + case BuiltInSubgroupLocalInvocationId: + case BuiltInSubgroupSize: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); + break; + + case BuiltInSubgroupEqMask: + case BuiltInSubgroupLtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupGeMask: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); + type = "uint4"; + break; + + case BuiltInClipDistance: + array_size = clip_distance_count; + type = "float"; + break; + + case BuiltInCullDistance: + array_size = cull_distance_count; + type = "float"; + break; + + default: + SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); + } + + StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput; + // FIXME: SampleMask can be both in and out with sample builtin, + // need to distinguish that when we add support for that. + + if (type) + { + if (array_size) + statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "];"); + else + statement("static ", type, " ", builtin_to_glsl(builtin, storage), ";"); + } + }); + + if (need_base_vertex_info) + { + statement("cbuffer SPIRV_Cross_VertexInfo"); + begin_scope(); + statement("int SPIRV_Cross_BaseVertex;"); + statement("int SPIRV_Cross_BaseInstance;"); + end_scope_decl(); + statement(""); + } +} + +void CompilerHLSL::emit_composite_constants() +{ + // HLSL cannot declare structs or arrays inline, so we must move them out to + // global constants directly. + bool emitted = false; + + ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get<SPIRType>(c.constant_type); + if (type.basetype == SPIRType::Struct || !type.array.empty()) + { + auto name = to_name(c.self); + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); + + if (emitted) + statement(""); +} + +void CompilerHLSL::emit_specialization_constants_and_structs() +{ + bool emitted = false; + SpecializationConstant wg_x, wg_y, wg_z; + uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + for (auto &id_ : ir.ids_for_constant_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get<SPIRConstant>(); + + if (c.self == workgroup_size_id) + { + statement("static const uint3 gl_WorkGroupSize = ", + constant_expression(get<SPIRConstant>(workgroup_size_id)), ";"); + emitted = true; + } + else if (c.specialization) + { + auto &type = get<SPIRType>(c.constant_type); + auto name = to_name(c.self); + + // HLSL does not support specialization constants, so fallback to macros. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get<SPIRConstantOp>(); + auto &type = get<SPIRType>(c.basetype); + auto name = to_name(c.self); + statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + auto &type = id.get<SPIRType>(); + if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && + (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) && + !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock))) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(type); + } + } + } + + if (emitted) + statement(""); +} + +void CompilerHLSL::replace_illegal_names() +{ + static const unordered_set<string> keywords = { + // Additional HLSL specific keywords. + "line", "linear", "matrix", "point", "row_major", "sampler", + }; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + if (!is_hidden_variable(var)) + { + auto &m = ir.meta[var.self].decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + } + }); + + CompilerGLSL::replace_illegal_names(); +} + +void CompilerHLSL::emit_resources() +{ + auto &execution = get_entry_point(); + + replace_illegal_names(); + + emit_specialization_constants_and_structs(); + emit_composite_constants(); + + bool emitted = false; + + // Output UBOs and SSBOs + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + emit_buffer_block(var); + emitted = true; + } + }); + + // Output push constant blocks + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + emitted = true; + } + }); + + if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30) + { + statement("uniform float4 gl_HalfPixel;"); + emitted = true; + } + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30; + + // Output Uniform Constants (values, samplers, images, etc). + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular D3D. + bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && + type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter)) + { + emit_uniform(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + // Emit builtin input and output variables here. + emit_builtin_variables(); + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + // Do not emit I/O blocks here. + // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders + // and tessellation down the line. + if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Only emit non-builtins which are not blocks here. Builtin variables are handled separately. + emit_interface_block_globally(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + require_input = false; + require_output = false; + unordered_set<uint32_t> active_inputs; + unordered_set<uint32_t> active_outputs; + vector<SPIRVariable *> input_variables; + vector<SPIRVariable *> output_variables; + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + // Do not emit I/O blocks here. + // I/O blocks can be arrayed, so we must deal with them separately to support geometry shaders + // and tessellation down the line. + if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + if (var.storage == StorageClassInput) + input_variables.push_back(&var); + else + output_variables.push_back(&var); + } + + // Reserve input and output locations for block variables as necessary. + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto &active = var.storage == StorageClassInput ? active_inputs : active_outputs; + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (has_member_decoration(type.self, i, DecorationLocation)) + { + uint32_t location = get_member_decoration(type.self, i, DecorationLocation); + active.insert(location); + } + } + + // Emit the block struct and a global variable here. + emit_io_block(var); + } + }); + + const auto variable_compare = [&](const SPIRVariable *a, const SPIRVariable *b) -> bool { + // Sort input and output variables based on, from more robust to less robust: + // - Location + // - Variable has a location + // - Name comparison + // - Variable has a name + // - Fallback: ID + bool has_location_a = has_decoration(a->self, DecorationLocation); + bool has_location_b = has_decoration(b->self, DecorationLocation); + + if (has_location_a && has_location_b) + { + return get_decoration(a->self, DecorationLocation) < get_decoration(b->self, DecorationLocation); + } + else if (has_location_a && !has_location_b) + return true; + else if (!has_location_a && has_location_b) + return false; + + const auto &name1 = to_name(a->self); + const auto &name2 = to_name(b->self); + + if (name1.empty() && name2.empty()) + return a->self < b->self; + else if (name1.empty()) + return true; + else if (name2.empty()) + return false; + + return name1.compare(name2) < 0; + }; + + auto input_builtins = active_input_builtins; + input_builtins.clear(BuiltInNumWorkgroups); + input_builtins.clear(BuiltInPointCoord); + input_builtins.clear(BuiltInSubgroupSize); + input_builtins.clear(BuiltInSubgroupLocalInvocationId); + input_builtins.clear(BuiltInSubgroupEqMask); + input_builtins.clear(BuiltInSubgroupLtMask); + input_builtins.clear(BuiltInSubgroupLeMask); + input_builtins.clear(BuiltInSubgroupGtMask); + input_builtins.clear(BuiltInSubgroupGeMask); + + if (!input_variables.empty() || !input_builtins.empty()) + { + require_input = true; + statement("struct SPIRV_Cross_Input"); + + begin_scope(); + sort(input_variables.begin(), input_variables.end(), variable_compare); + for (auto var : input_variables) + emit_interface_block_in_struct(*var, active_inputs); + emit_builtin_inputs_in_struct(); + end_scope_decl(); + statement(""); + } + + if (!output_variables.empty() || !active_output_builtins.empty()) + { + require_output = true; + statement("struct SPIRV_Cross_Output"); + + begin_scope(); + // FIXME: Use locations properly if they exist. + sort(output_variables.begin(), output_variables.end(), variable_compare); + for (auto var : output_variables) + emit_interface_block_in_struct(*var, active_outputs); + emit_builtin_outputs_in_struct(); + end_scope_decl(); + statement(""); + } + + // Global variables. + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (var.storage != StorageClassOutput) + { + if (!variable_is_lut(var)) + { + add_resource_name(var.self); + + const char *storage = nullptr; + switch (var.storage) + { + case StorageClassWorkgroup: + storage = "groupshared"; + break; + + default: + storage = "static"; + break; + } + statement(storage, " ", variable_decl(var), ";"); + emitted = true; + } + } + } + + if (emitted) + statement(""); + + declare_undefined_values(); + + if (requires_op_fmod) + { + static const char *types[] = { + "float", + "float2", + "float3", + "float4", + }; + + for (auto &type : types) + { + statement(type, " mod(", type, " x, ", type, " y)"); + begin_scope(); + statement("return x - y * floor(x / y);"); + end_scope(); + statement(""); + } + } + + if (required_textureSizeVariants != 0) + { + static const char *types[QueryTypeCount] = { "float4", "int4", "uint4" }; + static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray", + "Texture3D", "Buffer", "TextureCube", "TextureCubeArray", + "Texture2DMS", "Texture2DMSArray" }; + + static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false }; + + static const char *ret_types[QueryDimCount] = { + "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", + }; + + static const uint32_t return_arguments[QueryDimCount] = { + 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, + }; + + for (uint32_t index = 0; index < QueryDimCount; index++) + { + for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) + { + uint32_t bit = 16 * type_index + index; + uint64_t mask = 1ull << bit; + + if ((required_textureSizeVariants & mask) == 0) + continue; + + statement(ret_types[index], " SPIRV_Cross_textureSize(", dims[index], "<", types[type_index], + "> Tex, uint Level, out uint Param)"); + begin_scope(); + statement(ret_types[index], " ret;"); + switch (return_arguments[index]) + { + case 1: + if (has_lod[index]) + statement("Tex.GetDimensions(Level, ret.x, Param);"); + else + { + statement("Tex.GetDimensions(ret.x);"); + statement("Param = 0u;"); + } + break; + case 2: + if (has_lod[index]) + statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); + else + statement("Tex.GetDimensions(ret.x, ret.y, Param);"); + break; + case 3: + if (has_lod[index]) + statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); + else + statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); + break; + } + + statement("return ret;"); + end_scope(); + statement(""); + } + } + } + + if (requires_fp16_packing) + { + // HLSL does not pack into a single word sadly :( + statement("uint SPIRV_Cross_packHalf2x16(float2 value)"); + begin_scope(); + statement("uint2 Packed = f32tof16(value);"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("float2 SPIRV_Cross_unpackHalf2x16(uint value)"); + begin_scope(); + statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); + end_scope(); + statement(""); + } + + if (requires_explicit_fp16_packing) + { + // HLSL does not pack into a single word sadly :( + statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)"); + begin_scope(); + statement("uint2 Packed = f32tof16(value);"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)"); + begin_scope(); + statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); + end_scope(); + statement(""); + } + + // HLSL does not seem to have builtins for these operation, so roll them by hand ... + if (requires_unorm8_packing) + { + statement("uint SPIRV_Cross_packUnorm4x8(float4 value)"); + begin_scope(); + statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); + statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); + end_scope(); + statement(""); + + statement("float4 SPIRV_Cross_unpackUnorm4x8(uint value)"); + begin_scope(); + statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); + statement("return float4(Packed) / 255.0;"); + end_scope(); + statement(""); + } + + if (requires_snorm8_packing) + { + statement("uint SPIRV_Cross_packSnorm4x8(float4 value)"); + begin_scope(); + statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); + statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); + end_scope(); + statement(""); + + statement("float4 SPIRV_Cross_unpackSnorm4x8(uint value)"); + begin_scope(); + statement("int SignedValue = int(value);"); + statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); + statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);"); + end_scope(); + statement(""); + } + + if (requires_unorm16_packing) + { + statement("uint SPIRV_Cross_packUnorm2x16(float2 value)"); + begin_scope(); + statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("float2 SPIRV_Cross_unpackUnorm2x16(uint value)"); + begin_scope(); + statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); + statement("return float2(Packed) / 65535.0;"); + end_scope(); + statement(""); + } + + if (requires_snorm16_packing) + { + statement("uint SPIRV_Cross_packSnorm2x16(float2 value)"); + begin_scope(); + statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); + statement("return uint(Packed.x | (Packed.y << 16));"); + end_scope(); + statement(""); + + statement("float2 SPIRV_Cross_unpackSnorm2x16(uint value)"); + begin_scope(); + statement("int SignedValue = int(value);"); + statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); + statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);"); + end_scope(); + statement(""); + } + + if (requires_bitfield_insert) + { + static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : types) + { + statement(type, " SPIRV_Cross_bitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); + statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); + end_scope(); + statement(""); + } + } + + if (requires_bitfield_extract) + { + static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : unsigned_types) + { + statement(type, " SPIRV_Cross_bitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); + statement("return (Base >> Offset) & Mask;"); + end_scope(); + statement(""); + } + + // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down. + static const char *signed_types[] = { "int", "int2", "int3", "int4" }; + for (auto &type : signed_types) + { + statement(type, " SPIRV_Cross_bitfieldSExtract(", type, " Base, int Offset, int Count)"); + begin_scope(); + statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);"); + statement(type, " Masked = (Base >> Offset) & Mask;"); + statement("int ExtendShift = (32 - Count) & 31;"); + statement("return (Masked << ExtendShift) >> ExtendShift;"); + end_scope(); + statement(""); + } + } + + if (requires_inverse_2x2) + { + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float2x2 SPIRV_Cross_Inverse(float2x2 m)"); + begin_scope(); + statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = m[1][1];"); + statement("adj[0][1] = -m[0][1];"); + statement_no_indent(""); + statement("adj[1][0] = -m[1][0];"); + statement("adj[1][1] = m[0][0];"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } + + if (requires_inverse_3x3) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement_no_indent(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float3x3 SPIRV_Cross_Inverse(float3x3 m)"); + begin_scope(); + statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = SPIRV_Cross_Det2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = SPIRV_Cross_Det2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement_no_indent(""); + statement("adj[1][0] = -SPIRV_Cross_Det2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -SPIRV_Cross_Det2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement_no_indent(""); + statement("adj[2][0] = SPIRV_Cross_Det2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = SPIRV_Cross_Det2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } + + if (requires_inverse_4x4) + { + if (!requires_inverse_3x3) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement("float SPIRV_Cross_Det2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + } + + statement("// Returns the determinant of a 3x3 matrix."); + statement("float SPIRV_Cross_Det3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + "float c2, float c3)"); + begin_scope(); + statement("return a1 * SPIRV_Cross_Det2x2(b2, b3, c2, c3) - b1 * SPIRV_Cross_Det2x2(a2, a3, c2, c3) + c1 * " + "SPIRV_Cross_Det2x2(a2, a3, " + "b2, b3);"); + end_scope(); + statement_no_indent(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float4x4 SPIRV_Cross_Inverse(float4x4 m)"); + begin_scope(); + statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement( + "adj[0][0] = SPIRV_Cross_Det3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][1] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][2] = SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][3] = -SPIRV_Cross_Det3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[1][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[2][0] = SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][1] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][2] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][3] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[3][0] = -SPIRV_Cross_Det3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][1] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][2] = -SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][3] = SPIRV_Cross_Det3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "m[2][2]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] " + "* m[3][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } +} + +string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) +{ + auto &flags = get_member_decoration_bitset(type.self, index); + + // HLSL can emit row_major or column_major decoration in any struct. + // Do not try to merge combined decorations for children like in GLSL. + + // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major". + // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout. + if (flags.get(DecorationColMajor)) + return "row_major "; + else if (flags.get(DecorationRowMajor)) + return "column_major "; + + return ""; +} + +void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t base_offset) +{ + auto &membertype = get<SPIRType>(member_type_id); + + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + string packing_offset; + bool is_push_constant = type.storage == StorageClassPushConstant; + + if ((has_extended_decoration(type.self, SPIRVCrossDecorationPacked) || is_push_constant) && + has_member_decoration(type.self, index, DecorationOffset)) + { + uint32_t offset = memb[index].offset - base_offset; + if (offset & 3) + SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL."); + + static const char *packing_swizzle[] = { "", ".y", ".z", ".w" }; + packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); + } + + statement(layout_for_member(type, index), qualifiers, qualifier, + variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); +} + +void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); + + if (is_uav) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable); + bool is_coherent = flags.get(DecorationCoherent); + add_resource_name(var.self); + statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", + to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + } + else + { + if (type.array.empty()) + { + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset)) + set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + else + SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset."); + + // Flatten the top-level struct so we can use packoffset, + // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. + flattened_structs.insert(var.self); + + // Prefer the block name if possible. + auto buffer_name = to_name(type.self, false); + if (ir.meta[type.self].decoration.alias.empty() || + resource_names.find(buffer_name) != end(resource_names) || + block_names.find(buffer_name) != end(block_names)) + { + buffer_name = get_block_fallback_name(var.self); + } + + add_variable(block_names, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + if (buffer_name.empty()) + buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); + + block_names.insert(buffer_name); + + // Save for post-reflection later. + declared_block_names[var.self] = buffer_name; + + type.member_name_cache.clear(); + // var.self can be used as a backup name for the block name, + // so we need to make sure we don't disturb the name here on a recompile. + // It will need to be reset if we have to recompile. + preserve_alias_on_reset(var.self); + add_resource_name(var.self); + statement("cbuffer ", buffer_name, to_resource_binding(var)); + begin_scope(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + auto backup_name = get_member_name(type.self, i); + auto member_name = to_member_name(type, i); + set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); + emit_struct_member(type, member, i, ""); + set_member_name(type.self, i, backup_name); + i++; + } + + end_scope_decl(); + statement(""); + } + else + { + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW( + "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1."); + + // ConstantBuffer<T> does not support packoffset, so it is unuseable unless everything aligns as we expect. + if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer)) + SPIRV_CROSS_THROW("HLSL ConstantBuffer<T> cannot be expressed with normal HLSL packing rules."); + + add_resource_name(type.self); + add_resource_name(var.self); + + emit_struct(get<SPIRType>(type.self)); + statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); + } + } +} + +void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) +{ + if (root_constants_layout.empty()) + { + emit_buffer_block(var); + } + else + { + for (const auto &layout : root_constants_layout) + { + auto &type = get<SPIRType>(var.basetype); + + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end)) + set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + else + SPIRV_CROSS_THROW( + "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset."); + + flattened_structs.insert(var.self); + type.member_name_cache.clear(); + add_resource_name(var.self); + auto &memb = ir.meta[type.self].members; + + statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), + to_resource_register('b', layout.binding, layout.space)); + begin_scope(); + + // Index of the next field in the generated root constant constant buffer + auto constant_index = 0u; + + // Iterate over all member of the push constant and check which of the fields + // fit into the given root constant layout. + for (auto i = 0u; i < memb.size(); i++) + { + const auto offset = memb[i].offset; + if (layout.start <= offset && offset < layout.end) + { + const auto &member = type.member_types[i]; + + add_member_name(type, constant_index); + auto backup_name = get_member_name(type.self, i); + auto member_name = to_member_name(type, i); + set_member_name(type.self, constant_index, + sanitize_underscores(join(to_name(var.self), "_", member_name))); + emit_struct_member(type, member, i, "", layout.start); + set_member_name(type.self, constant_index, backup_name); + + constant_index++; + } + } + + end_scope_decl(); + } + } +} + +string CompilerHLSL::to_sampler_expression(uint32_t id) +{ + auto expr = join("_", to_expression(id)); + auto index = expr.find_first_of('['); + if (index == string::npos) + { + return expr + "_sampler"; + } + else + { + // We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead. + return expr.insert(index, "_sampler"); + } +} + +void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty()) + { + set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } +} + +string CompilerHLSL::to_func_call_arg(uint32_t id) +{ + string arg_str = CompilerGLSL::to_func_call_arg(id); + + if (hlsl_options.shader_model <= 30) + return arg_str; + + // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL. + auto &type = expression_type(id); + + // We don't have to consider combined image samplers here via OpSampledImage because + // those variables cannot be passed as arguments to functions. + // Only global SampledImage variables may be used as arguments. + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(id); + + return arg_str; +} + +void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + auto &execution = get_entry_point(); + // Avoid shadow declarations. + local_variable_names = resource_names; + + string decl; + + auto &type = get<SPIRType>(func.return_type); + if (type.array.empty()) + { + decl += flags_to_precision_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += " "; + } + else + { + // We cannot return arrays in HLSL, so "return" through an out variable. + decl = "void "; + } + + if (func.self == ir.default_entry_point) + { + if (execution.model == ExecutionModelVertex) + decl += "vert_main"; + else if (execution.model == ExecutionModelFragment) + decl += "frag_main"; + else if (execution.model == ExecutionModelGLCompute) + decl += "comp_main"; + else + SPIRV_CROSS_THROW("Unsupported execution model."); + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + vector<string> arglist; + + if (!type.array.empty()) + { + // Fake array returns by writing to an out array instead. + string out_argument; + out_argument += "out "; + out_argument += type_to_glsl(type); + out_argument += " "; + out_argument += "SPIRV_Cross_return_value"; + out_argument += type_to_array_glsl(type); + arglist.push_back(move(out_argument)); + } + + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg.id)) + continue; + + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Flatten a combined sampler to two separate arguments in modern HLSL. + auto &arg_type = get<SPIRType>(arg.type); + if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage && + arg_type.image.dim != DimBuffer) + { + // Manufacture automatic sampler arg for SampledImage texture + arglist.push_back(join(image_is_comparison(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ", + to_sampler_expression(arg.id), type_to_array_glsl(arg_type))); + } + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); +} + +void CompilerHLSL::emit_hlsl_entry_point() +{ + vector<string> arguments; + + if (require_input) + arguments.push_back("SPIRV_Cross_Input stage_input"); + + // Add I/O blocks as separate arguments with appropriate storage qualifier. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + if (var.storage == StorageClassInput) + { + arguments.push_back(join("in ", variable_decl(type, join("stage_input", to_name(var.self))))); + } + else if (var.storage == StorageClassOutput) + { + arguments.push_back(join("out ", variable_decl(type, join("stage_output", to_name(var.self))))); + } + } + }); + + auto &execution = get_entry_point(); + + switch (execution.model) + { + case ExecutionModelGLCompute: + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + uint32_t x = execution.workgroup_size.x; + uint32_t y = execution.workgroup_size.y; + uint32_t z = execution.workgroup_size.z; + + auto x_expr = wg_x.id ? get<SPIRConstant>(wg_x.id).specialization_constant_macro_name : to_string(x); + auto y_expr = wg_y.id ? get<SPIRConstant>(wg_y.id).specialization_constant_macro_name : to_string(y); + auto z_expr = wg_z.id ? get<SPIRConstant>(wg_z.id).specialization_constant_macro_name : to_string(z); + + statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]"); + break; + } + case ExecutionModelFragment: + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + statement("[earlydepthstencil]"); + break; + default: + break; + } + + statement(require_output ? "SPIRV_Cross_Output " : "void ", "main(", merge(arguments), ")"); + begin_scope(); + bool legacy = hlsl_options.shader_model <= 30; + + // Copy builtins from entry point arguments to globals. + active_input_builtins.for_each_bit([&](uint32_t i) { + auto builtin = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassInput); + switch (static_cast<BuiltIn>(i)) + { + case BuiltInFragCoord: + // VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent. + // TODO: Do we need an option here? Any reason why a D3D9 shader would be used + // on a D3D10+ system with a different rasterization config? + if (legacy) + statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); + else + statement(builtin, " = stage_input.", builtin, ";"); + break; + + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + // D3D semantics are uint, but shader wants int. + if (hlsl_options.support_nonzero_base_vertex_base_instance) + { + if (static_cast<BuiltIn>(i) == BuiltInInstanceIndex) + statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;"); + else + statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;"); + } + else + statement(builtin, " = int(stage_input.", builtin, ");"); + break; + + case BuiltInInstanceId: + // D3D semantics are uint, but shader wants int. + statement(builtin, " = int(stage_input.", builtin, ");"); + break; + + case BuiltInNumWorkgroups: + case BuiltInPointCoord: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + break; + + case BuiltInSubgroupEqMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;"); + statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;"); + statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;"); + break; + + case BuiltInSubgroupGeMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;"); + statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;"); + statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;"); + statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;"); + statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;"); + break; + + case BuiltInSubgroupGtMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("uint gt_lane_index = WaveGetLaneIndex() + 1;"); + statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);"); + statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;"); + statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;"); + statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;"); + statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;"); + statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;"); + statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;"); + statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;"); + break; + + case BuiltInSubgroupLeMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("uint le_lane_index = WaveGetLaneIndex() + 1;"); + statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;"); + statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;"); + statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;"); + statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;"); + statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;"); + statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;"); + statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;"); + statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;"); + break; + + case BuiltInSubgroupLtMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;"); + statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;"); + statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;"); + statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;"); + statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;"); + break; + + case BuiltInClipDistance: + for (uint32_t clip = 0; clip < clip_distance_count; clip++) + statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], + ";"); + break; + + case BuiltInCullDistance: + for (uint32_t cull = 0; cull < cull_distance_count; cull++) + statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], + ";"); + break; + + default: + statement(builtin, " = stage_input.", builtin, ";"); + break; + } + }); + + // Copy from stage input struct to globals. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassInput) + return; + + bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; + + if (!block && !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + auto &mtype = this->get<SPIRType>(var.basetype); + if (need_matrix_unroll && mtype.columns > 1) + { + // Unroll matrices. + for (uint32_t col = 0; col < mtype.columns; col++) + statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + } + else + { + statement(name, " = stage_input.", name, ";"); + } + } + + // I/O blocks don't use the common stage input/output struct, but separate outputs. + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + statement(name, " = stage_input", name, ";"); + } + }); + + // Run the shader. + if (execution.model == ExecutionModelVertex) + statement("vert_main();"); + else if (execution.model == ExecutionModelFragment) + statement("frag_main();"); + else if (execution.model == ExecutionModelGLCompute) + statement("comp_main();"); + else + SPIRV_CROSS_THROW("Unsupported shader stage."); + + // Copy block outputs. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassOutput) + return; + + // I/O blocks don't use the common stage input/output struct, but separate outputs. + if (block && !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + statement("stage_output", name, " = ", name, ";"); + } + }); + + // Copy stage outputs. + if (require_output) + { + statement("SPIRV_Cross_Output stage_output;"); + + // Copy builtins from globals to return struct. + active_output_builtins.for_each_bit([&](uint32_t i) { + // PointSize doesn't exist in HLSL. + if (i == BuiltInPointSize) + return; + + switch (static_cast<BuiltIn>(i)) + { + case BuiltInClipDistance: + for (uint32_t clip = 0; clip < clip_distance_count; clip++) + statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[", + clip, "];"); + break; + + case BuiltInCullDistance: + for (uint32_t cull = 0; cull < cull_distance_count; cull++) + statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[", + cull, "];"); + break; + + default: + { + auto builtin_expr = builtin_to_glsl(static_cast<BuiltIn>(i), StorageClassOutput); + statement("stage_output.", builtin_expr, " = ", builtin_expr, ";"); + break; + } + } + }); + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + + if (var.storage != StorageClassOutput) + return; + + if (!block && var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + !is_builtin_variable(var) && interface_variable_exists_in_entry_point(var.self)) + { + auto name = to_name(var.self); + + if (legacy && execution.model == ExecutionModelFragment) + { + string output_filler; + for (uint32_t size = type.vecsize; size < 4; ++size) + output_filler += ", 0.0"; + + statement("stage_output.", name, " = float4(", name, output_filler, ");"); + } + else + { + statement("stage_output.", name, " = ", name, ";"); + } + } + }); + + statement("return stage_output;"); + } + + end_scope(); +} + +void CompilerHLSL::emit_fixup() +{ + if (get_entry_point().model == ExecutionModelVertex) + { + // Do various mangling on the gl_Position. + if (hlsl_options.shader_model <= 30) + { + statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * " + "gl_Position.w;"); + statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * " + "gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); + if (options.vertex.fixup_clipspace) + statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;"); + } +} + +void CompilerHLSL::emit_texture_op(const Instruction &i) +{ + auto *ops = stream(i); + auto op = static_cast<Op>(i.op); + uint32_t length = i.length; + + vector<uint32_t> inherited_expressions; + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + const uint32_t *opt = nullptr; + auto *combined_image = maybe_get<SPIRCombinedImageSampler>(img); + auto img_expr = to_expression(combined_image ? combined_image->image : img); + + inherited_expressions.push_back(coord); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + break; + + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + dref = ops[4]; + proj = true; + opt = &ops[5]; + length -= 5; + break; + + case OpImageDrefGather: + dref = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; + + case OpImageGather: + comp = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; + + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; + break; + + case OpImageQueryLod: + opt = &ops[4]; + length -= 4; + break; + + default: + opt = &ops[4]; + length -= 4; + break; + } + + auto &imgtype = expression_type(img); + uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } + + if (dref) + inherited_expressions.push_back(dref); + + if (imgtype.image.arrayed) + coord_components++; + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t flags = 0; + + if (length) + { + flags = opt[0]; + opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + + string expr; + string texop; + + if (op == OpImageFetch) + { + if (hlsl_options.shader_model < 40) + { + SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3."); + } + texop += img_expr; + texop += ".Load"; + } + else if (op == OpImageQueryLod) + { + texop += img_expr; + texop += ".CalculateLevelOfDetail"; + } + else + { + auto &imgformat = get<SPIRType>(imgtype.image.type); + if (imgformat.basetype != SPIRType::Float) + { + SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL."); + } + + if (hlsl_options.shader_model >= 40) + { + texop += img_expr; + + if (image_is_comparison(imgtype, img)) + { + if (gather) + { + SPIRV_CROSS_THROW("GatherCmp does not exist in HLSL."); + } + else if (lod || grad_x || grad_y) + { + // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero. + texop += ".SampleCmpLevelZero"; + } + else + texop += ".SampleCmp"; + } + else if (gather) + { + uint32_t comp_num = get<SPIRConstant>(comp).scalar(); + if (hlsl_options.shader_model >= 50) + { + switch (comp_num) + { + case 0: + texop += ".GatherRed"; + break; + case 1: + texop += ".GatherGreen"; + break; + case 2: + texop += ".GatherBlue"; + break; + case 3: + texop += ".GatherAlpha"; + break; + default: + SPIRV_CROSS_THROW("Invalid component."); + } + } + else + { + if (comp_num == 0) + texop += ".Gather"; + else + SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component."); + } + } + else if (bias) + texop += ".SampleBias"; + else if (grad_x || grad_y) + texop += ".SampleGrad"; + else if (lod) + texop += ".SampleLevel"; + else + texop += ".Sample"; + } + else + { + switch (imgtype.image.dim) + { + case Dim1D: + texop += "tex1D"; + break; + case Dim2D: + texop += "tex2D"; + break; + case Dim3D: + texop += "tex3D"; + break; + case DimCube: + texop += "texCUBE"; + break; + case DimRect: + case DimBuffer: + case DimSubpassData: + SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL"); // TODO + default: + SPIRV_CROSS_THROW("Invalid dimension."); + } + + if (gather) + SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3."); + if (offset || coffset) + SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3."); + if (proj) + texop += "proj"; + if (grad_x || grad_y) + texop += "grad"; + if (lod) + texop += "lod"; + if (bias) + texop += "bias"; + } + } + + expr += texop; + expr += "("; + if (hlsl_options.shader_model < 40) + { + if (combined_image) + SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3."); + expr += to_expression(img); + } + else if (op != OpImageFetch) + { + string sampler_expr; + if (combined_image) + sampler_expr = to_expression(combined_image->sampler); + else + sampler_expr = to_sampler_expression(img); + expr += sampler_expr; + } + + auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; + + switch (comps) + { + case 1: + return ".x"; + case 2: + return ".xy"; + case 3: + return ".xyz"; + default: + return ""; + } + }; + + bool forward = should_forward(coord); + + // The IR can give us more components than we need, so chop them off as needed. + string coord_expr; + if (coord_components != expression_type(coord).vecsize) + coord_expr = to_enclosed_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize); + else + coord_expr = to_expression(coord); + + if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us. + coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components); + + if (hlsl_options.shader_model < 40 && lod) + { + auto &coordtype = expression_type(coord); + string coord_filler; + for (uint32_t size = coordtype.vecsize; size < 3; ++size) + { + coord_filler += ", 0.0"; + } + coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(lod) + ")"; + } + + if (hlsl_options.shader_model < 40 && bias) + { + auto &coordtype = expression_type(coord); + string coord_filler; + for (uint32_t size = coordtype.vecsize; size < 3; ++size) + { + coord_filler += ", 0.0"; + } + coord_expr = "float4(" + coord_expr + coord_filler + ", " + to_expression(bias) + ")"; + } + + if (op == OpImageFetch) + { + auto &coordtype = expression_type(coord); + if (imgtype.image.dim != DimBuffer && !imgtype.image.ms) + coord_expr = + join("int", coordtype.vecsize + 1, "(", coord_expr, ", ", lod ? to_expression(lod) : string("0"), ")"); + } + else + expr += ", "; + expr += coord_expr; + + if (dref) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Legacy HLSL does not support comparison sampling."); + + forward = forward && should_forward(dref); + expr += ", "; + + if (proj) + expr += to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, coord_components); + else + expr += to_expression(dref); + } + + if (!dref && (grad_x || grad_y)) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + expr += ", "; + expr += to_expression(grad_x); + expr += ", "; + expr += to_expression(grad_y); + } + + if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch) + { + forward = forward && should_forward(lod); + expr += ", "; + expr += to_expression(lod); + } + + if (!dref && bias && hlsl_options.shader_model >= 40) + { + forward = forward && should_forward(bias); + expr += ", "; + expr += to_expression(bias); + } + + if (coffset) + { + forward = forward && should_forward(coffset); + expr += ", "; + expr += to_expression(coffset); + } + else if (offset) + { + forward = forward && should_forward(offset); + expr += ", "; + expr += to_expression(offset); + } + + if (sample) + { + expr += ", "; + expr += to_expression(sample); + } + + expr += ")"; + + if (op == OpImageQueryLod) + { + // This is rather awkward. + // textureQueryLod returns two values, the "accessed level", + // as well as the actual LOD lambda. + // As far as I can tell, there is no way to get the .x component + // according to GLSL spec, and it depends on the sampler itself. + // Just assume X == Y, so we will need to splat the result to a float2. + statement("float _", id, "_tmp = ", expr, ";"); + emit_op(result_type, id, join("float2(_", id, "_tmp, _", id, "_tmp)"), true, true); + } + else + { + emit_op(result_type, id, expr, forward, false); + } + + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageQueryLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +string CompilerHLSL::to_resource_binding(const SPIRVariable &var) +{ + // TODO: Basic implementation, might need special consideration for RW/RO structured buffers, + // RW/RO images, and so on. + + if (!has_decoration(var.self, DecorationBinding)) + return ""; + + const auto &type = get<SPIRType>(var.basetype); + char space = '\0'; + + switch (type.basetype) + { + case SPIRType::SampledImage: + space = 't'; // SRV + break; + + case SPIRType::Image: + if (type.image.sampled == 2 && type.image.dim != DimSubpassData) + space = 'u'; // UAV + else + space = 't'; // SRV + break; + + case SPIRType::Sampler: + space = 's'; + break; + + case SPIRType::Struct: + { + auto storage = type.storage; + if (storage == StorageClassUniform) + { + if (has_decoration(type.self, DecorationBufferBlock)) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable); + space = is_readonly ? 't' : 'u'; // UAV + } + else if (has_decoration(type.self, DecorationBlock)) + space = 'b'; // Constant buffers + } + else if (storage == StorageClassPushConstant) + space = 'b'; // Constant buffers + else if (storage == StorageClassStorageBuffer) + { + // UAV or SRV depending on readonly flag. + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable); + space = is_readonly ? 't' : 'u'; + } + + break; + } + default: + break; + } + + if (!space) + return ""; + + return to_resource_register(space, get_decoration(var.self, DecorationBinding), + get_decoration(var.self, DecorationDescriptorSet)); +} + +string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) +{ + // For combined image samplers. + if (!has_decoration(var.self, DecorationBinding)) + return ""; + + return to_resource_register('s', get_decoration(var.self, DecorationBinding), + get_decoration(var.self, DecorationDescriptorSet)); +} + +string CompilerHLSL::to_resource_register(char space, uint32_t binding, uint32_t space_set) +{ + if (hlsl_options.shader_model >= 51) + return join(" : register(", space, binding, ", space", space_set, ")"); + else + return join(" : register(", space, binding, ")"); +} + +void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + { + bool is_coherent = false; + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + is_coherent = has_decoration(var.self, DecorationCoherent); + + statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ", + to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + { + // For combined image samplers, also emit a combined image sampler. + if (image_is_comparison(type, var.self)) + statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type), + to_resource_binding_sampler(var), ";"); + else + statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type), + to_resource_binding_sampler(var), ";"); + } + break; + } + + case SPIRType::Sampler: + if (comparison_ids.count(var.self)) + statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), + ";"); + else + statement("SamplerState ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + break; + + default: + statement(variable_decl(var), to_resource_binding(var), ";"); + break; + } +} + +void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + switch (type.basetype) + { + case SPIRType::Sampler: + case SPIRType::Image: + SPIRV_CROSS_THROW("Separate image and samplers not supported in legacy HLSL."); + + default: + statement(variable_decl(var), ";"); + break; + } +} + +void CompilerHLSL::emit_uniform(const SPIRVariable &var) +{ + add_resource_name(var.self); + if (hlsl_options.shader_model >= 40) + emit_modern_uniform(var); + else + emit_legacy_uniform(var); +} + +string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + return "asuint"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + return "asint"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + return "asfloat"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + return "asfloat"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL."); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL."); + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "asdouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "asdouble"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile = true; + } + return "SPIRV_Cross_unpackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile = true; + } + return "SPIRV_Cross_packFloat2x16"; + } + else + return ""; +} + +void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) +{ + auto op = static_cast<GLSLstd450>(eop); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "rsqrt"); + break; + + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "frac"); + break; + + case GLSLstd450RoundEven: + SPIRV_CROSS_THROW("roundEven is not supported on HLSL."); + + case GLSLstd450Acosh: + case GLSLstd450Asinh: + case GLSLstd450Atanh: + SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL."); + + case GLSLstd450FMix: + case GLSLstd450IMix: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp"); + break; + + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + break; + + case GLSLstd450Fma: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mad"); + break; + + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "EvaluateAttributeAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeSnapped"); + break; + + case GLSLstd450PackHalf2x16: + if (!requires_fp16_packing) + { + requires_fp16_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packHalf2x16"); + break; + + case GLSLstd450UnpackHalf2x16: + if (!requires_fp16_packing) + { + requires_fp16_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackHalf2x16"); + break; + + case GLSLstd450PackSnorm4x8: + if (!requires_snorm8_packing) + { + requires_snorm8_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm4x8"); + break; + + case GLSLstd450UnpackSnorm4x8: + if (!requires_snorm8_packing) + { + requires_snorm8_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm4x8"); + break; + + case GLSLstd450PackUnorm4x8: + if (!requires_unorm8_packing) + { + requires_unorm8_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm4x8"); + break; + + case GLSLstd450UnpackUnorm4x8: + if (!requires_unorm8_packing) + { + requires_unorm8_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm4x8"); + break; + + case GLSLstd450PackSnorm2x16: + if (!requires_snorm16_packing) + { + requires_snorm16_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packSnorm2x16"); + break; + + case GLSLstd450UnpackSnorm2x16: + if (!requires_snorm16_packing) + { + requires_snorm16_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackSnorm2x16"); + break; + + case GLSLstd450PackUnorm2x16: + if (!requires_unorm16_packing) + { + requires_unorm16_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_packUnorm2x16"); + break; + + case GLSLstd450UnpackUnorm2x16: + if (!requires_unorm16_packing) + { + requires_unorm16_packing = true; + force_recompile = true; + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_unpackUnorm2x16"); + break; + + case GLSLstd450PackDouble2x32: + case GLSLstd450UnpackDouble2x32: + SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); + + case GLSLstd450FindILsb: + emit_unary_func_op(result_type, id, args[0], "firstbitlow"); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", uint_type, uint_type); + break; + + case GLSLstd450MatrixInverse: + { + auto &type = get<SPIRType>(result_type); + if (type.vecsize == 2 && type.columns == 2) + { + if (!requires_inverse_2x2) + { + requires_inverse_2x2 = true; + force_recompile = true; + } + } + else if (type.vecsize == 3 && type.columns == 3) + { + if (!requires_inverse_3x3) + { + requires_inverse_3x3 = true; + force_recompile = true; + } + } + else if (type.vecsize == 4 && type.columns == 4) + { + if (!requires_inverse_4x4) + { + requires_inverse_4x4 = true; + force_recompile = true; + } + } + emit_unary_func_op(result_type, id, args[0], "SPIRV_Cross_Inverse"); + break; + } + + default: + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } +} + +string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain) +{ + auto &type = get<SPIRType>(chain.basetype); + + SPIRType target_type; + target_type.basetype = SPIRType::UInt; + target_type.vecsize = type.vecsize; + target_type.columns = type.columns; + + if (type.basetype == SPIRType::Struct) + SPIRV_CROSS_THROW("Reading structs from ByteAddressBuffer not yet supported."); + + if (type.width != 32) + SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported."); + + if (!type.array.empty()) + SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported."); + + string load_expr; + + // Load a vector or scalar. + if (type.columns == 1 && !chain.row_major_matrix) + { + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + load_expr = join(chain.base, ".", load_op, "(", chain.dynamic_index, chain.static_index, ")"); + } + else if (type.columns == 1) + { + // Strided load since we are loading a column from a row-major matrix. + if (type.vecsize > 1) + { + load_expr = type_to_glsl(target_type); + load_expr += "("; + } + + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += + join(chain.base, ".Load(", chain.dynamic_index, chain.static_index + r * chain.matrix_stride, ")"); + if (r + 1 < type.vecsize) + load_expr += ", "; + } + + if (type.vecsize > 1) + load_expr += ")"; + } + else if (!chain.row_major_matrix) + { + // Load a matrix, column-major, the easy case. + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, + // so row-major is technically column-major ... + load_expr = type_to_glsl(target_type); + load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) + { + load_expr += join(chain.base, ".", load_op, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ")"); + if (c + 1 < type.columns) + load_expr += ", "; + } + load_expr += ")"; + } + else + { + // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern + // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... + + load_expr = type_to_glsl(target_type); + load_expr += "("; + for (uint32_t c = 0; c < type.columns; c++) + { + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += join(chain.base, ".Load(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); + + if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) + load_expr += ", "; + } + } + load_expr += ")"; + } + + auto bitcast_op = bitcast_glsl_op(type, target_type); + if (!bitcast_op.empty()) + load_expr = join(bitcast_op, "(", load_expr, ")"); + + return load_expr; +} + +void CompilerHLSL::emit_load(const Instruction &instruction) +{ + auto ops = stream(instruction); + + auto *chain = maybe_get<SPIRAccessChain>(ops[2]); + if (chain) + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + auto load_expr = read_access_chain(*chain); + + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + if (!forward) + track_expression_read(chain->self); + + // Do not forward complex load sequences like matrices, structs and arrays. + auto &type = get<SPIRType>(result_type); + if (type.columns > 1 || !type.array.empty() || type.basetype == SPIRType::Struct) + forward = false; + + auto &e = emit_op(result_type, id, load_expr, forward, true); + e.need_transpose = false; + register_read(id, ptr, forward); + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(e, chain->self); + } + else + CompilerGLSL::emit_instruction(instruction); +} + +void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value) +{ + auto &type = get<SPIRType>(chain.basetype); + + // Make sure we trigger a read of the constituents in the access chain. + track_expression_read(chain.self); + + SPIRType target_type; + target_type.basetype = SPIRType::UInt; + target_type.vecsize = type.vecsize; + target_type.columns = type.columns; + + if (type.basetype == SPIRType::Struct) + SPIRV_CROSS_THROW("Writing structs to RWByteAddressBuffer not yet supported."); + if (type.width != 32) + SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported."); + if (!type.array.empty()) + SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported."); + + if (type.columns == 1 && !chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + auto store_expr = to_expression(value); + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index, ", ", store_expr, ");"); + } + else if (type.columns == 1) + { + // Strided store. + for (uint32_t r = 0; r < type.vecsize; r++) + { + auto store_expr = to_enclosed_expression(value); + if (type.vecsize > 1) + { + store_expr += "."; + store_expr += index_to_swizzle(r); + } + remove_duplicate_swizzle(store_expr); + + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".Store(", chain.dynamic_index, chain.static_index + chain.matrix_stride * r, ", ", + store_expr, ");"); + } + } + else if (!chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } + + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = join(to_enclosed_expression(value), "[", c, "]"); + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".", store_op, "(", chain.dynamic_index, chain.static_index + c * chain.matrix_stride, + ", ", store_expr, ");"); + } + } + else + { + for (uint32_t r = 0; r < type.vecsize; r++) + { + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = join(to_enclosed_expression(value), "[", c, "].", index_to_swizzle(r)); + remove_duplicate_swizzle(store_expr); + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + statement(chain.base, ".Store(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");"); + } + } + } + + register_write(chain.self); +} + +void CompilerHLSL::emit_store(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto *chain = maybe_get<SPIRAccessChain>(ops[0]); + if (chain) + write_access_chain(*chain, ops[1]); + else + CompilerGLSL::emit_instruction(instruction); +} + +void CompilerHLSL::emit_access_chain(const Instruction &instruction) +{ + auto ops = stream(instruction); + uint32_t length = instruction.length; + + bool need_byte_access_chain = false; + auto &type = expression_type(ops[2]); + const auto *chain = maybe_get<SPIRAccessChain>(ops[2]); + + if (chain) + { + // Keep tacking on an existing access chain. + need_byte_access_chain = true; + } + else if (type.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock)) + { + // If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need + // to emit SPIRAccessChain rather than a plain SPIRExpression. + uint32_t chain_arguments = length - 3; + if (chain_arguments > type.array.size()) + need_byte_access_chain = true; + } + + if (need_byte_access_chain) + { + uint32_t to_plain_buffer_length = static_cast<uint32_t>(type.array.size()); + auto *backing_variable = maybe_get_backing_variable(ops[2]); + + string base; + if (to_plain_buffer_length != 0) + base = access_chain(ops[2], &ops[3], to_plain_buffer_length, get<SPIRType>(ops[0])); + else if (chain) + base = chain->base; + else + base = to_expression(ops[2]); + + // Start traversing type hierarchy at the proper non-pointer types. + auto *basetype = &get_pointee_type(type); + + // Traverse the type hierarchy down to the actual buffer types. + for (uint32_t i = 0; i < to_plain_buffer_length; i++) + { + assert(basetype->parent_type); + basetype = &get<SPIRType>(basetype->parent_type); + } + + uint32_t matrix_stride = 0; + bool row_major_matrix = false; + + // Inherit matrix information. + if (chain) + { + matrix_stride = chain->matrix_stride; + row_major_matrix = chain->row_major_matrix; + } + + auto offsets = + flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length], + length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, &matrix_stride); + + auto &e = set<SPIRAccessChain>(ops[1], ops[0], type.storage, base, offsets.first, offsets.second); + e.row_major_matrix = row_major_matrix; + e.matrix_stride = matrix_stride; + e.immutable = should_forward(ops[2]); + e.loaded_from = backing_variable ? backing_variable->self : 0; + + if (chain) + { + e.dynamic_index += chain->dynamic_index; + e.static_index += chain->static_index; + } + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(e, ops[i]); + } + } + else + { + CompilerGLSL::emit_instruction(instruction); + } +} + +void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) +{ + const char *atomic_op = nullptr; + + string value_expr; + if (op != OpAtomicIDecrement && op != OpAtomicIIncrement) + value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]); + + switch (op) + { + case OpAtomicIIncrement: + atomic_op = "InterlockedAdd"; + value_expr = "1"; + break; + + case OpAtomicIDecrement: + atomic_op = "InterlockedAdd"; + value_expr = "-1"; + break; + + case OpAtomicISub: + atomic_op = "InterlockedAdd"; + value_expr = join("-", enclose_expression(value_expr)); + break; + + case OpAtomicSMin: + case OpAtomicUMin: + atomic_op = "InterlockedMin"; + break; + + case OpAtomicSMax: + case OpAtomicUMax: + atomic_op = "InterlockedMax"; + break; + + case OpAtomicAnd: + atomic_op = "InterlockedAnd"; + break; + + case OpAtomicOr: + atomic_op = "InterlockedOr"; + break; + + case OpAtomicXor: + atomic_op = "InterlockedXor"; + break; + + case OpAtomicIAdd: + atomic_op = "InterlockedAdd"; + break; + + case OpAtomicExchange: + atomic_op = "InterlockedExchange"; + break; + + case OpAtomicCompareExchange: + if (length < 8) + SPIRV_CROSS_THROW("Not enough data for opcode."); + atomic_op = "InterlockedCompareExchange"; + value_expr = join(to_expression(ops[7]), ", ", value_expr); + break; + + default: + SPIRV_CROSS_THROW("Unknown atomic opcode."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + forced_temporaries.insert(ops[1]); + + auto &type = get<SPIRType>(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + auto &data_type = expression_type(ops[2]); + auto *chain = maybe_get<SPIRAccessChain>(ops[2]); + SPIRType::BaseType expr_type; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); + expr_type = data_type.basetype; + } + else + { + // RWByteAddress buffer is always uint in its underlying type. + expr_type = SPIRType::UInt; + statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ", + to_name(id), ");"); + } + + auto expr = bitcast_expression(type, expr_type, to_name(id)); + set<SPIRExpression>(id, expr, result_type, true); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); +} + +void CompilerHLSL::emit_subgroup_op(const Instruction &i) +{ + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher."); + + const uint32_t *ops = stream(i); + auto op = static_cast<Op>(i.op); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar()); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + const auto make_inclusive_Sum = [&](const string &expr) -> string { + return join(expr, " + ", to_expression(ops[4])); + }; + + const auto make_inclusive_Product = [&](const string &expr) -> string { + return join(expr, " * ", to_expression(ops[4])); + }; + +#define make_inclusive_BitAnd(expr) "" +#define make_inclusive_BitOr(expr) "" +#define make_inclusive_BitXor(expr) "" +#define make_inclusive_Min(expr) "" +#define make_inclusive_Max(expr) "" + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "WaveIsFirstLane()", true); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot"); + break; + + case OpGroupNonUniformInverseBallot: + SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL."); + break; + + case OpGroupNonUniformBallotBitExtract: + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL."); + break; + + case OpGroupNonUniformBallotFindLSB: + SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL."); + break; + + case OpGroupNonUniformBallotFindMSB: + SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL."); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationReduce) + { + bool forward = should_forward(ops[4]); + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(", + to_enclosed_expression(ops[4]), ".y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(", + to_enclosed_expression(ops[4]), ".w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + inherit_expression_dependencies(id, ops[4]); + } + else if (operation == GroupOperationInclusiveScan) + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Inclusive Scan in HLSL."); + else if (operation == GroupOperationExclusiveScan) + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitCount Exclusive Scan in HLSL."); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + SPIRV_CROSS_THROW("Cannot trivially implement Shuffle in HLSL."); + case OpGroupNonUniformShuffleXor: + SPIRV_CROSS_THROW("Cannot trivially implement ShuffleXor in HLSL."); + case OpGroupNonUniformShuffleUp: + SPIRV_CROSS_THROW("Cannot trivially implement ShuffleUp in HLSL."); + case OpGroupNonUniformShuffleDown: + SPIRV_CROSS_THROW("Cannot trivially implement ShuffleDown in HLSL."); + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue"); + break; + + case OpGroupNonUniformAllEqual: + { + auto &type = get<SPIRType>(result_type); + emit_unary_func_op(result_type, id, ops[3], + type.basetype == SPIRType::Boolean ? "WaveActiveAllEqualBool" : "WaveActiveAllEqual"); + break; + } + + // clang-format off +#define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast<GroupOperation>(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \ + else if (operation == GroupOperationInclusiveScan && supports_scan) \ + { \ + bool forward = should_forward(ops[4]); \ + emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \ + inherit_expression_dependencies(id, ops[4]); \ + } \ + else if (operation == GroupOperationExclusiveScan && supports_scan) \ + emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + HLSL_GROUP_OP(FAdd, Sum, true) + HLSL_GROUP_OP(FMul, Product, true) + HLSL_GROUP_OP(FMin, Min, false) + HLSL_GROUP_OP(FMax, Max, false) + HLSL_GROUP_OP(IAdd, Sum, true) + HLSL_GROUP_OP(IMul, Product, true) + HLSL_GROUP_OP(SMin, Min, false) + HLSL_GROUP_OP(SMax, Max, false) + HLSL_GROUP_OP(UMin, Min, false) + HLSL_GROUP_OP(UMax, Max, false) + HLSL_GROUP_OP(BitwiseAnd, BitAnd, false) + HLSL_GROUP_OP(BitwiseOr, BitOr, false) + HLSL_GROUP_OP(BitwiseXor, BitXor, false) +#undef HLSL_GROUP_OP + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = get<SPIRConstant>(ops[4]).scalar(); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +void CompilerHLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast<Op>(instruction.op); + +#define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define HLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define HLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define HLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + + switch (opcode) + { + case OpAccessChain: + case OpInBoundsAccessChain: + { + emit_access_chain(instruction); + break; + } + + case OpStore: + { + emit_store(instruction); + break; + } + + case OpLoad: + { + emit_load(instruction); + break; + } + + case OpMatrixTimesVector: + { + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpVectorTimesMatrix: + { + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpMatrixTimesMatrix: + { + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpFMod: + { + if (!requires_op_fmod) + { + requires_op_fmod = true; + force_recompile = true; + } + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpFRem: + emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod"); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *combined = maybe_get<SPIRCombinedImageSampler>(ops[2]); + + if (combined) + { + auto &e = emit_op(result_type, id, to_expression(combined->image), true, true); + auto *var = maybe_get_backing_variable(combined->image); + if (var) + e.loaded_from = var->self; + } + else + { + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + e.loaded_from = var->self; + } + break; + } + + case OpDPdx: + HLSL_UFOP(ddx); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + HLSL_UFOP(ddy); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxFine: + HLSL_UFOP(ddx_fine); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyFine: + HLSL_UFOP(ddy_fine); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxCoarse: + HLSL_UFOP(ddx_coarse); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyCoarse: + HLSL_UFOP(ddy_coarse); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + HLSL_UFOP(fwidth); + register_control_dependent_expression(ops[1]); + break; + + case OpLogicalNot: + { + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get<SPIRType>(result_type); + + if (type.vecsize > 1) + emit_unrolled_unary_op(result_type, id, ops[2], "!"); + else + HLSL_UOP(!); + break; + } + + case OpIEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + else + HLSL_BOP_CAST(==, int_type); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "=="); + else + HLSL_BOP(==); + break; + } + + case OpINotEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + else + HLSL_BOP_CAST(!=, int_type); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!="); + else + HLSL_BOP(!=); + break; + } + + case OpUGreaterThan: + case OpSGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + else + HLSL_BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">"); + else + HLSL_BOP(>); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + else + HLSL_BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">="); + else + HLSL_BOP(>=); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + else + HLSL_BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<"); + else + HLSL_BOP(<); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + else + HLSL_BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<="); + else + HLSL_BOP(<=); + break; + } + + case OpImageQueryLod: + emit_texture_op(instruction); + break; + + case OpImageQuerySizeLod: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(expression_type(ops[2])); + + auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); + statement("uint ", dummy_samples_levels, ";"); + + auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", ", + bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySize: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(expression_type(ops[2])); + + auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); + statement("uint ", dummy_samples_levels, ";"); + + auto expr = join("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySamples: + case OpImageQueryLevels: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(expression_type(ops[2])); + + // Keep it simple and do not emit special variants to make this look nicer ... + // This stuff is barely, if ever, used. + forced_temporaries.insert(id); + auto &type = get<SPIRType>(result_type); + statement(variable_decl(type, to_name(id)), ";"); + statement("SPIRV_Cross_textureSize(", to_expression(ops[2]), ", 0u, ", to_name(id), ");"); + + auto &restype = get<SPIRType>(ops[0]); + auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); + set<SPIRExpression>(id, expr, result_type, true); + break; + } + + case OpImageRead: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *var = maybe_get_backing_variable(ops[2]); + auto &type = expression_type(ops[2]); + bool subpass_data = type.image.dim == DimSubpassData; + bool pure = false; + + string imgexpr; + + if (subpass_data) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3."); + + // Similar to GLSL, implement subpass loads using texelFetch. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || instruction.length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); + uint32_t sample = ops[5]; + imgexpr = join(to_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); + } + else + imgexpr = join(to_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); + + pure = true; + } + else + { + imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. + if (var && !subpass_data) + imgexpr = remap_swizzle(get<SPIRType>(result_type), + image_format_to_components(get<SPIRType>(var->basetype).image.format), imgexpr); + } + + if (var && var->forwardable) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); + break; + } + + case OpImageWrite: + { + auto *var = maybe_get_backing_variable(ops[0]); + + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. + auto value_expr = to_expression(ops[2]); + if (var) + { + auto &type = get<SPIRType>(var->basetype); + auto narrowed_type = get<SPIRType>(type.image.type); + narrowed_type.vecsize = image_format_to_components(type.image.format); + value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); + } + + statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = + set<SPIRExpression>(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpAtomicCompareExchange: + case OpAtomicExchange: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicIAdd: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + { + emit_atomic(ops, instruction.length, opcode); + break; + } + + case OpControlBarrier: + case OpMemoryBarrier: + { + uint32_t memory; + uint32_t semantics; + + if (opcode == OpMemoryBarrier) + { + memory = get<SPIRConstant>(ops[0]).scalar(); + semantics = get<SPIRConstant>(ops[1]).scalar(); + } + else + { + memory = get<SPIRConstant>(ops[1]).scalar(); + semantics = get<SPIRConstant>(ops[2]).scalar(); + } + + if (memory == ScopeSubgroup) + { + // No Wave-barriers in HLSL. + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar(); + uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar(); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + // There is no "just execution barrier" in HLSL. + // If there are no memory semantics for next instruction, we will imply group shared memory is synced. + if (next_semantics == 0) + next_semantics = MemorySemanticsWorkgroupMemoryMask; + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (opcode == OpControlBarrier) + { + // We cannot emit just execution barrier, for no memory semantics pick the cheapest option. + if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0) + statement("GroupMemoryBarrierWithGroupSync();"); + else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) + statement("DeviceMemoryBarrierWithGroupSync();"); + else + statement("AllMemoryBarrierWithGroupSync();"); + } + else + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrier();"); + else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) + statement("DeviceMemoryBarrier();"); + else + statement("AllMemoryBarrier();"); + } + break; + } + + case OpBitFieldInsert: + { + if (!requires_bitfield_insert) + { + requires_bitfield_insert = true; + force_recompile = true; + } + + auto expr = join("SPIRV_Cross_bitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); + + bool forward = + should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]); + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(ops[0], ops[1], expr, forward); + break; + } + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + { + if (!requires_bitfield_extract) + { + requires_bitfield_extract = true; + force_recompile = true; + } + + if (opcode == OpBitFieldSExtract) + HLSL_TFOP(SPIRV_Cross_bitfieldSExtract); + else + HLSL_TFOP(SPIRV_Cross_bitfieldUExtract); + break; + } + + case OpBitCount: + HLSL_UFOP(countbits); + break; + + case OpBitReverse: + HLSL_UFOP(reversebits); + break; + + default: + CompilerGLSL::emit_instruction(instruction); + break; + } +} + +void CompilerHLSL::require_texture_query_variant(const SPIRType &type) +{ + uint32_t bit = 0; + switch (type.image.dim) + { + case Dim1D: + bit = type.image.arrayed ? Query1DArray : Query1D; + break; + + case Dim2D: + if (type.image.ms) + bit = type.image.arrayed ? Query2DMSArray : Query2DMS; + else + bit = type.image.arrayed ? Query2DArray : Query2D; + break; + + case Dim3D: + bit = Query3D; + break; + + case DimCube: + bit = type.image.arrayed ? QueryCubeArray : QueryCube; + break; + + case DimBuffer: + bit = QueryBuffer; + break; + + default: + SPIRV_CROSS_THROW("Unsupported query type."); + } + + switch (get<SPIRType>(type.image.type).basetype) + { + case SPIRType::Float: + bit += QueryTypeFloat; + break; + + case SPIRType::Int: + bit += QueryTypeInt; + break; + + case SPIRType::UInt: + bit += QueryTypeUInt; + break; + + default: + SPIRV_CROSS_THROW("Unsupported query type."); + } + + uint64_t mask = 1ull << bit; + if ((required_textureSizeVariants & mask) == 0) + { + force_recompile = true; + required_textureSizeVariants |= mask; + } +} + +void CompilerHLSL::set_root_constant_layouts(vector<RootConstants> layout) +{ + root_constants_layout = move(layout); +} + +void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes) +{ + remap_vertex_attributes.push_back(vertex_attributes); +} + +uint32_t CompilerHLSL::remap_num_workgroups_builtin() +{ + update_active_builtins(); + + if (!active_input_builtins.get(BuiltInNumWorkgroups)) + return 0; + + // Create a new, fake UBO. + uint32_t offset = ir.increase_bound_by(4); + + uint32_t uint_type_id = offset; + uint32_t block_type_id = offset + 1; + uint32_t block_pointer_type_id = offset + 2; + uint32_t variable_id = offset + 3; + + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + uint_type.vecsize = 3; + uint_type.columns = 1; + set<SPIRType>(uint_type_id, uint_type); + + SPIRType block_type; + block_type.basetype = SPIRType::Struct; + block_type.member_types.push_back(uint_type_id); + set<SPIRType>(block_type_id, block_type); + set_decoration(block_type_id, DecorationBlock); + set_member_name(block_type_id, 0, "count"); + set_member_decoration(block_type_id, 0, DecorationOffset, 0); + + SPIRType block_pointer_type = block_type; + block_pointer_type.pointer = true; + block_pointer_type.storage = StorageClassUniform; + block_pointer_type.parent_type = block_type_id; + auto &ptr_type = set<SPIRType>(block_pointer_type_id, block_pointer_type); + + // Preserve self. + ptr_type.self = block_type_id; + + set<SPIRVariable>(variable_id, block_pointer_type_id, StorageClassUniform); + ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups"; + + num_workgroups_builtin = variable_id; + return variable_id; +} + +string CompilerHLSL::compile() +{ + // Do not deal with ES-isms like precision, older extensions and such. + options.es = false; + options.version = 450; + options.vulkan_semantics = true; + backend.float_literal_suffix = true; + backend.double_literal_suffix = false; + backend.long_long_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.int16_t_literal_suffix = ""; + backend.uint16_t_literal_suffix = "u"; + backend.basic_int_type = "int"; + backend.basic_uint_type = "uint"; + backend.swizzle_is_function = false; + backend.shared_is_implied = true; + backend.flexible_member_array_supported = false; + backend.explicit_struct_type = false; + backend.use_initializer_list = true; + backend.use_constructor_splatting = false; + backend.boolean_mix_support = false; + backend.can_swizzle_scalar = true; + backend.can_declare_struct_inline = false; + backend.can_declare_arrays_inline = false; + backend.can_return_array = false; + + build_function_control_flow_graphs_and_analyze(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + + // Subpass input needs SV_Position. + if (need_subpass_input) + active_input_builtins.set(BuiltInFragCoord); + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr<ostringstream>(new ostringstream()); + + emit_header(); + emit_resources(); + + emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset()); + emit_hlsl_entry_point(); + + pass_count++; + } while (force_recompile); + + // Entry point in HLSL is always main() for the time being. + get_entry_point().name = "main"; + + return buffer->str(); +} + +void CompilerHLSL::emit_block_hints(const SPIRBlock &block) +{ + switch (block.hint) + { + case SPIRBlock::HintFlatten: + statement("[flatten]"); + break; + case SPIRBlock::HintDontFlatten: + statement("[branch]"); + break; + case SPIRBlock::HintUnroll: + statement("[unroll]"); + break; + case SPIRBlock::HintDontUnroll: + statement("[loop]"); + break; + default: + break; + } +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp new file mode 100644 index 0000000..12b8ae1 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp @@ -0,0 +1,227 @@ +/* + * Copyright 2016-2019 Robert Konrad + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_HLSL_HPP +#define SPIRV_HLSL_HPP + +#include "spirv_glsl.hpp" +#include <utility> +#include <vector> + +namespace spirv_cross +{ +// Interface which remaps vertex inputs to a fixed semantic name to make linking easier. +struct HLSLVertexAttributeRemap +{ + uint32_t location; + std::string semantic; +}; +// Specifying a root constant (d3d12) or push constant range (vulkan). +// +// `start` and `end` denotes the range of the root constant in bytes. +// Both values need to be multiple of 4. +struct RootConstants +{ + uint32_t start; + uint32_t end; + + uint32_t binding; + uint32_t space; +}; + +class CompilerHLSL : public CompilerGLSL +{ +public: + struct Options + { + uint32_t shader_model = 30; // TODO: map ps_4_0_level_9_0,... somehow + + // Allows the PointSize builtin, and ignores it, as PointSize is not supported in HLSL. + bool point_size_compat = false; + + // Allows the PointCoord builtin, returns float2(0.5, 0.5), as PointCoord is not supported in HLSL. + bool point_coord_compat = false; + + // If true, the backend will assume that VertexIndex and InstanceIndex will need to apply + // a base offset, and you will need to fill in a cbuffer with offsets. + // Set to false if you know you will never use base instance or base vertex + // functionality as it might remove an internal cbuffer. + bool support_nonzero_base_vertex_base_instance = false; + }; + + explicit CompilerHLSL(std::vector<uint32_t> spirv_) + : CompilerGLSL(move(spirv_)) + { + } + + CompilerHLSL(const uint32_t *ir_, size_t size) + : CompilerGLSL(ir_, size) + { + } + + explicit CompilerHLSL(const ParsedIR &ir_) + : CompilerGLSL(ir_) + { + } + + explicit CompilerHLSL(ParsedIR &&ir_) + : CompilerGLSL(std::move(ir_)) + { + } + + const Options &get_hlsl_options() const + { + return hlsl_options; + } + + void set_hlsl_options(const Options &opts) + { + hlsl_options = opts; + } + + // Optionally specify a custom root constant layout. + // + // Push constants ranges will be split up according to the + // layout specified. + void set_root_constant_layouts(std::vector<RootConstants> layout); + + // Compiles and remaps vertex attributes at specific locations to a fixed semantic. + // The default is TEXCOORD# where # denotes location. + // Matrices are unrolled to vectors with notation ${SEMANTIC}_#, where # denotes row. + // $SEMANTIC is either TEXCOORD# or a semantic name specified here. + void add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes); + std::string compile() override; + + // This is a special HLSL workaround for the NumWorkGroups builtin. + // This does not exist in HLSL, so the calling application must create a dummy cbuffer in + // which the application will store this builtin. + // The cbuffer layout will be: + // cbuffer SPIRV_Cross_NumWorkgroups : register(b#, space#) { uint3 SPIRV_Cross_NumWorkgroups_count; }; + // This must be called before compile(). + // The function returns 0 if NumWorkGroups builtin is not statically used in the shader from the current entry point. + // If non-zero, this returns the variable ID of a cbuffer which corresponds to + // the cbuffer declared above. By default, no binding or descriptor set decoration is set, + // so the calling application should declare explicit bindings on this ID before calling compile(). + uint32_t remap_num_workgroups_builtin(); + +private: + std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + std::string image_type_hlsl(const SPIRType &type, uint32_t id); + std::string image_type_hlsl_modern(const SPIRType &type, uint32_t id); + std::string image_type_hlsl_legacy(const SPIRType &type, uint32_t id); + void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; + void emit_hlsl_entry_point(); + void emit_header() override; + void emit_resources(); + void emit_interface_block_globally(const SPIRVariable &type); + void emit_interface_block_in_struct(const SPIRVariable &type, std::unordered_set<uint32_t> &active_locations); + void emit_builtin_inputs_in_struct(); + void emit_builtin_outputs_in_struct(); + void emit_texture_op(const Instruction &i) override; + void emit_instruction(const Instruction &instruction) override; + void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count) override; + void emit_buffer_block(const SPIRVariable &type) override; + void emit_push_constant_block(const SPIRVariable &var) override; + void emit_uniform(const SPIRVariable &var) override; + void emit_modern_uniform(const SPIRVariable &var); + void emit_legacy_uniform(const SPIRVariable &var); + void emit_specialization_constants_and_structs(); + void emit_composite_constants(); + void emit_fixup() override; + std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; + std::string layout_for_member(const SPIRType &type, uint32_t index) override; + std::string to_interpolation_qualifiers(const Bitset &flags) override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + std::string to_func_call_arg(uint32_t id) override; + std::string to_sampler_expression(uint32_t id); + std::string to_resource_binding(const SPIRVariable &var); + std::string to_resource_binding_sampler(const SPIRVariable &var); + std::string to_resource_register(char space, uint32_t binding, uint32_t set); + void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; + void emit_access_chain(const Instruction &instruction); + void emit_load(const Instruction &instruction); + std::string read_access_chain(const SPIRAccessChain &chain); + void write_access_chain(const SPIRAccessChain &chain, uint32_t value); + void emit_store(const Instruction &instruction); + void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); + void emit_subgroup_op(const Instruction &i) override; + void emit_block_hints(const SPIRBlock &block) override; + + void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier, + uint32_t base_offset = 0) override; + + const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override; + void replace_illegal_names() override; + + Options hlsl_options; + bool requires_op_fmod = false; + bool requires_fp16_packing = false; + bool requires_explicit_fp16_packing = false; + bool requires_unorm8_packing = false; + bool requires_snorm8_packing = false; + bool requires_unorm16_packing = false; + bool requires_snorm16_packing = false; + bool requires_bitfield_insert = false; + bool requires_bitfield_extract = false; + bool requires_inverse_2x2 = false; + bool requires_inverse_3x3 = false; + bool requires_inverse_4x4 = false; + uint64_t required_textureSizeVariants = 0; + void require_texture_query_variant(const SPIRType &type); + + enum TextureQueryVariantDim + { + Query1D = 0, + Query1DArray, + Query2D, + Query2DArray, + Query3D, + QueryBuffer, + QueryCube, + QueryCubeArray, + Query2DMS, + Query2DMSArray, + QueryDimCount + }; + + enum TextureQueryVariantType + { + QueryTypeFloat = 0, + QueryTypeInt = 16, + QueryTypeUInt = 32, + QueryTypeCount = 3 + }; + + void emit_builtin_variables(); + bool require_output = false; + bool require_input = false; + std::vector<HLSLVertexAttributeRemap> remap_vertex_attributes; + + uint32_t type_to_consumed_locations(const SPIRType &type) const; + + void emit_io_block(const SPIRVariable &var); + std::string to_semantic(uint32_t vertex_location); + + uint32_t num_workgroups_builtin = 0; + + // Custom root constant layout, which should be emitted + // when translating push constant ranges. + std::vector<RootConstants> root_constants_layout; +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp new file mode 100644 index 0000000..41a3aaa --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp @@ -0,0 +1,7715 @@ +/* + * Copyright 2016-2019 The Brenwill Workshop Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_msl.hpp" +#include "GLSL.std.450.h" + +#include <algorithm> +#include <assert.h> +#include <numeric> + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +static const uint32_t k_unknown_location = ~0u; +static const uint32_t k_unknown_component = ~0u; + +static const uint32_t k_aux_mbr_idx_swizzle_const = 0u; + +CompilerMSL::CompilerMSL(vector<uint32_t> spirv_) + : CompilerGLSL(move(spirv_)) +{ +} + +CompilerMSL::CompilerMSL(const uint32_t *ir_, size_t word_count) + : CompilerGLSL(ir_, word_count) +{ +} + +CompilerMSL::CompilerMSL(const ParsedIR &ir_) + : CompilerGLSL(ir_) +{ +} + +CompilerMSL::CompilerMSL(ParsedIR &&ir_) + : CompilerGLSL(std::move(ir_)) +{ +} + +void CompilerMSL::add_msl_vertex_attribute(const MSLVertexAttr &va) +{ + vtx_attrs_by_location[va.location] = va; + if (va.builtin != BuiltInMax && !vtx_attrs_by_builtin.count(va.builtin)) + vtx_attrs_by_builtin[va.builtin] = va; +} + +void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) +{ + resource_bindings.push_back({ binding, false }); +} + +void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) +{ + if (desc_set < kMaxArgumentBuffers) + argument_buffer_discrete_mask |= 1u << desc_set; +} + +bool CompilerMSL::is_msl_vertex_attribute_used(uint32_t location) +{ + return vtx_attrs_in_use.count(location) != 0; +} + +bool CompilerMSL::is_msl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) +{ + auto itr = find_if(begin(resource_bindings), end(resource_bindings), + [&](const std::pair<MSLResourceBinding, bool> &resource) -> bool { + return model == resource.first.stage && desc_set == resource.first.desc_set && + binding == resource.first.binding; + }); + return itr != end(resource_bindings) && itr->second; +} + +void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components) +{ + fragment_output_components[location] = components; +} + +void CompilerMSL::build_implicit_builtins() +{ + bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); + bool need_vertex_params = capture_output_to_buffer && get_execution_model() == ExecutionModelVertex; + bool need_tesc_params = get_execution_model() == ExecutionModelTessellationControl; + if (need_subpass_input || need_sample_pos || need_vertex_params || need_tesc_params) + { + bool has_frag_coord = false; + bool has_sample_id = false; + bool has_vertex_idx = false; + bool has_base_vertex = false; + bool has_instance_idx = false; + bool has_base_instance = false; + bool has_invocation_id = false; + bool has_primitive_id = false; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) + return; + + if (need_subpass_input && ir.meta[var.self].decoration.builtin_type == BuiltInFragCoord) + { + builtin_frag_coord_id = var.self; + has_frag_coord = true; + } + + if (need_sample_pos && ir.meta[var.self].decoration.builtin_type == BuiltInSampleId) + { + builtin_sample_id_id = var.self; + has_sample_id = true; + } + + if (need_vertex_params) + { + switch (ir.meta[var.self].decoration.builtin_type) + { + case BuiltInVertexIndex: + builtin_vertex_idx_id = var.self; + has_vertex_idx = true; + break; + case BuiltInBaseVertex: + builtin_base_vertex_id = var.self; + has_base_vertex = true; + break; + case BuiltInInstanceIndex: + builtin_instance_idx_id = var.self; + has_instance_idx = true; + break; + case BuiltInBaseInstance: + builtin_base_instance_id = var.self; + has_base_instance = true; + break; + default: + break; + } + } + + if (need_tesc_params) + { + switch (ir.meta[var.self].decoration.builtin_type) + { + case BuiltInInvocationId: + builtin_invocation_id_id = var.self; + has_invocation_id = true; + break; + case BuiltInPrimitiveId: + builtin_primitive_id_id = var.self; + has_primitive_id = true; + break; + default: + break; + } + } + }); + + if (!has_frag_coord && need_subpass_input) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_FragCoord. + SPIRType vec4_type; + vec4_type.basetype = SPIRType::Float; + vec4_type.width = 32; + vec4_type.vecsize = 4; + set<SPIRType>(type_id, vec4_type); + + SPIRType vec4_type_ptr; + vec4_type_ptr = vec4_type; + vec4_type_ptr.pointer = true; + vec4_type_ptr.parent_type = type_id; + vec4_type_ptr.storage = StorageClassInput; + auto &ptr_type = set<SPIRType>(type_ptr_id, vec4_type_ptr); + ptr_type.self = type_id; + + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInFragCoord); + builtin_frag_coord_id = var_id; + } + + if (!has_sample_id && need_sample_pos) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_SampleID. + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_ptr; + uint_type_ptr = uint_type; + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInSampleId); + builtin_sample_id_id = var_id; + } + + if (need_vertex_params && (!has_vertex_idx || !has_base_vertex || !has_instance_idx || !has_base_instance)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_ptr; + uint_type_ptr = uint_type; + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + + if (!has_vertex_idx) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_VertexIndex. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInVertexIndex); + builtin_vertex_idx_id = var_id; + } + if (!has_base_vertex) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_BaseVertex. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInBaseVertex); + builtin_base_vertex_id = var_id; + } + if (!has_instance_idx) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_InstanceIndex. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInInstanceIndex); + builtin_instance_idx_id = var_id; + } + if (!has_base_instance) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_BaseInstance. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInBaseInstance); + builtin_base_instance_id = var_id; + } + } + + if (need_tesc_params && (!has_invocation_id || !has_primitive_id)) + { + uint32_t offset = ir.increase_bound_by(2); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_ptr; + uint_type_ptr = uint_type; + uint_type_ptr.pointer = true; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; + auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + + if (!has_invocation_id) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_InvocationID. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInInvocationId); + builtin_invocation_id_id = var_id; + } + if (!has_primitive_id) + { + uint32_t var_id = ir.increase_bound_by(1); + + // Create gl_PrimitiveID. + set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput); + set_decoration(var_id, DecorationBuiltIn, BuiltInPrimitiveId); + builtin_primitive_id_id = var_id; + } + } + } + + if (needs_aux_buffer_def) + { + uint32_t offset = ir.increase_bound_by(5); + uint32_t type_id = offset; + uint32_t type_arr_id = offset + 1; + uint32_t struct_id = offset + 2; + uint32_t struct_ptr_id = offset + 3; + uint32_t var_id = offset + 4; + + // Create a buffer to hold extra data, including the swizzle constants. + SPIRType uint_type; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRType>(type_id, uint_type); + + SPIRType uint_type_arr = uint_type; + uint_type_arr.array.push_back(0); + uint_type_arr.array_size_literal.push_back(true); + uint_type_arr.parent_type = type_id; + set<SPIRType>(type_arr_id, uint_type_arr); + set_decoration(type_arr_id, DecorationArrayStride, 4); + + SPIRType struct_type; + struct_type.basetype = SPIRType::Struct; + struct_type.member_types.push_back(type_arr_id); + auto &type = set<SPIRType>(struct_id, struct_type); + type.self = struct_id; + set_decoration(struct_id, DecorationBlock); + set_name(struct_id, "spvAux"); + set_member_name(struct_id, k_aux_mbr_idx_swizzle_const, "swizzleConst"); + set_member_decoration(struct_id, k_aux_mbr_idx_swizzle_const, DecorationOffset, 0); + + SPIRType struct_type_ptr = struct_type; + struct_type_ptr.pointer = true; + struct_type_ptr.parent_type = struct_id; + struct_type_ptr.storage = StorageClassUniform; + auto &ptr_type = set<SPIRType>(struct_ptr_id, struct_type_ptr); + ptr_type.self = struct_id; + + set<SPIRVariable>(var_id, struct_ptr_id, StorageClassUniform); + set_name(var_id, "spvAuxBuffer"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, 0xFFFFFFFE); + set_decoration(var_id, DecorationBinding, msl_options.aux_buffer_index); + aux_buffer_id = var_id; + } +} + +static string create_sampler_address(const char *prefix, MSLSamplerAddress addr) +{ + switch (addr) + { + case MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE: + return join(prefix, "address::clamp_to_edge"); + case MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO: + return join(prefix, "address::clamp_to_zero"); + case MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER: + return join(prefix, "address::clamp_to_border"); + case MSL_SAMPLER_ADDRESS_REPEAT: + return join(prefix, "address::repeat"); + case MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT: + return join(prefix, "address::mirrored_repeat"); + default: + SPIRV_CROSS_THROW("Invalid sampler addressing mode."); + } +} + +SPIRType &CompilerMSL::get_stage_in_struct_type() +{ + auto &si_var = get<SPIRVariable>(stage_in_var_id); + return get_variable_data_type(si_var); +} + +SPIRType &CompilerMSL::get_stage_out_struct_type() +{ + auto &so_var = get<SPIRVariable>(stage_out_var_id); + return get_variable_data_type(so_var); +} + +SPIRType &CompilerMSL::get_patch_stage_in_struct_type() +{ + auto &si_var = get<SPIRVariable>(patch_stage_in_var_id); + return get_variable_data_type(si_var); +} + +SPIRType &CompilerMSL::get_patch_stage_out_struct_type() +{ + auto &so_var = get<SPIRVariable>(patch_stage_out_var_id); + return get_variable_data_type(so_var); +} + +std::string CompilerMSL::get_tess_factor_struct_name() +{ + if (get_entry_point().flags.get(ExecutionModeTriangles)) + return "MTLTriangleTessellationFactorsHalf"; + return "MTLQuadTessellationFactorsHalf"; +} + +void CompilerMSL::emit_entry_point_declarations() +{ + // FIXME: Get test coverage here ... + + // Emit constexpr samplers here. + for (auto &samp : constexpr_samplers) + { + auto &var = get<SPIRVariable>(samp.first); + auto &type = get<SPIRType>(var.basetype); + if (type.basetype == SPIRType::Sampler) + add_resource_name(samp.first); + + vector<string> args; + auto &s = samp.second; + + if (s.coord != MSL_SAMPLER_COORD_NORMALIZED) + args.push_back("coord::pixel"); + + if (s.min_filter == s.mag_filter) + { + if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST) + args.push_back("filter::linear"); + } + else + { + if (s.min_filter != MSL_SAMPLER_FILTER_NEAREST) + args.push_back("min_filter::linear"); + if (s.mag_filter != MSL_SAMPLER_FILTER_NEAREST) + args.push_back("mag_filter::linear"); + } + + switch (s.mip_filter) + { + case MSL_SAMPLER_MIP_FILTER_NONE: + // Default + break; + case MSL_SAMPLER_MIP_FILTER_NEAREST: + args.push_back("mip_filter::nearest"); + break; + case MSL_SAMPLER_MIP_FILTER_LINEAR: + args.push_back("mip_filter::linear"); + break; + default: + SPIRV_CROSS_THROW("Invalid mip filter."); + } + + if (s.s_address == s.t_address && s.s_address == s.r_address) + { + if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("", s.s_address)); + } + else + { + if (s.s_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("s_", s.s_address)); + if (s.t_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("t_", s.t_address)); + if (s.r_address != MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE) + args.push_back(create_sampler_address("r_", s.r_address)); + } + + if (s.compare_enable) + { + switch (s.compare_func) + { + case MSL_SAMPLER_COMPARE_FUNC_ALWAYS: + args.push_back("compare_func::always"); + break; + case MSL_SAMPLER_COMPARE_FUNC_NEVER: + args.push_back("compare_func::never"); + break; + case MSL_SAMPLER_COMPARE_FUNC_EQUAL: + args.push_back("compare_func::equal"); + break; + case MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL: + args.push_back("compare_func::not_equal"); + break; + case MSL_SAMPLER_COMPARE_FUNC_LESS: + args.push_back("compare_func::less"); + break; + case MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL: + args.push_back("compare_func::less_equal"); + break; + case MSL_SAMPLER_COMPARE_FUNC_GREATER: + args.push_back("compare_func::greater"); + break; + case MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL: + args.push_back("compare_func::greater_equal"); + break; + default: + SPIRV_CROSS_THROW("Invalid sampler compare function."); + } + } + + if (s.s_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || s.t_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER || + s.r_address == MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER) + { + switch (s.border_color) + { + case MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK: + args.push_back("border_color::opaque_black"); + break; + case MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE: + args.push_back("border_color::opaque_white"); + break; + case MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK: + args.push_back("border_color::transparent_black"); + break; + default: + SPIRV_CROSS_THROW("Invalid sampler border color."); + } + } + + if (s.anisotropy_enable) + args.push_back(join("max_anisotropy(", s.max_anisotropy, ")")); + if (s.lod_clamp_enable) + { + args.push_back(join("lod_clamp(", convert_to_string(s.lod_clamp_min, current_locale_radix_character), ", ", + convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")")); + } + + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + "(", merge(args), ");"); + } + + // Emit buffer arrays here. + for (uint32_t array_id : buffer_arrays) + { + const auto &var = get<SPIRVariable>(array_id); + const auto &type = get_variable_data_type(var); + string name = to_name(array_id); + statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] ="); + begin_scope(); + for (uint32_t i = 0; i < type.array[0]; ++i) + statement(name + "_" + convert_to_string(i) + ","); + end_scope_decl(); + statement_no_indent(""); + } + // For some reason, without this, we end up emitting the arrays twice. + buffer_arrays.clear(); +} + +string CompilerMSL::compile() +{ + // Do not deal with GLES-isms like precision, older extensions and such. + options.vulkan_semantics = true; + options.es = false; + options.version = 450; + backend.null_pointer_literal = "nullptr"; + backend.float_literal_suffix = false; + backend.uint32_t_literal_suffix = true; + backend.int16_t_literal_suffix = ""; + backend.uint16_t_literal_suffix = "u"; + backend.basic_int_type = "int"; + backend.basic_uint_type = "uint"; + backend.basic_int8_type = "char"; + backend.basic_uint8_type = "uchar"; + backend.basic_int16_type = "short"; + backend.basic_uint16_type = "ushort"; + backend.discard_literal = "discard_fragment()"; + backend.swizzle_is_function = false; + backend.shared_is_implied = false; + backend.use_initializer_list = true; + backend.use_typed_initializer_list = true; + backend.native_row_major_matrix = false; + backend.flexible_member_array_supported = false; + backend.can_declare_arrays_inline = false; + backend.can_return_array = false; + backend.boolean_mix_support = false; + backend.allow_truncated_access_chain = true; + backend.array_is_value_type = false; + backend.comparison_image_samples_scalar = true; + + capture_output_to_buffer = msl_options.capture_output_to_buffer; + is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; + + replace_illegal_names(); + + struct_member_padding.clear(); + + build_function_control_flow_graphs_and_analyze(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + analyze_sampled_image_usage(); + build_implicit_builtins(); + + fixup_image_load_store_access(); + + set_enabled_interface_variables(get_active_interface_variables()); + if (aux_buffer_id) + active_interface_variables.insert(aux_buffer_id); + + // Preprocess OpCodes to extract the need to output additional header content + preprocess_op_codes(); + + // Create structs to hold input, output and uniform variables. + // Do output first to ensure out. is declared at top of entry function. + qual_pos_var_name = ""; + stage_out_var_id = add_interface_block(StorageClassOutput); + patch_stage_out_var_id = add_interface_block(StorageClassOutput, true); + stage_in_var_id = add_interface_block(StorageClassInput); + if (get_execution_model() == ExecutionModelTessellationEvaluation) + patch_stage_in_var_id = add_interface_block(StorageClassInput, true); + + if (get_execution_model() == ExecutionModelTessellationControl) + stage_out_ptr_var_id = add_interface_block_pointer(stage_out_var_id, StorageClassOutput); + if (is_tessellation_shader()) + stage_in_ptr_var_id = add_interface_block_pointer(stage_in_var_id, StorageClassInput); + + // Metal vertex functions that define no output must disable rasterization and return void. + if (!stage_out_var_id) + is_rasterization_disabled = true; + + // Convert the use of global variables to recursively-passed function parameters + localize_global_variables(); + extract_global_variables_from_functions(); + + // Mark any non-stage-in structs to be tightly packed. + mark_packable_structs(); + + // Add fixup hooks required by shader inputs and outputs. This needs to happen before + // the loop, so the hooks aren't added multiple times. + fix_up_shader_inputs_outputs(); + + // If we are using argument buffers, we create argument buffer structures for them here. + // These buffers will be used in the entry point, not the individual resources. + if (msl_options.argument_buffers) + { + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Argument buffers can only be used with MSL 2.0 and up."); + analyze_argument_buffers(); + } + + uint32_t pass_count = 0; + do + { + if (pass_count >= 3) + SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); + + reset(); + + // Start bindings at zero. + next_metal_resource_index_buffer = 0; + next_metal_resource_index_texture = 0; + next_metal_resource_index_sampler = 0; + + // Move constructor for this type is broken on GCC 4.9 ... + buffer = unique_ptr<ostringstream>(new ostringstream()); + + emit_header(); + emit_specialization_constants_and_structs(); + emit_resources(); + emit_custom_functions(); + emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset()); + + pass_count++; + } while (force_recompile); + + return buffer->str(); +} + +// Register the need to output any custom functions. +void CompilerMSL::preprocess_op_codes() +{ + OpCodePreprocessor preproc(*this); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), preproc); + + if (preproc.suppress_missing_prototypes) + add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + + if (preproc.uses_atomics) + { + add_header_line("#include <metal_atomic>"); + add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); + } + + // Metal vertex functions that write to resources must disable rasterization and return void. + if (preproc.uses_resource_write) + is_rasterization_disabled = true; + + // Tessellation control shaders are run as compute functions in Metal, and so + // must capture their output to a buffer. + if (get_execution_model() == ExecutionModelTessellationControl) + { + is_rasterization_disabled = true; + capture_output_to_buffer = true; + } +} + +// Move the Private and Workgroup global variables to the entry function. +// Non-constant variables cannot have global scope in Metal. +void CompilerMSL::localize_global_variables() +{ + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + auto iter = global_variables.begin(); + while (iter != global_variables.end()) + { + uint32_t v_id = *iter; + auto &var = get<SPIRVariable>(v_id); + if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup) + { + if (!variable_is_lut(var)) + entry_func.add_local_variable(v_id); + iter = global_variables.erase(iter); + } + else + iter++; + } +} + +// For any global variable accessed directly by a function, +// extract that variable and add it as an argument to that function. +void CompilerMSL::extract_global_variables_from_functions() +{ + // Uniforms + unordered_set<uint32_t> global_var_ids; + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + if (var.storage == StorageClassInput || var.storage == StorageClassOutput || + var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) + { + global_var_ids.insert(var.self); + } + }); + + // Local vars that are declared in the main function and accessed directly by a function + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + for (auto &var : entry_func.local_variables) + if (get<SPIRVariable>(var).storage != StorageClassFunction) + global_var_ids.insert(var); + + std::set<uint32_t> added_arg_ids; + unordered_set<uint32_t> processed_func_ids; + extract_global_variables_from_function(ir.default_entry_point, added_arg_ids, global_var_ids, processed_func_ids); +} + +// MSL does not support the use of global variables for shader input content. +// For any global variable accessed directly by the specified function, extract that variable, +// add it as an argument to that function, and the arg to the added_arg_ids collection. +void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids, + unordered_set<uint32_t> &global_var_ids, + unordered_set<uint32_t> &processed_func_ids) +{ + // Avoid processing a function more than once + if (processed_func_ids.find(func_id) != processed_func_ids.end()) + { + // Return function global variables + added_arg_ids = function_global_vars[func_id]; + return; + } + + processed_func_ids.insert(func_id); + + auto &func = get<SPIRFunction>(func_id); + + // Recursively establish global args added to functions on which we depend. + for (auto block : func.blocks) + { + auto &b = get<SPIRBlock>(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + switch (op) + { + case OpLoad: + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + uint32_t base_id = ops[2]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + + auto &type = get<SPIRType>(ops[0]); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + { + // Implicitly reads gl_FragCoord. + assert(builtin_frag_coord_id != 0); + added_arg_ids.insert(builtin_frag_coord_id); + } + + break; + } + + case OpFunctionCall: + { + // First see if any of the function call args are globals + for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++) + { + uint32_t arg_id = ops[arg_idx]; + if (global_var_ids.find(arg_id) != global_var_ids.end()) + added_arg_ids.insert(arg_id); + } + + // Then recurse into the function itself to extract globals used internally in the function + uint32_t inner_func_id = ops[2]; + std::set<uint32_t> inner_func_args; + extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids, + processed_func_ids); + added_arg_ids.insert(inner_func_args.begin(), inner_func_args.end()); + break; + } + + case OpStore: + { + uint32_t base_id = ops[0]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + case OpSelect: + { + uint32_t base_id = ops[3]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + base_id = ops[4]; + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + break; + } + + default: + break; + } + + // TODO: Add all other operations which can affect memory. + // We should consider a more unified system here to reduce boiler-plate. + // This kind of analysis is done in several places ... + } + } + + function_global_vars[func_id] = added_arg_ids; + + // Add the global variables as arguments to the function + if (func_id != ir.default_entry_point) + { + bool added_in = false; + bool added_out = false; + for (uint32_t arg_id : added_arg_ids) + { + auto &var = get<SPIRVariable>(arg_id); + uint32_t type_id = var.basetype; + auto *p_type = &get<SPIRType>(type_id); + BuiltIn bi_type = BuiltIn(get_decoration(arg_id, DecorationBuiltIn)); + + if (((is_tessellation_shader() && var.storage == StorageClassInput) || + (get_execution_model() == ExecutionModelTessellationControl && var.storage == StorageClassOutput)) && + !(has_decoration(arg_id, DecorationPatch) || is_patch_block(*p_type)) && + (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + p_type->basetype == SPIRType::Struct)) + { + // Tessellation control shaders see inputs and per-vertex outputs as arrays. + // Similarly, tessellation evaluation shaders see per-vertex inputs as arrays. + // We collected them into a structure; we must pass the array of this + // structure to the function. + std::string name; + if (var.storage == StorageClassInput) + { + if (added_in) + continue; + name = input_wg_var_name; + arg_id = stage_in_ptr_var_id; + added_in = true; + } + else if (var.storage == StorageClassOutput) + { + if (added_out) + continue; + name = "gl_out"; + arg_id = stage_out_ptr_var_id; + added_out = true; + } + type_id = get<SPIRVariable>(arg_id).basetype; + p_type = &get<SPIRType>(type_id); + uint32_t next_id = ir.increase_bound_by(1); + func.add_parameter(type_id, next_id, true); + set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id); + + set_name(next_id, name); + } + else if (is_builtin_variable(var) && p_type->basetype == SPIRType::Struct) + { + // Get the pointee type + type_id = get_pointee_type_id(type_id); + p_type = &get<SPIRType>(type_id); + + uint32_t mbr_idx = 0; + for (auto &mbr_type_id : p_type->member_types) + { + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(*p_type, mbr_idx, &builtin); + if (is_builtin && has_active_builtin(builtin, var.storage)) + { + // Add a arg variable with the same type and decorations as the member + uint32_t next_ids = ir.increase_bound_by(2); + uint32_t ptr_type_id = next_ids + 0; + uint32_t var_id = next_ids + 1; + + // Make sure we have an actual pointer type, + // so that we will get the appropriate address space when declaring these builtins. + auto &ptr = set<SPIRType>(ptr_type_id, get<SPIRType>(mbr_type_id)); + ptr.self = mbr_type_id; + ptr.storage = var.storage; + ptr.pointer = true; + ptr.parent_type = mbr_type_id; + + func.add_parameter(mbr_type_id, var_id, true); + set<SPIRVariable>(var_id, ptr_type_id, StorageClassFunction); + ir.meta[var_id].decoration = ir.meta[type_id].members[mbr_idx]; + } + mbr_idx++; + } + } + else + { + uint32_t next_id = ir.increase_bound_by(1); + func.add_parameter(type_id, next_id, true); + set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id); + + // Ensure the existing variable has a valid name and the new variable has all the same meta info + set_name(arg_id, ensure_valid_name(to_name(arg_id), "v")); + ir.meta[next_id] = ir.meta[arg_id]; + } + } + } +} + +// For all variables that are some form of non-input-output interface block, mark that all the structs +// that are recursively contained within the type referenced by that variable should be packed tightly. +void CompilerMSL::mark_packable_structs() +{ + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + if (var.storage != StorageClassFunction && !is_hidden_variable(var)) + { + auto &type = this->get<SPIRType>(var.basetype); + if (type.pointer && + (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassPushConstant || type.storage == StorageClassStorageBuffer) && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + mark_as_packable(type); + } + }); +} + +// If the specified type is a struct, it and any nested structs +// are marked as packable with the SPIRVCrossDecorationPacked decoration, +void CompilerMSL::mark_as_packable(SPIRType &type) +{ + // If this is not the base type (eg. it's a pointer or array), tunnel down + if (type.parent_type) + { + mark_as_packable(get<SPIRType>(type.parent_type)); + return; + } + + if (type.basetype == SPIRType::Struct) + { + set_extended_decoration(type.self, SPIRVCrossDecorationPacked); + + // Recurse + size_t mbr_cnt = type.member_types.size(); + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + uint32_t mbr_type_id = type.member_types[mbr_idx]; + auto &mbr_type = get<SPIRType>(mbr_type_id); + mark_as_packable(mbr_type); + if (mbr_type.type_alias) + { + auto &mbr_type_alias = get<SPIRType>(mbr_type.type_alias); + mark_as_packable(mbr_type_alias); + } + } + } +} + +// If a vertex attribute exists at the location, it is marked as being used by this shader +void CompilerMSL::mark_location_as_used_by_shader(uint32_t location, StorageClass storage) +{ + if ((get_execution_model() == ExecutionModelVertex || is_tessellation_shader()) && (storage == StorageClassInput)) + vtx_attrs_in_use.insert(location); +} + +uint32_t CompilerMSL::get_target_components_for_fragment_location(uint32_t location) const +{ + auto itr = fragment_output_components.find(location); + if (itr == end(fragment_output_components)) + return 4; + else + return itr->second; +} + +uint32_t CompilerMSL::build_extended_vector_type(uint32_t type_id, uint32_t components) +{ + uint32_t new_type_id = ir.increase_bound_by(1); + auto &type = set<SPIRType>(new_type_id, get<SPIRType>(type_id)); + type.vecsize = components; + type.self = new_type_id; + type.parent_type = type_id; + type.pointer = false; + + return new_type_id; +} + +void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, bool strip_array) +{ + bool is_builtin = is_builtin_variable(var); + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_flat = has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_decoration(var.self, DecorationCentroid); + bool is_sample = has_decoration(var.self, DecorationSample); + + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + uint32_t type_id = ensure_correct_builtin_type(var.basetype, builtin); + var.basetype = type_id; + + type_id = get_pointee_type_id(var.basetype); + if (strip_array && is_array(get<SPIRType>(type_id))) + type_id = get<SPIRType>(type_id).parent_type; + auto &type = get<SPIRType>(type_id); + uint32_t target_components = 0; + uint32_t type_components = type.vecsize; + bool padded_output = false; + + // Check if we need to pad fragment output to match a certain number of components. + if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + target_components = get_target_components_for_fragment_location(locn); + if (type_components < target_components) + { + // Make a new type here. + type_id = build_extended_vector_type(type_id, target_components); + padded_output = true; + } + } + + ib_type.member_types.push_back(type_id); + + // Give the member a name + string mbr_name = ensure_valid_name(to_expression(var.self), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // Update the original variable reference to include the structure reference + string qual_var_name = ib_var_ref + "." + mbr_name; + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + + if (padded_output) + { + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + entry_func.fixup_hooks_out.push_back([=, &var]() { + SPIRType &padded_type = this->get<SPIRType>(type_id); + statement(qual_var_name, " = ", remap_swizzle(padded_type, type_components, to_name(var.self)), ";"); + }); + } + else if (!strip_array) + ir.meta[var.self].decoration.qualified_alias = qual_var_name; + + if (var.storage == StorageClassOutput && var.initializer != 0) + { + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); }); + } + + // Copy the variable location from the original variable to the member + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + { + type_id = ensure_correct_attribute_type(var.basetype, locn); + var.basetype = type_id; + type_id = get_pointee_type_id(type_id); + if (strip_array && is_array(get<SPIRType>(type_id))) + type_id = get<SPIRType>(type_id).parent_type; + ib_type.member_types[ib_mbr_idx] = type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + { + uint32_t locn = vtx_attrs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + + if (get_decoration_bitset(var.self).get(DecorationComponent)) + { + uint32_t comp = get_decoration(var.self, DecorationComponent); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + } + + if (get_decoration_bitset(var.self).get(DecorationIndex)) + { + uint32_t index = get_decoration(var.self, DecorationIndex); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index); + } + + // Mark the member as builtin if needed + if (is_builtin) + { + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + if (builtin == BuiltInPosition && storage == StorageClassOutput) + qual_pos_var_name = qual_var_name; + } + + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); +} + +void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, bool strip_array) +{ + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t elem_cnt = 0; + + if (is_matrix(var_type)) + { + if (is_array(var_type)) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + + elem_cnt = var_type.columns; + } + else if (is_array(var_type)) + { + if (var_type.array.size() != 1) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + + elem_cnt = to_array_size_literal(var_type); + } + + bool is_builtin = is_builtin_variable(var); + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + bool is_flat = has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_decoration(var.self, DecorationCentroid); + bool is_sample = has_decoration(var.self, DecorationSample); + + auto *usable_type = &var_type; + if (usable_type->pointer) + usable_type = &get<SPIRType>(usable_type->parent_type); + while (is_array(*usable_type) || is_matrix(*usable_type)) + usable_type = &get<SPIRType>(usable_type->parent_type); + + // If a builtin, force it to have the proper name. + if (is_builtin) + set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + + entry_func.add_local_variable(var.self); + + // We need to declare the variable early and at entry-point scope. + vars_needing_early_declaration.push_back(var.self); + + for (uint32_t i = 0; i < elem_cnt; i++) + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + uint32_t target_components = 0; + bool padded_output = false; + uint32_t type_id = usable_type->self; + + // Check if we need to pad fragment output to match a certain number of components. + if (get_decoration_bitset(var.self).get(DecorationLocation) && msl_options.pad_fragment_output_components && + get_entry_point().model == ExecutionModelFragment && storage == StorageClassOutput) + { + uint32_t locn = get_decoration(var.self, DecorationLocation) + i; + target_components = get_target_components_for_fragment_location(locn); + if (usable_type->vecsize < target_components) + { + // Make a new type here. + type_id = build_extended_vector_type(usable_type->self, target_components); + padded_output = true; + } + } + + ib_type.member_types.push_back(get_pointee_type_id(type_id)); + + // Give the member a name + string mbr_name = ensure_valid_name(join(to_expression(var.self), "_", i), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // There is no qualified alias since we need to flatten the internal array on return. + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation) + i; + if (storage == StorageClassInput && + (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + { + var.basetype = ensure_correct_attribute_type(var.basetype, locn); + uint32_t mbr_type_id = ensure_correct_attribute_type(usable_type->self, locn); + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + { + uint32_t locn = vtx_attrs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + + if (get_decoration_bitset(var.self).get(DecorationIndex)) + { + uint32_t index = get_decoration(var.self, DecorationIndex); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationIndex, index); + } + + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + + if (!strip_array) + { + switch (storage) + { + case StorageClassInput: + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(to_name(var.self), "[", i, "] = ", ib_var_ref, ".", mbr_name, ";"); }); + break; + + case StorageClassOutput: + entry_func.fixup_hooks_out.push_back([=, &var]() { + if (padded_output) + { + auto &padded_type = this->get<SPIRType>(type_id); + statement( + ib_var_ref, ".", mbr_name, " = ", + remap_swizzle(padded_type, usable_type->vecsize, join(to_name(var.self), "[", i, "]")), + ";"); + } + else + statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), "[", i, "];"); + }); + break; + + default: + break; + } + } + } +} + +uint32_t CompilerMSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) +{ + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t location = get_decoration(var.self, DecorationLocation); + + for (uint32_t i = 0; i < mbr_idx; i++) + { + auto &mbr_type = get<SPIRType>(type.member_types[i]); + + // Start counting from any place we have a new location decoration. + if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) + location = get_member_decoration(type.self, mbr_idx, DecorationLocation); + + uint32_t location_count = 1; + + if (mbr_type.columns > 1) + location_count = mbr_type.columns; + + if (!mbr_type.array.empty()) + for (uint32_t j = 0; j < uint32_t(mbr_type.array.size()); j++) + location_count *= to_array_size_literal(mbr_type, j); + + location += location_count; + } + + return location; +} + +void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, + uint32_t mbr_idx, bool strip_array) +{ + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + + BuiltIn builtin; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + bool is_flat = + has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) || + has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) || + has_decoration(var.self, DecorationCentroid); + bool is_sample = + has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample); + + uint32_t mbr_type_id = var_type.member_types[mbr_idx]; + auto &mbr_type = get<SPIRType>(mbr_type_id); + uint32_t elem_cnt = 0; + + if (is_matrix(mbr_type)) + { + if (is_array(mbr_type)) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-matrices in input and output variables."); + + elem_cnt = mbr_type.columns; + } + else if (is_array(mbr_type)) + { + if (mbr_type.array.size() != 1) + SPIRV_CROSS_THROW("MSL cannot emit arrays-of-arrays in input and output variables."); + + elem_cnt = to_array_size_literal(mbr_type); + } + + auto *usable_type = &mbr_type; + if (usable_type->pointer) + usable_type = &get<SPIRType>(usable_type->parent_type); + while (is_array(*usable_type) || is_matrix(*usable_type)) + usable_type = &get<SPIRType>(usable_type->parent_type); + + for (uint32_t i = 0; i < elem_cnt; i++) + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(usable_type->self); + + // Give the member a name + string mbr_name = ensure_valid_name(join(to_qualified_member_name(var_type, mbr_idx), "_", i), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + else if (has_decoration(var.self, DecorationLocation)) + { + uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array) + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + { + uint32_t locn = vtx_attrs_by_builtin[builtin].location + i; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + + if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) + SPIRV_CROSS_THROW("DecorationComponent on matrices and arrays make little sense."); + + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); + + // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. + if (!strip_array) + { + switch (storage) + { + case StorageClassInput: + entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { + statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), "[", i, "] = ", ib_var_ref, + ".", mbr_name, ";"); + }); + break; + + case StorageClassOutput: + entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { + statement(ib_var_ref, ".", mbr_name, " = ", to_name(var.self), ".", + to_member_name(var_type, mbr_idx), "[", i, "];"); + }); + break; + + default: + break; + } + } + } +} + +void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, uint32_t mbr_idx, + bool strip_array) +{ + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + bool is_flat = + has_member_decoration(var_type.self, mbr_idx, DecorationFlat) || has_decoration(var.self, DecorationFlat); + bool is_noperspective = has_member_decoration(var_type.self, mbr_idx, DecorationNoPerspective) || + has_decoration(var.self, DecorationNoPerspective); + bool is_centroid = has_member_decoration(var_type.self, mbr_idx, DecorationCentroid) || + has_decoration(var.self, DecorationCentroid); + bool is_sample = + has_member_decoration(var_type.self, mbr_idx, DecorationSample) || has_decoration(var.self, DecorationSample); + + // Add a reference to the member to the interface struct. + uint32_t mbr_type_id = var_type.member_types[mbr_idx]; + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + mbr_type_id = ensure_correct_builtin_type(mbr_type_id, builtin); + var_type.member_types[mbr_idx] = mbr_type_id; + ib_type.member_types.push_back(mbr_type_id); + + // Give the member a name + string mbr_name = ensure_valid_name(to_qualified_member_name(var_type, mbr_idx), "m"); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // Update the original variable reference to include the structure reference + string qual_var_name = ib_var_ref + "." + mbr_name; + + if (is_builtin && !strip_array) + { + // For the builtin gl_PerVertex, we cannot treat it as a block anyways, + // so redirect to qualified name. + set_member_qualified_name(var_type.self, mbr_idx, qual_var_name); + } + else if (!strip_array) + { + // Unflatten or flatten from [[stage_in]] or [[stage_out]] as appropriate. + switch (storage) + { + case StorageClassInput: + entry_func.fixup_hooks_in.push_back([=, &var, &var_type]() { + statement(to_name(var.self), ".", to_member_name(var_type, mbr_idx), " = ", qual_var_name, ";"); + }); + break; + + case StorageClassOutput: + entry_func.fixup_hooks_out.push_back([=, &var, &var_type]() { + statement(qual_var_name, " = ", to_name(var.self), ".", to_member_name(var_type, mbr_idx), ";"); + }); + break; + + default: + break; + } + } + + // Copy the variable location from the original variable to the member + if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + { + uint32_t locn = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + { + mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); + var_type.member_types[mbr_idx] = mbr_type_id; + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + else if (has_decoration(var.self, DecorationLocation)) + { + // The block itself might have a location and in this case, all members of the block + // receive incrementing locations. + uint32_t locn = get_accumulated_member_location(var, mbr_idx, strip_array); + if (storage == StorageClassInput && (get_execution_model() == ExecutionModelVertex || is_tessellation_shader())) + { + mbr_type_id = ensure_correct_attribute_type(mbr_type_id, locn); + var_type.member_types[mbr_idx] = mbr_type_id; + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + else if (is_builtin && is_tessellation_shader() && vtx_attrs_by_builtin.count(builtin)) + { + uint32_t locn = 0; + auto builtin_itr = vtx_attrs_by_builtin.find(builtin); + if (builtin_itr != end(vtx_attrs_by_builtin)) + locn = builtin_itr->second.location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, storage); + } + + // Copy the component location, if present. + if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) + { + uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationComponent, comp); + } + + // Mark the member as builtin if needed + if (is_builtin) + { + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationBuiltIn, builtin); + if (builtin == BuiltInPosition && storage == StorageClassOutput) + qual_pos_var_name = qual_var_name; + } + + // Copy interpolation decorations if needed + if (is_flat) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationFlat); + if (is_noperspective) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationNoPerspective); + if (is_centroid) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationCentroid); + if (is_sample) + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationSample); + + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, mbr_idx); +} + +// In Metal, the tessellation levels are stored as tightly packed half-precision floating point values. +// But, stage-in attribute offsets and strides must be multiples of four, so we can't pass the levels +// individually. Therefore, we must pass them as vectors. Triangles get a single float4, with the outer +// levels in 'xyz' and the inner level in 'w'. Quads get a float4 containing the outer levels and a +// float2 containing the inner levels. +void CompilerMSL::add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var) +{ + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + auto &var_type = get_variable_element_type(var); + + BuiltIn builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + + // Force the variable to have the proper name. + set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); + + if (get_entry_point().flags.get(ExecutionModeTriangles)) + { + // Triangles are tricky, because we want only one member in the struct. + + // We need to declare the variable early and at entry-point scope. + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + + string mbr_name = "gl_TessLevel"; + + // If we already added the other one, we can skip this step. + if (!added_builtin_tess_level) + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + uint32_t type_id = build_extended_vector_type(var_type.self, 4); + + ib_type.member_types.push_back(type_id); + + // Give the member a name + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // There is no qualified alias since we need to flatten the internal array on return. + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, StorageClassInput); + } + else if (vtx_attrs_by_builtin.count(builtin)) + { + uint32_t locn = vtx_attrs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, StorageClassInput); + } + + added_builtin_tess_level = true; + } + + switch (builtin) + { + case BuiltInTessLevelOuter: + entry_func.fixup_hooks_in.push_back([=, &var]() { + statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".x;"); + statement(to_name(var.self), "[1] = ", ib_var_ref, ".", mbr_name, ".y;"); + statement(to_name(var.self), "[2] = ", ib_var_ref, ".", mbr_name, ".z;"); + }); + break; + + case BuiltInTessLevelInner: + entry_func.fixup_hooks_in.push_back( + [=, &var]() { statement(to_name(var.self), "[0] = ", ib_var_ref, ".", mbr_name, ".w;"); }); + break; + + default: + assert(false); + break; + } + } + else + { + // Add a reference to the variable type to the interface struct. + uint32_t ib_mbr_idx = uint32_t(ib_type.member_types.size()); + + uint32_t type_id = build_extended_vector_type(var_type.self, builtin == BuiltInTessLevelOuter ? 4 : 2); + // Change the type of the variable, too. + uint32_t ptr_type_id = ir.increase_bound_by(1); + auto &new_var_type = set<SPIRType>(ptr_type_id, get<SPIRType>(type_id)); + new_var_type.pointer = true; + new_var_type.storage = StorageClassInput; + new_var_type.parent_type = type_id; + var.basetype = ptr_type_id; + + ib_type.member_types.push_back(type_id); + + // Give the member a name + string mbr_name = to_expression(var.self); + set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + + // Since vectors can be indexed like arrays, there is no need to unpack this. We can + // just refer to the vector directly. So give it a qualified alias. + string qual_var_name = ib_var_ref + "." + mbr_name; + ir.meta[var.self].decoration.qualified_alias = qual_var_name; + + if (get_decoration_bitset(var.self).get(DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, StorageClassInput); + } + else if (vtx_attrs_by_builtin.count(builtin)) + { + uint32_t locn = vtx_attrs_by_builtin[builtin].location; + set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, locn); + mark_location_as_used_by_shader(locn, StorageClassInput); + } + } +} + +void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, bool strip_array) +{ + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + // Tessellation control I/O variables and tessellation evaluation per-point inputs are + // usually declared as arrays. In these cases, we want to add the element type to the + // interface block, since in Metal it's the interface block itself which is arrayed. + auto &var_type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + bool is_builtin = is_builtin_variable(var); + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + + if (var_type.basetype == SPIRType::Struct) + { + if (!is_builtin_type(var_type) && (!capture_output_to_buffer || storage == StorageClassInput) && !strip_array) + { + // For I/O blocks or structs, we will need to pass the block itself around + // to functions if they are used globally in leaf functions. + // Rather than passing down member by member, + // we unflatten I/O blocks while running the shader, + // and pass the actual struct type down to leaf functions. + // We then unflatten inputs, and flatten outputs in the "fixup" stages. + entry_func.add_local_variable(var.self); + vars_needing_early_declaration.push_back(var.self); + } + + if (capture_output_to_buffer && storage != StorageClassInput && !has_decoration(var_type.self, DecorationBlock)) + { + // In Metal tessellation shaders, the interface block itself is arrayed. This makes things + // very complicated, since stage-in structures in MSL don't support nested structures. + // Luckily, for stage-out when capturing output, we can avoid this and just add + // composite members directly, because the stage-out structure is stored to a buffer, + // not returned. + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + } + else + { + // Flatten the struct members into the interface struct + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(var_type.member_types.size()); mbr_idx++) + { + builtin = BuiltInMax; + is_builtin = is_member_builtin(var_type, mbr_idx, &builtin); + auto &mbr_type = get<SPIRType>(var_type.member_types[mbr_idx]); + + if (!is_builtin || has_active_builtin(builtin, storage)) + { + if ((!is_builtin || + (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) && + (storage == StorageClassInput || storage == StorageClassOutput) && + (is_matrix(mbr_type) || is_array(mbr_type))) + { + add_composite_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, + strip_array); + } + else + { + add_plain_member_variable_to_interface_block(storage, ib_var_ref, ib_type, var, mbr_idx, + strip_array); + } + } + } + } + } + else if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && + !strip_array && is_builtin && (builtin == BuiltInTessLevelOuter || builtin == BuiltInTessLevelInner)) + { + add_tess_level_input_to_interface_block(ib_var_ref, ib_type, var); + } + else if (var_type.basetype == SPIRType::Boolean || var_type.basetype == SPIRType::Char || + type_is_integral(var_type) || type_is_floating_point(var_type) || var_type.basetype == SPIRType::Boolean) + { + if (!is_builtin || has_active_builtin(builtin, storage)) + { + // MSL does not allow matrices or arrays in input or output variables, so need to handle it specially. + if ((!is_builtin || (storage == StorageClassInput && get_execution_model() != ExecutionModelFragment)) && + (storage == StorageClassInput || (storage == StorageClassOutput && !capture_output_to_buffer)) && + (is_matrix(var_type) || is_array(var_type))) + { + add_composite_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + } + else + { + add_plain_variable_to_interface_block(storage, ib_var_ref, ib_type, var, strip_array); + } + } + } +} + +// Fix up the mapping of variables to interface member indices, which is used to compile access chains +// for per-vertex variables in a tessellation control shader. +void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) +{ + // Only needed for tessellation shaders. + if (get_execution_model() != ExecutionModelTessellationControl && + !(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) + return; + + bool in_array = false; + for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++) + { + auto &mbr_dec = ir.meta[ib_type_id].members[i]; + uint32_t var_id = mbr_dec.extended.ib_orig_id; + if (!var_id) + continue; + auto &var = get<SPIRVariable>(var_id); + + // Unfortunately, all this complexity is needed to handle flattened structs and/or + // arrays. + if (storage == StorageClassInput) + { + auto &type = get_variable_element_type(var); + if (is_array(type) || is_matrix(type)) + { + if (in_array) + continue; + in_array = true; + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + else + { + if (type.basetype == SPIRType::Struct) + { + uint32_t mbr_idx = + get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); + auto &mbr_type = get<SPIRType>(type.member_types[mbr_idx]); + + if (is_array(mbr_type) || is_matrix(mbr_type)) + { + if (in_array) + continue; + in_array = true; + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + else + { + in_array = false; + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + } + else + { + in_array = false; + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } + } + } + else + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } +} + +// Add an interface structure for the type of storage, which is either StorageClassInput or StorageClassOutput. +// Returns the ID of the newly added variable, or zero if no variable was added. +uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) +{ + // Accumulate the variables that should appear in the interface struct + vector<SPIRVariable *> vars; + bool incl_builtins = (storage == StorageClassOutput || is_tessellation_shader()); + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + BuiltIn bi_type = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + if (var.storage == storage && interface_variable_exists_in_entry_point(var.self) && + !is_hidden_variable(var, incl_builtins) && type.pointer && + (has_decoration(var_id, DecorationPatch) || is_patch_block(type)) == patch && + (!is_builtin_variable(var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || bi_type == BuiltInLayer || + bi_type == BuiltInViewportIndex || bi_type == BuiltInFragDepth || bi_type == BuiltInSampleMask || + (get_execution_model() == ExecutionModelTessellationEvaluation && + (bi_type == BuiltInTessLevelOuter || bi_type == BuiltInTessLevelInner)))) + { + vars.push_back(&var); + } + }); + + // If no variables qualify, leave. + // For patch input in a tessellation evaluation shader, the per-vertex stage inputs + // are included in a special patch control point array. + if (vars.empty() && !(storage == StorageClassInput && patch && stage_in_var_id)) + return 0; + + // Add a new typed variable for this interface structure. + // The initializer expression is allocated here, but populated when the function + // declaraion is emitted, because it is cleared after each compilation pass. + uint32_t next_id = ir.increase_bound_by(3); + uint32_t ib_type_id = next_id++; + auto &ib_type = set<SPIRType>(ib_type_id); + ib_type.basetype = SPIRType::Struct; + ib_type.storage = storage; + set_decoration(ib_type_id, DecorationBlock); + + uint32_t ib_var_id = next_id++; + auto &var = set<SPIRVariable>(ib_var_id, ib_type_id, storage, 0); + var.initializer = next_id++; + + string ib_var_ref; + auto &entry_func = get<SPIRFunction>(ir.default_entry_point); + switch (storage) + { + case StorageClassInput: + ib_var_ref = patch ? patch_stage_in_var_name : stage_in_var_name; + if (get_execution_model() == ExecutionModelTessellationControl) + { + // Add a hook to populate the shared workgroup memory containing + // the gl_in array. + entry_func.fixup_hooks_in.push_back([=]() { + // Can't use PatchVertices yet; the hook for that may not have run yet. + statement("if (", to_expression(builtin_invocation_id_id), " < ", "spvIndirectParams[0])"); + statement(" ", input_wg_var_name, "[", to_expression(builtin_invocation_id_id), "] = ", ib_var_ref, + ";"); + statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); + statement("if (", to_expression(builtin_invocation_id_id), " >= ", get_entry_point().output_vertices, + ")"); + statement(" return;"); + }); + } + break; + + case StorageClassOutput: + { + ib_var_ref = patch ? patch_stage_out_var_name : stage_out_var_name; + + // Add the output interface struct as a local variable to the entry function. + // If the entry point should return the output struct, set the entry function + // to return the output interface struct, otherwise to return nothing. + // Indicate the output var requires early initialization. + bool ep_should_return_output = !get_is_rasterization_disabled(); + uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0; + if (!capture_output_to_buffer) + { + entry_func.add_local_variable(ib_var_id); + for (auto &blk_id : entry_func.blocks) + { + auto &blk = get<SPIRBlock>(blk_id); + if (blk.terminator == SPIRBlock::Return) + blk.return_value = rtn_id; + } + vars_needing_early_declaration.push_back(ib_var_id); + } + else + { + switch (get_execution_model()) + { + case ExecutionModelVertex: + case ExecutionModelTessellationEvaluation: + // Instead of declaring a struct variable to hold the output and then + // copying that to the output buffer, we'll declare the output variable + // as a reference to the final output element in the buffer. Then we can + // avoid the extra copy. + entry_func.fixup_hooks_in.push_back([=]() { + if (stage_out_var_id) + { + // The first member of the indirect buffer is always the number of vertices + // to draw. + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", + output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ", + to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", + to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id), + "];"); + } + }); + break; + case ExecutionModelTessellationControl: + if (patch) + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", + patch_output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), "];"); + }); + else + entry_func.fixup_hooks_in.push_back([=]() { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "* gl_out = &", + output_buffer_var_name, "[", to_expression(builtin_primitive_id_id), " * ", + get_entry_point().output_vertices, "];"); + }); + break; + default: + break; + } + } + break; + } + + default: + break; + } + + set_name(ib_type_id, to_name(ir.default_entry_point) + "_" + ib_var_ref); + set_name(ib_var_id, ib_var_ref); + + for (auto p_var : vars) + { + bool strip_array = + (get_execution_model() == ExecutionModelTessellationControl || + (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) && + !patch; + add_variable_to_interface_block(storage, ib_var_ref, ib_type, *p_var, strip_array); + } + + // Sort the members of the structure by their locations. + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Location); + member_sorter.sort(); + + // The member indices were saved to the original variables, but after the members + // were sorted, those indices are now likely incorrect. Fix those up now. + if (!patch) + fix_up_interface_member_indices(storage, ib_type_id); + + // For patch inputs, add one more member, holding the array of control point data. + if (get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput && patch && + stage_in_var_id) + { + uint32_t pcp_type_id = ir.increase_bound_by(1); + auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type); + pcp_type.basetype = SPIRType::ControlPointArray; + pcp_type.parent_type = pcp_type.type_alias = get_stage_in_struct_type().self; + pcp_type.storage = storage; + ir.meta[pcp_type_id] = ir.meta[ib_type.self]; + uint32_t mbr_idx = uint32_t(ib_type.member_types.size()); + ib_type.member_types.push_back(pcp_type_id); + set_member_name(ib_type.self, mbr_idx, "gl_in"); + } + + return ib_var_id; +} + +uint32_t CompilerMSL::add_interface_block_pointer(uint32_t ib_var_id, StorageClass storage) +{ + if (!ib_var_id) + return 0; + + uint32_t ib_ptr_var_id; + uint32_t next_id = ir.increase_bound_by(3); + auto &ib_type = expression_type(ib_var_id); + if (get_execution_model() == ExecutionModelTessellationControl) + { + // Tessellation control per-vertex I/O is presented as an array, so we must + // do the same with our struct here. + uint32_t ib_ptr_type_id = next_id++; + auto &ib_ptr_type = set<SPIRType>(ib_ptr_type_id, ib_type); + ib_ptr_type.parent_type = ib_ptr_type.type_alias = ib_type.self; + ib_ptr_type.pointer = true; + ib_ptr_type.storage = storage == StorageClassInput ? StorageClassWorkgroup : StorageClassStorageBuffer; + ir.meta[ib_ptr_type_id] = ir.meta[ib_type.self]; + // To ensure that get_variable_data_type() doesn't strip off the pointer, + // which we need, use another pointer. + uint32_t ib_ptr_ptr_type_id = next_id++; + auto &ib_ptr_ptr_type = set<SPIRType>(ib_ptr_ptr_type_id, ib_ptr_type); + ib_ptr_ptr_type.parent_type = ib_ptr_type_id; + ib_ptr_ptr_type.type_alias = ib_type.self; + ib_ptr_ptr_type.storage = StorageClassFunction; + ir.meta[ib_ptr_ptr_type_id] = ir.meta[ib_type.self]; + + ib_ptr_var_id = next_id; + set<SPIRVariable>(ib_ptr_var_id, ib_ptr_ptr_type_id, StorageClassFunction, 0); + set_name(ib_ptr_var_id, storage == StorageClassInput ? input_wg_var_name : "gl_out"); + } + else + { + // Tessellation evaluation per-vertex inputs are also presented as arrays. + // But, in Metal, this array uses a very special type, 'patch_control_point<T>', + // which is a container that can be used to access the control point data. + // To represent this, a special 'ControlPointArray' type has been added to the + // SPIRV-Cross type system. It should only be generated by and seen in the MSL + // backend (i.e. this one). + uint32_t pcp_type_id = next_id++; + auto &pcp_type = set<SPIRType>(pcp_type_id, ib_type); + pcp_type.basetype = SPIRType::ControlPointArray; + pcp_type.parent_type = pcp_type.type_alias = ib_type.self; + pcp_type.storage = storage; + ir.meta[pcp_type_id] = ir.meta[ib_type.self]; + + ib_ptr_var_id = next_id; + set<SPIRVariable>(ib_ptr_var_id, pcp_type_id, storage, 0); + set_name(ib_ptr_var_id, "gl_in"); + ir.meta[ib_ptr_var_id].decoration.qualified_alias = join(patch_stage_in_var_name, ".gl_in"); + } + return ib_ptr_var_id; +} + +// Ensure that the type is compatible with the builtin. +// If it is, simply return the given type ID. +// Otherwise, create a new type, and return it's ID. +uint32_t CompilerMSL::ensure_correct_builtin_type(uint32_t type_id, BuiltIn builtin) +{ + auto &type = get<SPIRType>(type_id); + + if ((builtin == BuiltInSampleMask && is_array(type)) || + ((builtin == BuiltInLayer || builtin == BuiltInViewportIndex) && type.basetype != SPIRType::UInt)) + { + uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); + uint32_t base_type_id = next_id++; + auto &base_type = set<SPIRType>(base_type_id); + base_type.basetype = SPIRType::UInt; + base_type.width = 32; + + if (!type.pointer) + return base_type_id; + + uint32_t ptr_type_id = next_id++; + auto &ptr_type = set<SPIRType>(ptr_type_id); + ptr_type = base_type; + ptr_type.pointer = true; + ptr_type.storage = type.storage; + ptr_type.parent_type = base_type_id; + return ptr_type_id; + } + + return type_id; +} + +// Ensure that the type is compatible with the vertex attribute. +// If it is, simply return the given type ID. +// Otherwise, create a new type, and return its ID. +uint32_t CompilerMSL::ensure_correct_attribute_type(uint32_t type_id, uint32_t location) +{ + auto &type = get<SPIRType>(type_id); + + auto p_va = vtx_attrs_by_location.find(location); + if (p_va == end(vtx_attrs_by_location)) + return type_id; + + switch (p_va->second.format) + { + case MSL_VERTEX_FORMAT_UINT8: + { + switch (type.basetype) + { + case SPIRType::UByte: + case SPIRType::UShort: + case SPIRType::UInt: + return type_id; + case SPIRType::Short: + case SPIRType::Int: + break; + default: + SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader"); + } + uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); + uint32_t base_type_id = next_id++; + auto &base_type = set<SPIRType>(base_type_id); + base_type = type; + base_type.basetype = type.basetype == SPIRType::Short ? SPIRType::UShort : SPIRType::UInt; + base_type.pointer = false; + + if (!type.pointer) + return base_type_id; + + uint32_t ptr_type_id = next_id++; + auto &ptr_type = set<SPIRType>(ptr_type_id); + ptr_type = base_type; + ptr_type.pointer = true; + ptr_type.storage = type.storage; + ptr_type.parent_type = base_type_id; + return ptr_type_id; + } + + case MSL_VERTEX_FORMAT_UINT16: + { + switch (type.basetype) + { + case SPIRType::UShort: + case SPIRType::UInt: + return type_id; + case SPIRType::Int: + break; + default: + SPIRV_CROSS_THROW("Vertex attribute type mismatch between host and shader"); + } + uint32_t next_id = ir.increase_bound_by(type.pointer ? 2 : 1); + uint32_t base_type_id = next_id++; + auto &base_type = set<SPIRType>(base_type_id); + base_type = type; + base_type.basetype = SPIRType::UInt; + base_type.pointer = false; + + if (!type.pointer) + return base_type_id; + + uint32_t ptr_type_id = next_id++; + auto &ptr_type = set<SPIRType>(ptr_type_id); + ptr_type = base_type; + ptr_type.pointer = true; + ptr_type.storage = type.storage; + ptr_type.parent_type = base_type_id; + return ptr_type_id; + } + + default: + case MSL_VERTEX_FORMAT_OTHER: + break; + } + + return type_id; +} + +// Sort the members of the struct type by offset, and pack and then pad members where needed +// to align MSL members with SPIR-V offsets. The struct members are iterated twice. Packing +// occurs first, followed by padding, because packing a member reduces both its size and its +// natural alignment, possibly requiring a padding member to be added ahead of it. +void CompilerMSL::align_struct(SPIRType &ib_type) +{ + uint32_t &ib_type_id = ib_type.self; + + // Sort the members of the interface structure by their offset. + // They should already be sorted per SPIR-V spec anyway. + MemberSorter member_sorter(ib_type, ir.meta[ib_type_id], MemberSorter::Offset); + member_sorter.sort(); + + uint32_t mbr_cnt = uint32_t(ib_type.member_types.size()); + + // Test the alignment of each member, and if a member should be closer to the previous + // member than the default spacing expects, it is likely that the previous member is in + // a packed format. If so, and the previous member is packable, pack it. + // For example...this applies to any 3-element vector that is followed by a scalar. + uint32_t curr_offset = 0; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + if (is_member_packable(ib_type, mbr_idx)) + { + set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPacked); + set_extended_member_decoration(ib_type_id, mbr_idx, SPIRVCrossDecorationPackedType, + ib_type.member_types[mbr_idx]); + } + + // Align current offset to the current member's default alignment. + size_t align_mask = get_declared_struct_member_alignment(ib_type, mbr_idx) - 1; + uint32_t aligned_curr_offset = uint32_t((curr_offset + align_mask) & ~align_mask); + + // Fetch the member offset as declared in the SPIRV. + uint32_t mbr_offset = get_member_decoration(ib_type_id, mbr_idx, DecorationOffset); + if (mbr_offset > aligned_curr_offset) + { + // Since MSL and SPIR-V have slightly different struct member alignment and + // size rules, we'll pad to standard C-packing rules. If the member is farther + // away than C-packing, expects, add an inert padding member before the the member. + MSLStructMemberKey key = get_struct_member_key(ib_type_id, mbr_idx); + struct_member_padding[key] = mbr_offset - curr_offset; + } + + // Increment the current offset to be positioned immediately after the current member. + // Don't do this for the last member since it can be unsized, and it is not relevant for padding purposes here. + if (mbr_idx + 1 < mbr_cnt) + curr_offset = mbr_offset + uint32_t(get_declared_struct_member_size(ib_type, mbr_idx)); + } +} + +// Returns whether the specified struct member supports a packable type +// variation that is smaller than the unpacked variation of that type. +bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index) +{ + // We've already marked it as packable + if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked)) + return true; + + auto &mbr_type = get<SPIRType>(ib_type.member_types[index]); + + uint32_t component_size = mbr_type.width / 8; + uint32_t unpacked_mbr_size; + if (mbr_type.vecsize == 3) + unpacked_mbr_size = component_size * (mbr_type.vecsize + 1) * mbr_type.columns; + else + unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns; + + // Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead, + // but we will use the same mechanism. + if (is_array(mbr_type) && (is_scalar(mbr_type) || is_vector(mbr_type)) && mbr_type.vecsize <= 2 && + type_struct_member_array_stride(ib_type, index) == 4 * component_size) + { + return true; + } + + // Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself. + // This can happen for struct A { float a }; A a[]; in std140 layout. + // TODO: Emit a padded struct which can be used for this purpose. + if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct) + { + size_t declared_struct_size = get_declared_struct_size(mbr_type); + size_t alignment = get_declared_struct_member_alignment(ib_type, index); + declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1); + if (type_struct_member_array_stride(ib_type, index) > declared_struct_size) + return true; + } + + // TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column. + //if (is_matrix(mbr_type) && mbr_type.vecsize <= 2 && type_struct_member_matrix_stride(ib_type, index) == 16) + // SPIRV_CROSS_THROW("Currently cannot support matrices with small vector size in std140 layout."); + + // Only vectors or 3-row matrices need to be packed. + if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3)) + return false; + + // Only row-major matrices need to be packed. + if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor)) + return false; + + if (is_array(mbr_type)) + { + // If member is an array, and the array stride is larger than the type needs, don't pack it. + // Take into consideration multi-dimentional arrays. + uint32_t md_elem_cnt = 1; + size_t last_elem_idx = mbr_type.array.size() - 1; + for (uint32_t i = 0; i < last_elem_idx; i++) + md_elem_cnt *= max(to_array_size_literal(mbr_type, i), 1u); + + uint32_t unpacked_array_stride = unpacked_mbr_size * md_elem_cnt; + uint32_t array_stride = type_struct_member_array_stride(ib_type, index); + return unpacked_array_stride > array_stride; + } + else + { + uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset); + // For vectors, pack if the member's offset doesn't conform to the + // type's usual alignment. For example, a float3 at offset 4. + if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size)) + return true; + // Pack if there is not enough space between this member and next. + // If last member, only pack if it's a row-major matrix. + if (index < ib_type.member_types.size() - 1) + { + uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset); + return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr; + } + else + return is_matrix(mbr_type); + } +} + +// Returns a combination of type ID and member index for use as hash key +MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t index) +{ + MSLStructMemberKey k = type_id; + k <<= 32; + k += index; + return k; +} + +void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + if (!has_extended_decoration(lhs_expression, SPIRVCrossDecorationPacked) || + get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType) == 0) + { + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } + else + { + // Special handling when storing to a float[] or float2[] in std140 layout. + + auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType)); + string lhs = to_dereferenced_expression(lhs_expression); + string rhs = to_pointer_expression(rhs_expression); + + // Unpack the expression so we can store to it with a float or float2. + // It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead. + if (is_scalar(type) && is_array(type)) + lhs = enclose_expression(lhs) + ".x"; + else if (is_vector(type) && type.vecsize == 2 && is_array(type)) + lhs = enclose_expression(lhs) + ".xy"; + + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + register_write(lhs_expression); + } +} + +// Converts the format of the current expression from packed to unpacked, +// by wrapping the expression in a constructor of the appropriate type. +string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id) +{ + const SPIRType *packed_type = nullptr; + if (packed_type_id) + packed_type = &get<SPIRType>(packed_type_id); + + // float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead. + if (packed_type && is_array(*packed_type) && is_scalar(*packed_type)) + return enclose_expression(expr_str) + ".x"; + else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2) + return enclose_expression(expr_str) + ".xy"; + else + return join(type_to_glsl(type), "(", expr_str, ")"); +} + +// Emits the file header info +void CompilerMSL::emit_header() +{ + for (auto &pragma : pragma_lines) + statement(pragma); + + if (!pragma_lines.empty()) + statement(""); + + statement("#include <metal_stdlib>"); + statement("#include <simd/simd.h>"); + + for (auto &header : header_lines) + statement(header); + + statement(""); + statement("using namespace metal;"); + statement(""); + + for (auto &td : typedef_lines) + statement(td); + + if (!typedef_lines.empty()) + statement(""); +} + +void CompilerMSL::add_pragma_line(const string &line) +{ + auto rslt = pragma_lines.insert(line); + if (rslt.second) + force_recompile = true; +} + +void CompilerMSL::add_typedef_line(const string &line) +{ + auto rslt = typedef_lines.insert(line); + if (rslt.second) + force_recompile = true; +} + +// Emits any needed custom function bodies. +void CompilerMSL::emit_custom_functions() +{ + for (uint32_t i = SPVFuncImplArrayCopyMultidimMax; i >= 2; i--) + if (spv_function_implementations.count(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i))) + spv_function_implementations.insert(static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + i - 1)); + + for (auto &spv_func : spv_function_implementations) + { + switch (spv_func) + { + case SPVFuncImplMod: + statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()"); + statement("template<typename Tx, typename Ty>"); + statement("Tx mod(Tx x, Ty y)"); + begin_scope(); + statement("return x - y * floor(x / y);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRadians: + statement("// Implementation of the GLSL radians() function"); + statement("template<typename T>"); + statement("T radians(T d)"); + begin_scope(); + statement("return d * T(0.01745329251);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDegrees: + statement("// Implementation of the GLSL degrees() function"); + statement("template<typename T>"); + statement("T degrees(T r)"); + begin_scope(); + statement("return r * T(57.2957795131);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplFindILsb: + statement("// Implementation of the GLSL findLSB() function"); + statement("template<typename T>"); + statement("T findLSB(T x)"); + begin_scope(); + statement("return select(ctz(x), T(-1), x == T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplFindUMsb: + statement("// Implementation of the unsigned GLSL findMSB() function"); + statement("template<typename T>"); + statement("T findUMSB(T x)"); + begin_scope(); + statement("return select(clz(T(0)) - (clz(x) + T(1)), T(-1), x == T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplFindSMsb: + statement("// Implementation of the signed GLSL findMSB() function"); + statement("template<typename T>"); + statement("T findSMSB(T x)"); + begin_scope(); + statement("T v = select(x, T(-1) - x, x < T(0));"); + statement("return select(clz(T(0)) - (clz(v) + T(1)), T(-1), v == T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplSSign: + statement("// Implementation of the GLSL sign() function for integer types"); + statement("template<typename T, typename E = typename enable_if<is_integral<T>::value>::type>"); + statement("T sign(T x)"); + begin_scope(); + statement("return select(select(select(x, T(0), x == T(0)), T(1), x > T(0)), T(-1), x < T(0));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplArrayCopy: + statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via " + "assignment."); + statement("template<typename T, uint N>"); + statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])"); + begin_scope(); + statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); + end_scope(); + statement(""); + + statement("template<typename T, uint N>"); + statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])"); + begin_scope(); + statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); + end_scope(); + statement(""); + break; + + case SPVFuncImplArrayOfArrayCopy2Dim: + case SPVFuncImplArrayOfArrayCopy3Dim: + case SPVFuncImplArrayOfArrayCopy4Dim: + case SPVFuncImplArrayOfArrayCopy5Dim: + case SPVFuncImplArrayOfArrayCopy6Dim: + { + static const char *function_name_tags[] = { + "FromStack", + "FromConstant", + }; + + static const char *src_address_space[] = { + "thread const", + "constant", + }; + + for (uint32_t variant = 0; variant < 2; variant++) + { + uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; + string tmp = "template<typename T"; + for (uint8_t i = 0; i < dimensions; i++) + { + tmp += ", uint "; + tmp += 'A' + i; + } + tmp += ">"; + statement(tmp); + + string array_arg; + for (uint8_t i = 0; i < dimensions; i++) + { + array_arg += "["; + array_arg += 'A' + i; + array_arg += "]"; + } + + statement("void spvArrayCopy", function_name_tags[variant], dimensions, "(thread T (&dst)", array_arg, + ", ", src_address_space[variant], " T (&src)", array_arg, ")"); + + begin_scope(); + statement("for (uint i = 0; i < A; i++)"); + begin_scope(); + statement("spvArrayCopy", function_name_tags[variant], dimensions - 1, "(dst[i], src[i]);"); + end_scope(); + end_scope(); + statement(""); + } + break; + } + + case SPVFuncImplTexelBufferCoords: + { + string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement("uint2 spvTexelBufferCoord(uint tc)"); + begin_scope(); + statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); + end_scope(); + statement(""); + break; + } + + case SPVFuncImplInverse4x4: + statement("// Returns the determinant of a 2x2 matrix."); + statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + + statement("// Returns the determinant of a 3x3 matrix."); + statement("inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + "float c2, float c3)"); + begin_scope(); + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, " + "b2, b3);"); + end_scope(); + statement(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float4x4 spvInverse4x4(float4x4 m)"); + begin_scope(); + statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement("adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement("adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement("adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement("adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement("adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement("adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement("adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement("adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement("adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement("adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement("adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "m[2][3]);"); + statement_no_indent(""); + statement("adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement("adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement("adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement("adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "m[2][2]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] " + "* m[3][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplInverse3x3: + if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + } + + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float3x3 spvInverse3x3(float3x3 m)"); + begin_scope(); + statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); + statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); + statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); + statement_no_indent(""); + statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); + statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); + statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); + statement_no_indent(""); + statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); + statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); + statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplInverse2x2: + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float2x2 spvInverse2x2(float2x2 m)"); + begin_scope(); + statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement("adj[0][0] = m[1][1];"); + statement("adj[0][1] = -m[0][1];"); + statement_no_indent(""); + statement("adj[1][0] = -m[1][0];"); + statement("adj[1][1] = m[0][0];"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);"); + statement_no_indent(""); + statement("// Divide the classical adjoint matrix by the determinant."); + statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); + statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRowMajor2x3: + statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); + statement("float2x3 spvConvertFromRowMajor2x3(float2x3 m)"); + begin_scope(); + statement("return float2x3(float3(m[0][0], m[0][2], m[1][1]), float3(m[0][1], m[1][0], m[1][2]));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRowMajor2x4: + statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); + statement("float2x4 spvConvertFromRowMajor2x4(float2x4 m)"); + begin_scope(); + statement("return float2x4(float4(m[0][0], m[0][2], m[1][0], m[1][2]), float4(m[0][1], m[0][3], m[1][1], " + "m[1][3]));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRowMajor3x2: + statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); + statement("float3x2 spvConvertFromRowMajor3x2(float3x2 m)"); + begin_scope(); + statement("return float3x2(float2(m[0][0], m[1][1]), float2(m[0][1], m[2][0]), float2(m[1][0], m[2][1]));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRowMajor3x4: + statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); + statement("float3x4 spvConvertFromRowMajor3x4(float3x4 m)"); + begin_scope(); + statement("return float3x4(float4(m[0][0], m[0][3], m[1][2], m[2][1]), float4(m[0][1], m[1][0], m[1][3], " + "m[2][2]), float4(m[0][2], m[1][1], m[2][0], m[2][3]));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRowMajor4x2: + statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); + statement("float4x2 spvConvertFromRowMajor4x2(float4x2 m)"); + begin_scope(); + statement("return float4x2(float2(m[0][0], m[2][0]), float2(m[0][1], m[2][1]), float2(m[1][0], m[3][0]), " + "float2(m[1][1], m[3][1]));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplRowMajor4x3: + statement("// Implementation of a conversion of matrix content from RowMajor to ColumnMajor organization."); + statement("float4x3 spvConvertFromRowMajor4x3(float4x3 m)"); + begin_scope(); + statement("return float4x3(float3(m[0][0], m[1][1], m[2][2]), float3(m[0][1], m[1][2], m[3][0]), " + "float3(m[0][2], m[2][0], m[3][1]), float3(m[1][0], m[2][1], m[3][2]));"); + end_scope(); + statement(""); + break; + + case SPVFuncImplTextureSwizzle: + statement("enum class spvSwizzle : uint"); + begin_scope(); + statement("none = 0,"); + statement("zero,"); + statement("one,"); + statement("red,"); + statement("green,"); + statement("blue,"); + statement("alpha"); + end_scope_decl(); + statement(""); + statement("template<typename T> struct spvRemoveReference { typedef T type; };"); + statement("template<typename T> struct spvRemoveReference<thread T&> { typedef T type; };"); + statement("template<typename T> struct spvRemoveReference<thread T&&> { typedef T type; };"); + statement("template<typename T> inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference<T>::type& x)"); + begin_scope(); + statement("return static_cast<thread T&&>(x);"); + end_scope(); + statement("template<typename T> inline constexpr thread T&& spvForward(thread typename " + "spvRemoveReference<T>::type&& x)"); + begin_scope(); + statement("return static_cast<thread T&&>(x);"); + end_scope(); + statement(""); + statement("template<typename T>"); + statement("inline T spvGetSwizzle(vec<T, 4> x, T c, spvSwizzle s)"); + begin_scope(); + statement("switch (s)"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement(" return c;"); + statement("case spvSwizzle::zero:"); + statement(" return 0;"); + statement("case spvSwizzle::one:"); + statement(" return 1;"); + statement("case spvSwizzle::red:"); + statement(" return x.r;"); + statement("case spvSwizzle::green:"); + statement(" return x.g;"); + statement("case spvSwizzle::blue:"); + statement(" return x.b;"); + statement("case spvSwizzle::alpha:"); + statement(" return x.a;"); + end_scope(); + end_scope(); + statement(""); + statement("// Wrapper function that swizzles texture samples and fetches."); + statement("template<typename T>"); + statement("inline vec<T, 4> spvTextureSwizzle(vec<T, 4> x, uint s)"); + begin_scope(); + statement("if (!s)"); + statement(" return x;"); + statement("return vec<T, 4>(spvGetSwizzle(x, x.r, spvSwizzle((s >> 0) & 0xFF)), " + "spvGetSwizzle(x, x.g, spvSwizzle((s >> 8) & 0xFF)), spvGetSwizzle(x, x.b, spvSwizzle((s >> 16) " + "& 0xFF)), " + "spvGetSwizzle(x, x.a, spvSwizzle((s >> 24) & 0xFF)));"); + end_scope(); + statement(""); + statement("template<typename T>"); + statement("inline T spvTextureSwizzle(T x, uint s)"); + begin_scope(); + statement("return spvTextureSwizzle(vec<T, 4>(x, 0, 0, 1), s).x;"); + end_scope(); + statement(""); + statement("// Wrapper function that swizzles texture gathers."); + statement("template<typename T, typename Tex, typename... Ts>"); + statement( + "inline vec<T, 4> spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) " + "METAL_CONST_ARG(c)"); + begin_scope(); + statement("if (sw)"); + begin_scope(); + statement("switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF))"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement(" break;"); + statement("case spvSwizzle::zero:"); + statement(" return vec<T, 4>(0, 0, 0, 0);"); + statement("case spvSwizzle::one:"); + statement(" return vec<T, 4>(1, 1, 1, 1);"); + statement("case spvSwizzle::red:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::x);"); + statement("case spvSwizzle::green:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::y);"); + statement("case spvSwizzle::blue:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::z);"); + statement("case spvSwizzle::alpha:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::w);"); + end_scope(); + end_scope(); + // texture::gather insists on its component parameter being a constant + // expression, so we need this silly workaround just to compile the shader. + statement("switch (c)"); + begin_scope(); + statement("case component::x:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::x);"); + statement("case component::y:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::y);"); + statement("case component::z:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::z);"); + statement("case component::w:"); + statement(" return t.gather(s, spvForward<Ts>(params)..., component::w);"); + end_scope(); + end_scope(); + statement(""); + statement("// Wrapper function that swizzles depth texture gathers."); + statement("template<typename T, typename Tex, typename... Ts>"); + statement( + "inline vec<T, 4> spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) "); + begin_scope(); + statement("if (sw)"); + begin_scope(); + statement("switch (spvSwizzle(sw & 0xFF))"); + begin_scope(); + statement("case spvSwizzle::none:"); + statement("case spvSwizzle::red:"); + statement(" break;"); + statement("case spvSwizzle::zero:"); + statement("case spvSwizzle::green:"); + statement("case spvSwizzle::blue:"); + statement("case spvSwizzle::alpha:"); + statement(" return vec<T, 4>(0, 0, 0, 0);"); + statement("case spvSwizzle::one:"); + statement(" return vec<T, 4>(1, 1, 1, 1);"); + end_scope(); + end_scope(); + statement("return t.gather_compare(s, spvForward<Ts>(params)...);"); + end_scope(); + statement(""); + + default: + break; + } + } +} + +// Undefined global memory is not allowed in MSL. +// Declare constant and init to zeros. Use {}, as global constructors can break Metal. +void CompilerMSL::declare_undefined_values() +{ + bool emitted = false; + ir.for_each_typed_id<SPIRUndef>([&](uint32_t, SPIRUndef &undef) { + auto &type = this->get<SPIRType>(undef.basetype); + statement("constant ", variable_decl(type, to_name(undef.self), undef.self), " = {};"); + emitted = true; + }); + + if (emitted) + statement(""); +} + +void CompilerMSL::declare_constant_arrays() +{ + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to + // global constants directly, so we are able to use constants as variable expressions. + bool emitted = false; + + ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get<SPIRType>(c.constant_type); + if (!type.array.empty()) + { + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); + + if (emitted) + statement(""); +} + +void CompilerMSL::emit_resources() +{ + declare_constant_arrays(); + declare_undefined_values(); + + // Emit the special [[stage_in]] and [[stage_out]] interface blocks which we created. + emit_interface_block(stage_out_var_id); + emit_interface_block(patch_stage_out_var_id); + emit_interface_block(stage_in_var_id); + emit_interface_block(patch_stage_in_var_id); +} + +// Emit declarations for the specialization Metal function constants +void CompilerMSL::emit_specialization_constants_and_structs() +{ + SpecializationConstant wg_x, wg_y, wg_z; + uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + bool emitted = false; + + unordered_set<uint32_t> declared_structs; + + for (auto &id_ : ir.ids_for_constant_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get<SPIRConstant>(); + + if (c.self == workgroup_size_id) + { + // TODO: This can be expressed as a [[threads_per_threadgroup]] input semantic, but we need to know + // the work group size at compile time in SPIR-V, and [[threads_per_threadgroup]] would need to be passed around as a global. + // The work group size may be a specialization constant. + statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup), " [[maybe_unused]] = ", + constant_expression(get<SPIRConstant>(workgroup_size_id)), ";"); + emitted = true; + } + else if (c.specialization) + { + auto &type = get<SPIRType>(c.constant_type); + string sc_type_name = type_to_glsl(type); + string sc_name = to_name(c.self); + string sc_tmp_name = sc_name + "_tmp"; + + // Function constants are only supported in MSL 1.2 and later. + // If we don't support it just declare the "default" directly. + // This "default" value can be overridden to the true specialization constant by the API user. + // Specialization constants which are used as array length expressions cannot be function constants in MSL, + // so just fall back to macros. + if (msl_options.supports_msl_version(1, 2) && has_decoration(c.self, DecorationSpecId) && + !c.is_used_as_array_length) + { + uint32_t constant_id = get_decoration(c.self, DecorationSpecId); + // Only scalar, non-composite values can be function constants. + statement("constant ", sc_type_name, " ", sc_tmp_name, " [[function_constant(", constant_id, + ")]];"); + statement("constant ", sc_type_name, " ", sc_name, " = is_function_constant_defined(", sc_tmp_name, + ") ? ", sc_tmp_name, " : ", constant_expression(c), ";"); + } + else if (has_decoration(c.self, DecorationSpecId)) + { + // Fallback to macro overrides. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, + ";"); + } + else + { + // Composite specialization constants must be built from other specialization constants. + statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + } + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get<SPIRConstantOp>(); + auto &type = get<SPIRType>(c.basetype); + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + // Output non-builtin interface structs. These include local function structs + // and structs nested within uniform and read-write buffers. + auto &type = id.get<SPIRType>(); + uint32_t type_id = type.self; + + bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty(); + bool is_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_builtin_block = is_block && is_builtin_type(type); + bool is_declarable_struct = is_struct && !is_builtin_block; + + // We'll declare this later. + if (stage_out_var_id && get_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_out_var_id && get_patch_stage_out_struct_type().self == type_id) + is_declarable_struct = false; + if (stage_in_var_id && get_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + if (patch_stage_in_var_id && get_patch_stage_in_struct_type().self == type_id) + is_declarable_struct = false; + + // Align and emit declarable structs...but avoid declaring each more than once. + if (is_declarable_struct && declared_structs.count(type_id) == 0) + { + if (emitted) + statement(""); + emitted = false; + + declared_structs.insert(type_id); + + if (has_extended_decoration(type_id, SPIRVCrossDecorationPacked)) + align_struct(type); + + // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. + emit_struct(get<SPIRType>(type_id)); + } + } + } + + if (emitted) + statement(""); +} + +void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, + join("(isunordered(", to_enclosed_unpacked_expression(op0), ", ", to_enclosed_unpacked_expression(op1), + ") || ", to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1), + ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length) +{ + // If this is a per-vertex output, remap it to the I/O array buffer. + auto *var = maybe_get<SPIRVariable>(ops[2]); + BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); + if (var && + (var->storage == StorageClassInput || + (get_execution_model() == ExecutionModelTessellationControl && var->storage == StorageClassOutput)) && + !(has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var))) && + (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || + bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || + get_variable_data_type(*var).basetype == SPIRType::Struct)) + { + AccessChainMeta meta; + std::vector<uint32_t> indices; + uint32_t next_id = ir.increase_bound_by(2); + + indices.reserve(length - 3 + 1); + uint32_t type_id = next_id++; + SPIRType new_uint_type; + new_uint_type.basetype = SPIRType::UInt; + new_uint_type.width = 32; + set<SPIRType>(type_id, new_uint_type); + + indices.push_back(ops[3]); + + uint32_t const_mbr_id = next_id++; + uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); + uint32_t ptr = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + if (var->storage == StorageClassInput || has_decoration(get_variable_element_type(*var).self, DecorationBlock)) + { + uint32_t i = 4; + auto *type = &get_variable_element_type(*var); + if (index == uint32_t(-1) && length >= 5) + { + // Maybe this is a struct type in the input class, in which case + // we put it as a decoration on the corresponding member. + index = get_extended_member_decoration(ops[2], get_constant(ops[4]).scalar(), + SPIRVCrossDecorationInterfaceMemberIndex); + assert(index != uint32_t(-1)); + i++; + type = &get<SPIRType>(type->member_types[get_constant(ops[4]).scalar()]); + } + // In this case, we flattened structures and arrays, so now we have to + // combine the following indices. If we encounter a non-constant index, + // we're hosed. + for (; i < length; ++i) + { + if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) + break; + + auto &c = get_constant(ops[i]); + index += c.scalar(); + if (type->parent_type) + type = &get<SPIRType>(type->parent_type); + else if (type->basetype == SPIRType::Struct) + type = &get<SPIRType>(type->member_types[c.scalar()]); + } + // If the access chain terminates at a composite type, the composite + // itself might be copied. In that case, we must unflatten it. + if (is_matrix(*type) || is_array(*type) || type->basetype == SPIRType::Struct) + { + std::string temp_name = join(to_name(var->self), "_", ops[1]); + statement(variable_decl(*type, temp_name, var->self), ";"); + // Set up the initializer for this temporary variable. + indices.push_back(const_mbr_id); + if (type->basetype == SPIRType::Struct) + { + for (uint32_t j = 0; j < type->member_types.size(); j++) + { + index = get_extended_member_decoration(ops[2], j, SPIRVCrossDecorationInterfaceMemberIndex); + const auto &mbr_type = get<SPIRType>(type->member_types[j]); + if (is_matrix(mbr_type)) + { + for (uint32_t k = 0; k < mbr_type.columns; k++, index++) + { + set<SPIRConstant>(const_mbr_id, type_id, index, false); + auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, + true); + statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); + } + } + else if (is_array(mbr_type)) + { + for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++) + { + set<SPIRConstant>(const_mbr_id, type_id, index, false); + auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, + true); + statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); + } + } + else + { + set<SPIRConstant>(const_mbr_id, type_id, index, false); + auto e = + access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, true); + statement(temp_name, ".", to_member_name(*type, j), " = ", e, ";"); + } + } + } + else if (is_matrix(*type)) + { + for (uint32_t j = 0; j < type->columns; j++, index++) + { + set<SPIRConstant>(const_mbr_id, type_id, index, false); + auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); + statement(temp_name, "[", j, "] = ", e, ";"); + } + } + else // Must be an array + { + assert(is_array(*type)); + for (uint32_t j = 0; j < type->array[0]; j++, index++) + { + set<SPIRConstant>(const_mbr_id, type_id, index, false); + auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); + statement(temp_name, "[", j, "] = ", e, ";"); + } + } + + // This needs to be a variable instead of an expression so we don't + // try to dereference this as a variable pointer. + set<SPIRVariable>(ops[1], ops[0], var->storage); + ir.meta[ops[1]] = ir.meta[ops[2]]; + set_name(ops[1], temp_name); + if (has_decoration(var->self, DecorationInvariant)) + set_decoration(ops[1], DecorationInvariant); + for (uint32_t j = 2; j < length; j++) + inherit_expression_dependencies(ops[1], ops[j]); + return true; + } + else + { + set<SPIRConstant>(const_mbr_id, type_id, index, false); + indices.push_back(const_mbr_id); + + if (i < length) + indices.insert(indices.end(), ops + i, ops + length); + } + } + else + { + assert(index != uint32_t(-1)); + set<SPIRConstant>(const_mbr_id, type_id, index, false); + indices.push_back(const_mbr_id); + + indices.insert(indices.end(), ops + 4, ops + length); + } + + // We use the pointer to the base of the input/output array here, + // so this is always a pointer chain. + auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), get<SPIRType>(ops[0]), &meta, true); + auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2])); + expr.loaded_from = var->self; + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; + + // Mark the result as being packed if necessary. + if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPacked); + if (meta.storage_packed_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPackedType, meta.storage_packed_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } + + return true; + } + + // If this is the inner tessellation level, and we're tessellating triangles, + // drop the last index. It isn't an array in this case, so we can't have an + // array reference here. We need to make this ID a variable instead of an + // expression so we don't try to dereference it as a variable pointer. + // Don't do this if the index is a constant 1, though. We need to drop stores + // to that one. + auto *m = ir.find_meta(var ? var->self : 0); + if (get_execution_model() == ExecutionModelTessellationControl && var && m && + m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles)) + { + auto *c = maybe_get<SPIRConstant>(ops[3]); + if (c && c->scalar() == 1) + return false; + auto &dest_var = set<SPIRVariable>(ops[1], *var); + dest_var.basetype = ops[0]; + ir.meta[ops[1]] = ir.meta[ops[2]]; + inherit_expression_dependencies(ops[1], ops[2]); + return true; + } + + return false; +} + +bool CompilerMSL::is_out_of_bounds_tessellation_level(uint32_t id_lhs) +{ + if (!get_entry_point().flags.get(ExecutionModeTriangles)) + return false; + + // In SPIR-V, TessLevelInner always has two elements and TessLevelOuter always has + // four. This is true even if we are tessellating triangles. This allows clients + // to use a single tessellation control shader with multiple tessellation evaluation + // shaders. + // In Metal, however, only the first element of TessLevelInner and the first three + // of TessLevelOuter are accessible. This stems from how in Metal, the tessellation + // levels must be stored to a dedicated buffer in a particular format that depends + // on the patch type. Therefore, in Triangles mode, any access to the second + // inner level or the fourth outer level must be dropped. + const auto *e = maybe_get<SPIRExpression>(id_lhs); + if (!e || !e->access_chain) + return false; + BuiltIn builtin = BuiltIn(get_decoration(e->loaded_from, DecorationBuiltIn)); + if (builtin != BuiltInTessLevelInner && builtin != BuiltInTessLevelOuter) + return false; + auto *c = maybe_get<SPIRConstant>(e->implied_read_expressions[1]); + if (!c) + return false; + return (builtin == BuiltInTessLevelInner && c->scalar() == 1) || + (builtin == BuiltInTessLevelOuter && c->scalar() == 3); +} + +// Override for MSL-specific syntax instructions +void CompilerMSL::emit_instruction(const Instruction &instruction) +{ +#define MSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define MSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define MSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define MSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define MSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define MSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define MSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define MSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) +#define MSL_UNORD_BOP(op) emit_binary_unord_op(ops[0], ops[1], ops[2], ops[3], #op) + + auto ops = stream(instruction); + auto opcode = static_cast<Op>(instruction.op); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (opcode) + { + + // Comparisons + case OpIEqual: + MSL_BOP_CAST(==, int_type); + break; + + case OpLogicalEqual: + case OpFOrdEqual: + MSL_BOP(==); + break; + + case OpINotEqual: + MSL_BOP_CAST(!=, int_type); + break; + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + MSL_BOP(!=); + break; + + case OpUGreaterThan: + MSL_BOP_CAST(>, uint_type); + break; + + case OpSGreaterThan: + MSL_BOP_CAST(>, int_type); + break; + + case OpFOrdGreaterThan: + MSL_BOP(>); + break; + + case OpUGreaterThanEqual: + MSL_BOP_CAST(>=, uint_type); + break; + + case OpSGreaterThanEqual: + MSL_BOP_CAST(>=, int_type); + break; + + case OpFOrdGreaterThanEqual: + MSL_BOP(>=); + break; + + case OpULessThan: + MSL_BOP_CAST(<, uint_type); + break; + + case OpSLessThan: + MSL_BOP_CAST(<, int_type); + break; + + case OpFOrdLessThan: + MSL_BOP(<); + break; + + case OpULessThanEqual: + MSL_BOP_CAST(<=, uint_type); + break; + + case OpSLessThanEqual: + MSL_BOP_CAST(<=, int_type); + break; + + case OpFOrdLessThanEqual: + MSL_BOP(<=); + break; + + case OpFUnordEqual: + MSL_UNORD_BOP(==); + break; + + case OpFUnordNotEqual: + MSL_UNORD_BOP(!=); + break; + + case OpFUnordGreaterThan: + MSL_UNORD_BOP(>); + break; + + case OpFUnordGreaterThanEqual: + MSL_UNORD_BOP(>=); + break; + + case OpFUnordLessThan: + MSL_UNORD_BOP(<); + break; + + case OpFUnordLessThanEqual: + MSL_UNORD_BOP(<=); + break; + + // Derivatives + case OpDPdx: + case OpDPdxFine: + case OpDPdxCoarse: + MSL_UFOP(dfdx); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + case OpDPdyFine: + case OpDPdyCoarse: + MSL_UFOP(dfdy); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + MSL_UFOP(fwidth); + register_control_dependent_expression(ops[1]); + break; + + // Bitfield + case OpBitFieldInsert: + MSL_QFOP(insert_bits); + break; + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + MSL_TFOP(extract_bits); + break; + + case OpBitReverse: + MSL_UFOP(reverse_bits); + break; + + case OpBitCount: + MSL_UFOP(popcount); + break; + + case OpFRem: + MSL_BFOP(fmod); + break; + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t mem_sem = ops[4]; + uint32_t val = ops[5]; + emit_atomic_func_op(result_type, id, "atomic_exchange_explicit", mem_sem, mem_sem, false, ptr, val); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t mem_sem_pass = ops[4]; + uint32_t mem_sem_fail = ops[5]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true, + ptr, comp, true, false, val); + break; + } + + case OpAtomicCompareExchangeWeak: + SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile."); + + case OpAtomicLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t mem_sem = ops[4]; + emit_atomic_func_op(result_type, id, "atomic_load_explicit", mem_sem, mem_sem, false, ptr, 0); + break; + } + + case OpAtomicStore: + { + uint32_t result_type = expression_type(ops[0]).self; + uint32_t id = ops[0]; + uint32_t ptr = ops[0]; + uint32_t mem_sem = ops[2]; + uint32_t val = ops[3]; + emit_atomic_func_op(result_type, id, "atomic_store_explicit", mem_sem, mem_sem, false, ptr, val); + break; + } + +#define MSL_AFMO_IMPL(op, valsrc, valconst) \ + do \ + { \ + uint32_t result_type = ops[0]; \ + uint32_t id = ops[1]; \ + uint32_t ptr = ops[2]; \ + uint32_t mem_sem = ops[4]; \ + uint32_t val = valsrc; \ + emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \ + false, valconst); \ + } while (false) + +#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5], false) +#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1, true) + + case OpAtomicIIncrement: + MSL_AFMIO(add); + break; + + case OpAtomicIDecrement: + MSL_AFMIO(sub); + break; + + case OpAtomicIAdd: + MSL_AFMO(add); + break; + + case OpAtomicISub: + MSL_AFMO(sub); + break; + + case OpAtomicSMin: + case OpAtomicUMin: + MSL_AFMO(min); + break; + + case OpAtomicSMax: + case OpAtomicUMax: + MSL_AFMO(max); + break; + + case OpAtomicAnd: + MSL_AFMO(and); + break; + + case OpAtomicOr: + MSL_AFMO(or); + break; + + case OpAtomicXor: + MSL_AFMO(xor); + break; + + // Images + + // Reads == Fetches in Metal + case OpImageRead: + { + // Mark that this shader reads from this image + uint32_t img_id = ops[2]; + auto &type = expression_type(img_id); + if (type.image.dim != DimSubpassData) + { + auto *p_var = maybe_get_backing_variable(img_id); + if (p_var && has_decoration(p_var->self, DecorationNonReadable)) + { + unset_decoration(p_var->self, DecorationNonReadable); + force_recompile = true; + } + } + + emit_texture_op(instruction); + break; + } + + case OpImageWrite: + { + uint32_t img_id = ops[0]; + uint32_t coord_id = ops[1]; + uint32_t texel_id = ops[2]; + const uint32_t *opt = &ops[3]; + uint32_t length = instruction.length - 3; + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img_id); + auto &img_type = get<SPIRType>(type.self); + + // Ensure this image has been marked as being written to and force a + // recommpile so that the image type output will include write access + auto *p_var = maybe_get_backing_variable(img_id); + if (p_var && has_decoration(p_var->self, DecorationNonWritable)) + { + unset_decoration(p_var->self, DecorationNonWritable); + force_recompile = true; + } + + bool forward = false; + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t flags = 0; + + if (length) + { + flags = *opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + + auto &texel_type = expression_type(texel_id); + auto store_type = texel_type; + store_type.vecsize = 4; + + statement(join( + to_expression(img_id), ".write(", remap_swizzle(store_type, texel_type.vecsize, to_expression(texel_id)), + ", ", + to_function_args(img_id, img_type, true, false, false, coord_id, 0, 0, 0, 0, lod, 0, 0, 0, 0, 0, &forward), + ");")); + + if (p_var && variable_storage_is_aliased(*p_var)) + flush_all_aliased_variables(); + + break; + } + + case OpImageQuerySize: + case OpImageQuerySizeLod: + { + uint32_t rslt_type_id = ops[0]; + auto &rslt_type = get<SPIRType>(rslt_type_id); + + uint32_t id = ops[1]; + + uint32_t img_id = ops[2]; + string img_exp = to_expression(img_id); + auto &img_type = expression_type(img_id); + Dim img_dim = img_type.image.dim; + bool img_is_array = img_type.image.arrayed; + + if (img_type.basetype != SPIRType::Image) + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + + string lod; + if (opcode == OpImageQuerySizeLod) + { + // LOD index defaults to zero, so don't bother outputing level zero index + string decl_lod = to_expression(ops[3]); + if (decl_lod != "0") + lod = decl_lod; + } + + string expr = type_to_glsl(rslt_type) + "("; + expr += img_exp + ".get_width(" + lod + ")"; + + if (img_dim == Dim2D || img_dim == DimCube || img_dim == Dim3D) + expr += ", " + img_exp + ".get_height(" + lod + ")"; + + if (img_dim == Dim3D) + expr += ", " + img_exp + ".get_depth(" + lod + ")"; + + if (img_is_array) + expr += ", " + img_exp + ".get_array_size()"; + + expr += ")"; + + emit_op(rslt_type_id, id, expr, should_forward(img_id)); + + break; + } + + case OpImageQueryLod: + SPIRV_CROSS_THROW("MSL does not support textureQueryLod()."); + +#define MSL_ImgQry(qrytype) \ + do \ + { \ + uint32_t rslt_type_id = ops[0]; \ + auto &rslt_type = get<SPIRType>(rslt_type_id); \ + uint32_t id = ops[1]; \ + uint32_t img_id = ops[2]; \ + string img_exp = to_expression(img_id); \ + string expr = type_to_glsl(rslt_type) + "(" + img_exp + ".get_num_" #qrytype "())"; \ + emit_op(rslt_type_id, id, expr, should_forward(img_id)); \ + } while (false) + + case OpImageQueryLevels: + MSL_ImgQry(mip_levels); + break; + + case OpImageQuerySamples: + MSL_ImgQry(samples); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *combined = maybe_get<SPIRCombinedImageSampler>(ops[2]); + + if (combined) + { + auto &e = emit_op(result_type, id, to_expression(combined->image), true, true); + auto *var = maybe_get_backing_variable(combined->image); + if (var) + e.loaded_from = var->self; + } + else + { + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + e.loaded_from = var->self; + } + break; + } + + // Casting + case OpQuantizeToF16: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + string exp; + auto &type = get<SPIRType>(result_type); + + switch (type.vecsize) + { + case 1: + exp = join("float(half(", to_expression(arg), "))"); + break; + case 2: + exp = join("float2(half2(", to_expression(arg), "))"); + break; + case 3: + exp = join("float3(half3(", to_expression(arg), "))"); + break; + case 4: + exp = join("float4(half4(", to_expression(arg), "))"); + break; + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, exp, should_forward(arg)); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + if (is_tessellation_shader()) + { + if (!emit_tessellation_access_chain(ops, instruction.length)) + CompilerGLSL::emit_instruction(instruction); + } + else + CompilerGLSL::emit_instruction(instruction); + break; + + case OpStore: + if (is_out_of_bounds_tessellation_level(ops[0])) + break; + + if (maybe_emit_array_assignment(ops[0], ops[1])) + break; + + CompilerGLSL::emit_instruction(instruction); + break; + + // Compute barriers + case OpMemoryBarrier: + emit_barrier(0, ops[0], ops[1]); + break; + + case OpControlBarrier: + // In GLSL a memory barrier is often followed by a control barrier. + // But in MSL, memory barriers are also control barriers, so don't + // emit a simple control barrier if a memory barrier has just been emitted. + if (previous_instruction_opcode != OpMemoryBarrier) + emit_barrier(ops[0], ops[1], ops[2]); + break; + + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + // If the matrix needs transpose and it is square or packed, just flip the multiply order. + uint32_t mtx_id = ops[opcode == OpMatrixTimesVector ? 2 : 3]; + auto *e = maybe_get<SPIRExpression>(mtx_id); + auto &t = expression_type(mtx_id); + bool is_packed = has_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); + if (e && e->need_transpose && (t.columns == t.vecsize || is_packed)) + { + e->need_transpose = false; + + // This is important for matrices. Packed matrices + // are generally transposed, so unpacking using a constructor argument + // will result in an error. + // The simplest solution for now is to just avoid unpacking the matrix in this operation. + unset_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); + + emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); + if (is_packed) + set_extended_decoration(mtx_id, SPIRVCrossDecorationPacked); + e->need_transpose = true; + } + else + MSL_BOP(*); + break; + } + + // OpOuterProduct + + case OpIAddCarry: + case OpISubBorrow: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + forced_temporaries.insert(result_id); + auto &type = get<SPIRType>(result_type); + statement(variable_decl(type, to_name(result_id)), ";"); + set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + + auto &res_type = get<SPIRType>(type.member_types[1]); + if (opcode == OpIAddCarry) + { + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " + ", + to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), + "(1), ", type_to_glsl(res_type), "(0), ", to_expression(result_id), ".", to_member_name(type, 0), + " >= max(", to_expression(op0), ", ", to_expression(op1), "));"); + } + else + { + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " - ", + to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = select(", type_to_glsl(res_type), + "(1), ", type_to_glsl(res_type), "(0), ", to_enclosed_expression(op0), + " >= ", to_enclosed_expression(op1), ");"); + } + break; + } + + case OpUMulExtended: + case OpSMulExtended: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + forced_temporaries.insert(result_id); + auto &type = get<SPIRType>(result_type); + statement(variable_decl(type, to_name(result_id)), ";"); + set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(op0), " * ", + to_enclosed_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = mulhi(", to_expression(op0), ", ", + to_expression(op1), ");"); + break; + } + + default: + CompilerGLSL::emit_instruction(instruction); + break; + } + + previous_instruction_opcode = opcode; +} + +void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem) +{ + if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl) + return; + + string bar_stmt = "threadgroup_barrier(mem_flags::"; + + uint32_t mem_sem = id_mem_sem ? get<SPIRConstant>(id_mem_sem).scalar() : uint32_t(MemorySemanticsMaskNone); + + if (get_execution_model() == ExecutionModelTessellationControl) + // For tesc shaders, this also affects objects in the Output storage class. + // Since in Metal, these are placed in a device buffer, we have to sync device memory here. + bar_stmt += "mem_device"; + else if (mem_sem & MemorySemanticsCrossWorkgroupMemoryMask) + bar_stmt += "mem_device"; + else if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask)) + bar_stmt += "mem_threadgroup"; + else if (mem_sem & MemorySemanticsImageMemoryMask) + bar_stmt += "mem_texture"; + else + bar_stmt += "mem_none"; + + if (msl_options.is_ios() && (msl_options.supports_msl_version(2) && !msl_options.supports_msl_version(2, 1))) + { + bar_stmt += ", "; + + // Use the wider of the two scopes (smaller value) + uint32_t exe_scope = id_exe_scope ? get<SPIRConstant>(id_exe_scope).scalar() : uint32_t(ScopeInvocation); + uint32_t mem_scope = id_mem_scope ? get<SPIRConstant>(id_mem_scope).scalar() : uint32_t(ScopeInvocation); + uint32_t scope = min(exe_scope, mem_scope); + switch (scope) + { + case ScopeCrossDevice: + case ScopeDevice: + bar_stmt += "memory_scope_device"; + break; + + case ScopeSubgroup: + case ScopeInvocation: + bar_stmt += "memory_scope_simdgroup"; + break; + + case ScopeWorkgroup: + default: + bar_stmt += "memory_scope_threadgroup"; + break; + } + } + + bar_stmt += ");"; + + statement(bar_stmt); + + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); +} + +void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +{ + // Assignment from an array initializer is fine. + auto &type = expression_type(rhs_id); + auto *var = maybe_get_backing_variable(rhs_id); + + // Unfortunately, we cannot template on address space in MSL, + // so explicit address space redirection it is ... + bool is_constant = false; + if (ir.ids[rhs_id].get_type() == TypeConstant) + { + is_constant = true; + } + else if (var && var->remapped_variable && var->statically_assigned && + ir.ids[var->static_expression].get_type() == TypeConstant) + { + is_constant = true; + } + + const char *tag = is_constant ? "FromConstant" : "FromStack"; + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); +} + +// Since MSL does not allow arrays to be copied via simple variable assignment, +// if the LHS and RHS represent an assignment of an entire array, it must be +// implemented by calling an array copy function. +// Returns whether the struct assignment was emitted. +bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs) +{ + // We only care about assignments of an entire array + auto &type = expression_type(id_rhs); + if (type.array.size() == 0) + return false; + + auto *var = maybe_get<SPIRVariable>(id_lhs); + + // Is this a remapped, static constant? Don't do anything. + if (var && var->remapped_variable && var->statically_assigned) + return true; + + if (ir.ids[id_rhs].get_type() == TypeConstant && var && var->deferred_declaration) + { + // Special case, if we end up declaring a variable when assigning the constant array, + // we can avoid the copy by directly assigning the constant expression. + // This is likely necessary to be able to use a variable as a true look-up table, as it is unlikely + // the compiler will be able to optimize the spvArrayCopy() into a constant LUT. + // After a variable has been declared, we can no longer assign constant arrays in MSL unfortunately. + statement(to_expression(id_lhs), " = ", constant_expression(get<SPIRConstant>(id_rhs)), ";"); + return true; + } + + // Ensure the LHS variable has been declared + auto *p_v_lhs = maybe_get_backing_variable(id_lhs); + if (p_v_lhs) + flush_variable_declaration(p_v_lhs->self); + + emit_array_copy(to_expression(id_lhs), id_rhs); + register_write(id_lhs); + + return true; +} + +// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers +void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, + uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1, + bool op1_is_pointer, bool op1_is_literal, uint32_t op2) +{ + forced_temporaries.insert(result_id); + + string exp = string(op) + "("; + + auto &type = get_pointee_type(expression_type(obj)); + exp += "(volatile "; + auto *var = maybe_get_backing_variable(obj); + if (!var) + SPIRV_CROSS_THROW("No backing variable for atomic operation."); + exp += get_argument_address_space(*var); + exp += " atomic_"; + exp += type_to_glsl(type); + exp += "*)"; + + exp += "&"; + exp += to_enclosed_expression(obj); + + bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; + + if (is_atomic_compare_exchange_strong) + { + assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0); + assert(op2); + assert(has_mem_order_2); + exp += ", &"; + exp += to_name(result_id); + exp += ", "; + exp += to_expression(op2); + exp += ", "; + exp += get_memory_order(mem_order_1); + exp += ", "; + exp += get_memory_order(mem_order_2); + exp += ")"; + + // MSL only supports the weak atomic compare exchange, + // so emit a CAS loop here. + statement(variable_decl(type, to_name(result_id)), ";"); + statement("do"); + begin_scope(); + statement(to_name(result_id), " = ", to_expression(op1), ";"); + end_scope_decl(join("while (!", exp, ")")); + set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + } + else + { + assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0); + if (op1) + { + if (op1_is_literal) + exp += join(", ", op1); + else + exp += ", " + to_expression(op1); + } + if (op2) + exp += ", " + to_expression(op2); + + exp += string(", ") + get_memory_order(mem_order_1); + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); + + exp += ")"; + emit_op(result_type, result_id, exp, false); + } + + flush_all_atomic_capable_variables(); +} + +// Metal only supports relaxed memory order for now +const char *CompilerMSL::get_memory_order(uint32_t) +{ + return "memory_order_relaxed"; +} + +// Override for MSL-specific extension syntax instructions +void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count) +{ + auto op = static_cast<GLSLstd450>(eop); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "rsqrt"); + break; + case GLSLstd450RoundEven: + emit_unary_func_op(result_type, id, args[0], "rint"); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findSMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findUMSB", uint_type, uint_type); + break; + + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_snorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "pack_float_to_unorm2x16"); + break; + + case GLSLstd450PackHalf2x16: + { + auto expr = join("as_type<uint>(half2(", to_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + break; + } + + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpack_snorm4x8_to_float"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpack_unorm4x8_to_float"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpack_snorm2x16_to_float"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpack_unorm2x16_to_float"); + break; + + case GLSLstd450UnpackHalf2x16: + { + auto expr = join("float2(as_type<half2>(", to_expression(args[0]), "))"); + emit_op(result_type, id, expr, should_forward(args[0])); + inherit_expression_dependencies(id, args[0]); + break; + } + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450PackDouble2x32"); // Currently unsupported + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unsupported_GLSLstd450UnpackDouble2x32"); // Currently unsupported + break; + + case GLSLstd450MatrixInverse: + { + auto &mat_type = get<SPIRType>(result_type); + switch (mat_type.columns) + { + case 2: + emit_unary_func_op(result_type, id, args[0], "spvInverse2x2"); + break; + case 3: + emit_unary_func_op(result_type, id, args[0], "spvInverse3x3"); + break; + case 4: + emit_unary_func_op(result_type, id, args[0], "spvInverse4x4"); + break; + default: + break; + } + break; + } + + case GLSLstd450FMin: + // If the result type isn't float, don't bother calling the specific + // precise::/fast:: version. Metal doesn't have those for half and + // double types. + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "fast::min"); + break; + + case GLSLstd450FMax: + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "fast::max"); + break; + + case GLSLstd450FClamp: + // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call. + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fast::clamp"); + break; + + case GLSLstd450NMin: + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::min"); + break; + + case GLSLstd450NMax: + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + else + emit_binary_func_op(result_type, id, args[0], args[1], "precise::max"); + break; + + case GLSLstd450NClamp: + // TODO: If args[1] is 0 and args[2] is 1, emit a saturate() call. + if (get<SPIRType>(result_type).basetype != SPIRType::Float) + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "precise::clamp"); + break; + + // TODO: + // GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier) + // GLSLstd450InterpolateAtSample (sample_no_perspective qualifier) + // GLSLstd450InterpolateAtOffset + + default: + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } +} + +// Emit a structure declaration for the specified interface variable. +void CompilerMSL::emit_interface_block(uint32_t ib_var_id) +{ + if (ib_var_id) + { + auto &ib_var = get<SPIRVariable>(ib_var_id); + auto &ib_type = get_variable_data_type(ib_var); + assert(ib_type.basetype == SPIRType::Struct && !ib_type.member_types.empty()); + emit_struct(ib_type); + } +} + +// Emits the declaration signature of the specified function. +// If this is the entry point function, Metal-specific return value and function arguments are added. +void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + local_variable_names = resource_names; + string decl; + + processing_entry_point = (func.self == ir.default_entry_point); + + auto &type = get<SPIRType>(func.return_type); + + if (type.array.empty()) + { + decl += func_type_decl(type); + } + else + { + // We cannot return arrays in MSL, so "return" through an out variable. + decl = "void"; + } + + decl += " "; + decl += to_name(func.self); + decl += "("; + + if (!type.array.empty()) + { + // Fake arrays returns by writing to an out array instead. + decl += "thread "; + decl += type_to_glsl(type); + decl += " (&SPIRV_Cross_return_value)"; + decl += type_to_array_glsl(type); + if (!func.arguments.empty()) + decl += ", "; + } + + if (processing_entry_point) + { + if (msl_options.argument_buffers) + decl += entry_point_args_argument_buffer(!func.arguments.empty()); + else + decl += entry_point_args_classic(!func.arguments.empty()); + + // If entry point function has variables that require early declaration, + // ensure they each have an empty initializer, creating one if needed. + // This is done at this late stage because the initialization expression + // is cleared after each compilation pass. + for (auto var_id : vars_needing_early_declaration) + { + auto &ed_var = get<SPIRVariable>(var_id); + uint32_t &initializer = ed_var.initializer; + if (!initializer) + initializer = ir.increase_bound_by(1); + + // Do not override proper initializers. + if (ir.ids[initializer].get_type() == TypeNone || ir.ids[initializer].get_type() == TypeExpression) + set<SPIRExpression>(ed_var.initializer, "{}", ed_var.basetype, true); + } + } + + for (auto &arg : func.arguments) + { + uint32_t name_id = arg.id; + + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + { + // If we need to modify the name of the variable, make sure we modify the original variable. + // Our alias is just a shadow variable. + if (arg.alias_global_variable && var->basevariable) + name_id = var->basevariable; + + var->parameter = &arg; // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + } + + add_local_variable_name(name_id); + + decl += argument_decl(arg); + + // Manufacture automatic sampler arg for SampledImage texture + auto &arg_type = get<SPIRType>(arg.type); + if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer) + decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + + // Manufacture automatic swizzle arg. + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type)) + decl += join(", constant uint32_t& ", to_swizzle_expression(arg.id)); + + if (&arg != &func.arguments.back()) + decl += ", "; + } + + decl += ")"; + statement(decl); +} + +// Returns the texture sampling function string for the specified image and sampling characteristics. +string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, + bool has_offset, bool, bool has_dref, uint32_t) +{ + // Special-case gather. We have to alter the component being looked up + // in the swizzle case. + if (msl_options.swizzle_texture_samples && is_gather) + { + string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; + fname += "<" + type_to_glsl(get<SPIRType>(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype); + // Add the arg types ourselves. Yes, this sucks, but Clang can't + // deduce template pack parameters in the middle of an argument list. + switch (imgtype.image.dim) + { + case Dim2D: + fname += ", float2"; + if (imgtype.image.arrayed) + fname += ", uint"; + if (imgtype.image.depth) + fname += ", float"; + if (!imgtype.image.depth || has_offset) + fname += ", int2"; + break; + case DimCube: + fname += ", float3"; + if (imgtype.image.arrayed) + fname += ", uint"; + if (imgtype.image.depth) + fname += ", float"; + break; + default: + SPIRV_CROSS_THROW("Invalid texture dimension for gather op."); + } + fname += ">"; + return fname; + } + + auto *combined = maybe_get<SPIRCombinedImageSampler>(img); + + // Texture reference + string fname = to_expression(combined ? combined->image : img) + "."; + if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype)) + fname = "spvTextureSwizzle(" + fname; + + // Texture function and sampler + if (is_fetch) + fname += "read"; + else if (is_gather) + fname += "gather"; + else + fname += "sample"; + + if (has_dref) + fname += "_compare"; + + return fname; +} + +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. +string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, + uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, + uint32_t sample, bool *p_forward) +{ + string farg_str; + if (!is_fetch) + farg_str += to_sampler_expression(img); + + if (msl_options.swizzle_texture_samples && is_gather) + { + if (!farg_str.empty()) + farg_str += ", "; + + auto *combined = maybe_get<SPIRCombinedImageSampler>(img); + farg_str += to_expression(combined ? combined->image : img); + } + + // Texture coordinates + bool forward = should_forward(coord); + auto coord_expr = to_enclosed_expression(coord); + auto &coord_type = expression_type(coord); + bool coord_is_fp = type_is_floating_point(coord_type); + bool is_cube_fetch = false; + + string tex_coords = coord_expr; + uint32_t alt_coord_component = 0; + + switch (imgtype.image.dim) + { + + case Dim1D: + if (coord_type.vecsize > 1) + tex_coords = enclose_expression(tex_coords) + ".x"; + + if (is_fetch) + tex_coords = "uint(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + + alt_coord_component = 1; + break; + + case DimBuffer: + if (coord_type.vecsize > 1) + tex_coords = enclose_expression(tex_coords) + ".x"; + + // Metal texel buffer textures are 2D, so convert 1D coord to 2D. + if (is_fetch) + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + + alt_coord_component = 1; + break; + + case DimSubpassData: + if (imgtype.image.ms) + tex_coords = "uint2(gl_FragCoord.xy)"; + else + tex_coords = join("uint2(gl_FragCoord.xy), 0"); + break; + + case Dim2D: + if (coord_type.vecsize > 2) + tex_coords = enclose_expression(tex_coords) + ".xy"; + + if (is_fetch) + tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + + alt_coord_component = 2; + break; + + case Dim3D: + if (coord_type.vecsize > 3) + tex_coords = enclose_expression(tex_coords) + ".xyz"; + + if (is_fetch) + tex_coords = "uint3(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + + alt_coord_component = 3; + break; + + case DimCube: + if (is_fetch) + { + is_cube_fetch = true; + tex_coords += ".xy"; + tex_coords = "uint2(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + if (coord_type.vecsize > 3) + tex_coords = enclose_expression(tex_coords) + ".xyz"; + } + + alt_coord_component = 3; + break; + + default: + break; + } + + if (is_fetch && offset) + { + // Fetch offsets must be applied directly to the coordinate. + forward = forward && should_forward(offset); + auto &type = expression_type(offset); + if (type.basetype != SPIRType::UInt) + tex_coords += " + " + bitcast_expression(SPIRType::UInt, offset); + else + tex_coords += " + " + to_enclosed_expression(offset); + } + else if (is_fetch && coffset) + { + // Fetch offsets must be applied directly to the coordinate. + forward = forward && should_forward(coffset); + auto &type = expression_type(coffset); + if (type.basetype != SPIRType::UInt) + tex_coords += " + " + bitcast_expression(SPIRType::UInt, coffset); + else + tex_coords += " + " + to_enclosed_expression(coffset); + } + + // If projection, use alt coord as divisor + if (is_proj) + tex_coords += " / " + to_extract_component_expression(coord, alt_coord_component); + + if (!farg_str.empty()) + farg_str += ", "; + farg_str += tex_coords; + + // If fetch from cube, add face explicitly + if (is_cube_fetch) + { + // Special case for cube arrays, face and layer are packed in one dimension. + if (imgtype.image.arrayed) + farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") % 6u"; + else + farg_str += ", uint(" + round_fp_tex_coords(to_extract_component_expression(coord, 2), coord_is_fp) + ")"; + } + + // If array, use alt coord + if (imgtype.image.arrayed) + { + // Special case for cube arrays, face and layer are packed in one dimension. + if (imgtype.image.dim == DimCube && is_fetch) + farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") / 6u"; + else + farg_str += ", uint(" + + round_fp_tex_coords(to_extract_component_expression(coord, alt_coord_component), coord_is_fp) + + ")"; + } + + // Depth compare reference value + if (dref) + { + forward = forward && should_forward(dref); + farg_str += ", "; + + if (is_proj) + farg_str += + to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, alt_coord_component); + else + farg_str += to_expression(dref); + + if (msl_options.is_macos() && (grad_x || grad_y)) + { + // For sample compare, MSL does not support gradient2d for all targets (only iOS apparently according to docs). + // However, the most common case here is to have a constant gradient of 0, as that is the only way to express + // LOD == 0 in GLSL with sampler2DArrayShadow (cascaded shadow mapping). + // We will detect a compile-time constant 0 value for gradient and promote that to level(0) on MSL. + bool constant_zero_x = !grad_x || expression_is_constant_null(grad_x); + bool constant_zero_y = !grad_y || expression_is_constant_null(grad_y); + if (constant_zero_x && constant_zero_y) + { + lod = 0; + grad_x = 0; + grad_y = 0; + farg_str += ", level(0)"; + } + else + { + SPIRV_CROSS_THROW("Using non-constant 0.0 gradient() qualifier for sample_compare. This is not " + "supported in MSL macOS."); + } + } + + if (msl_options.is_macos() && bias) + { + // Bias is not supported either on macOS with sample_compare. + // Verify it is compile-time zero, and drop the argument. + if (expression_is_constant_null(bias)) + { + bias = 0; + } + else + { + SPIRV_CROSS_THROW( + "Using non-constant 0.0 bias() qualifier for sample_compare. This is not supported in MSL macOS."); + } + } + } + + // LOD Options + // Metal does not support LOD for 1D textures. + if (bias && imgtype.image.dim != Dim1D) + { + forward = forward && should_forward(bias); + farg_str += ", bias(" + to_expression(bias) + ")"; + } + + // Metal does not support LOD for 1D textures. + if (lod && imgtype.image.dim != Dim1D) + { + forward = forward && should_forward(lod); + if (is_fetch) + { + farg_str += ", " + to_expression(lod); + } + else + { + farg_str += ", level(" + to_expression(lod) + ")"; + } + } + else if (is_fetch && !lod && imgtype.image.dim != Dim1D && imgtype.image.dim != DimBuffer && !imgtype.image.ms && + imgtype.image.sampled != 2) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. + // Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL. + farg_str += ", 0"; + } + + // Metal does not support LOD for 1D textures. + if ((grad_x || grad_y) && imgtype.image.dim != Dim1D) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + string grad_opt; + switch (imgtype.image.dim) + { + case Dim2D: + grad_opt = "2d"; + break; + case Dim3D: + grad_opt = "3d"; + break; + case DimCube: + grad_opt = "cube"; + break; + default: + grad_opt = "unsupported_gradient_dimension"; + break; + } + farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")"; + } + + // Add offsets + string offset_expr; + if (coffset && !is_fetch) + { + forward = forward && should_forward(coffset); + offset_expr = to_expression(coffset); + } + else if (offset && !is_fetch) + { + forward = forward && should_forward(offset); + offset_expr = to_expression(offset); + } + + if (!offset_expr.empty()) + { + switch (imgtype.image.dim) + { + case Dim2D: + if (coord_type.vecsize > 2) + offset_expr = enclose_expression(offset_expr) + ".xy"; + + farg_str += ", " + offset_expr; + break; + + case Dim3D: + if (coord_type.vecsize > 3) + offset_expr = enclose_expression(offset_expr) + ".xyz"; + + farg_str += ", " + offset_expr; + break; + + default: + break; + } + } + + if (comp) + { + // If 2D has gather component, ensure it also has an offset arg + if (imgtype.image.dim == Dim2D && offset_expr.empty()) + farg_str += ", int2(0)"; + + forward = forward && should_forward(comp); + farg_str += ", " + to_component_argument(comp); + } + + if (sample) + { + farg_str += ", "; + farg_str += to_expression(sample); + } + + if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype)) + { + // Add the swizzle constant from the swizzle buffer. + if (!is_gather) + farg_str += ")"; + farg_str += ", " + to_swizzle_expression(img); + used_aux_buffer = true; + } + + *p_forward = forward; + + return farg_str; +} + +// If the texture coordinates are floating point, invokes MSL round() function to round them. +string CompilerMSL::round_fp_tex_coords(string tex_coords, bool coord_is_fp) +{ + return coord_is_fp ? ("round(" + tex_coords + ")") : tex_coords; +} + +// Returns a string to use in an image sampling function argument. +// The ID must be a scalar constant. +string CompilerMSL::to_component_argument(uint32_t id) +{ + if (ir.ids[id].get_type() != TypeConstant) + { + SPIRV_CROSS_THROW("ID " + to_string(id) + " is not an OpConstant."); + return "component::x"; + } + + uint32_t component_index = get<SPIRConstant>(id).scalar(); + switch (component_index) + { + case 0: + return "component::x"; + case 1: + return "component::y"; + case 2: + return "component::z"; + case 3: + return "component::w"; + + default: + SPIRV_CROSS_THROW("The value (" + to_string(component_index) + ") of OpConstant ID " + to_string(id) + + " is not a valid Component index, which must be one of 0, 1, 2, or 3."); + return "component::x"; + } +} + +// Establish sampled image as expression object and assign the sampler to it. +void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + set<SPIRCombinedImageSampler>(result_id, result_type, image_id, samp_id); +} + +// Returns a string representation of the ID, usable as a function arg. +// Manufacture automatic sampler arg for SampledImage texture. +string CompilerMSL::to_func_call_arg(uint32_t id) +{ + string arg_str; + + auto *c = maybe_get<SPIRConstant>(id); + if (c && !get<SPIRType>(c->constant_type).array.empty()) + { + // If we are passing a constant array directly to a function for some reason, + // the callee will expect an argument in thread const address space + // (since we can only bind to arrays with references in MSL). + // To resolve this, we must emit a copy in this address space. + // This kind of code gen should be rare enough that performance is not a real concern. + // Inline the SPIR-V to avoid this kind of suboptimal codegen. + // + // We risk calling this inside a continue block (invalid code), + // so just create a thread local copy in the current function. + arg_str = join("_", id, "_array_copy"); + auto &constants = current_function->constant_arrays_needed_on_stack; + auto itr = find(begin(constants), end(constants), id); + if (itr == end(constants)) + { + force_recompile = true; + constants.push_back(id); + } + } + else + arg_str = CompilerGLSL::to_func_call_arg(id); + + // Manufacture automatic sampler arg if the arg is a SampledImage texture. + auto &type = expression_type(id); + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + { + // Need to check the base variable in case we need to apply a qualified alias. + uint32_t var_id = 0; + auto *sampler_var = maybe_get<SPIRVariable>(id); + if (sampler_var) + var_id = sampler_var->basevariable; + + arg_str += ", " + to_sampler_expression(var_id ? var_id : id); + } + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + arg_str += ", " + to_swizzle_expression(id); + + return arg_str; +} + +// If the ID represents a sampled image that has been assigned a sampler already, +// generate an expression for the sampler, otherwise generate a fake sampler name +// by appending a suffix to the expression constructed from the ID. +string CompilerMSL::to_sampler_expression(uint32_t id) +{ + auto *combined = maybe_get<SPIRCombinedImageSampler>(id); + auto expr = to_expression(combined ? combined->image : id); + auto index = expr.find_first_of('['); + + uint32_t samp_id = 0; + if (combined) + samp_id = combined->sampler; + + if (index == string::npos) + return samp_id ? to_expression(samp_id) : expr + sampler_name_suffix; + else + { + auto image_expr = expr.substr(0, index); + auto array_expr = expr.substr(index); + return samp_id ? to_expression(samp_id) : (image_expr + sampler_name_suffix + array_expr); + } +} + +string CompilerMSL::to_swizzle_expression(uint32_t id) +{ + auto *combined = maybe_get<SPIRCombinedImageSampler>(id); + auto expr = to_expression(combined ? combined->image : id); + auto index = expr.find_first_of('['); + + if (index == string::npos) + return expr + swizzle_name_suffix; + else + { + auto image_expr = expr.substr(0, index); + auto array_expr = expr.substr(index); + return image_expr + swizzle_name_suffix + array_expr; + } +} + +// Checks whether the type is a Block all of whose members have DecorationPatch. +bool CompilerMSL::is_patch_block(const SPIRType &type) +{ + if (!has_decoration(type.self, DecorationBlock)) + return false; + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + if (!has_member_decoration(type.self, i, DecorationPatch)) + return false; + } + + return true; +} + +// Checks whether the ID is a row_major matrix that requires conversion before use +bool CompilerMSL::is_non_native_row_major_matrix(uint32_t id) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_decoration(id, DecorationRowMajor)) + return false; + + // Generate a function that will swap matrix elements from row-major to column-major. + // Packed row-matrix should just use transpose() function. + if (!has_extended_decoration(id, SPIRVCrossDecorationPacked)) + { + const auto type = expression_type(id); + add_convert_row_major_matrix_function(type.columns, type.vecsize); + } + + return true; +} + +// Checks whether the member is a row_major matrix that requires conversion before use +bool CompilerMSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + // Generate a function that will swap matrix elements from row-major to column-major. + // Packed row-matrix should just use transpose() function. + if (!has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPacked)) + { + const auto mbr_type = get<SPIRType>(type.member_types[index]); + add_convert_row_major_matrix_function(mbr_type.columns, mbr_type.vecsize); + } + + return true; +} + +// Adds a function suitable for converting a non-square row-major matrix to a column-major matrix. +void CompilerMSL::add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows) +{ + SPVFuncImpl spv_func; + if (cols == rows) // Square matrix...just use transpose() function + return; + else if (cols == 2 && rows == 3) + spv_func = SPVFuncImplRowMajor2x3; + else if (cols == 2 && rows == 4) + spv_func = SPVFuncImplRowMajor2x4; + else if (cols == 3 && rows == 2) + spv_func = SPVFuncImplRowMajor3x2; + else if (cols == 3 && rows == 4) + spv_func = SPVFuncImplRowMajor3x4; + else if (cols == 4 && rows == 2) + spv_func = SPVFuncImplRowMajor4x2; + else if (cols == 4 && rows == 3) + spv_func = SPVFuncImplRowMajor4x3; + else + SPIRV_CROSS_THROW("Could not convert row-major matrix."); + + auto rslt = spv_function_implementations.insert(spv_func); + if (rslt.second) + { + add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + force_recompile = true; + } +} + +// Wraps the expression string in a function call that converts the +// row_major matrix result of the expression to a column_major matrix. +string CompilerMSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, bool is_packed) +{ + strip_enclosed_expression(exp_str); + + string func_name; + + // Square and packed matrices can just use transpose + if (exp_type.columns == exp_type.vecsize || is_packed) + func_name = "transpose"; + else + func_name = string("spvConvertFromRowMajor") + to_string(exp_type.columns) + "x" + to_string(exp_type.vecsize); + + return join(func_name, "(", exp_str, ")"); +} + +// Called automatically at the end of the entry point function +void CompilerMSL::emit_fixup() +{ + if ((get_execution_model() == ExecutionModelVertex || + get_execution_model() == ExecutionModelTessellationEvaluation) && + stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) + { + if (options.vertex.fixup_clipspace) + statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name, + ".w) * 0.5; // Adjust clip-space for Metal"); + + if (options.vertex.flip_vert_y) + statement(qual_pos_var_name, ".y = -(", qual_pos_var_name, ".y);", " // Invert Y-axis for Metal"); + } +} + +// Return a string defining a structure member, with padding and packing. +string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier) +{ + auto &membertype = get<SPIRType>(member_type_id); + + // If this member requires padding to maintain alignment, emit a dummy padding member. + MSLStructMemberKey key = get_struct_member_key(type.self, index); + uint32_t pad_len = struct_member_padding[key]; + if (pad_len > 0) + statement("char _m", index, "_pad", "[", to_string(pad_len), "];"); + + // If this member is packed, mark it as so. + string pack_pfx = ""; + + const SPIRType *effective_membertype = &membertype; + SPIRType override_type; + + uint32_t orig_id = 0; + if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)) + orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID); + + if (member_is_packed_type(type, index)) + { + // If we're packing a matrix, output an appropriate typedef + if (membertype.basetype == SPIRType::Struct) + { + pack_pfx = "/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ "; + } + else if (membertype.vecsize > 1 && membertype.columns > 1) + { + pack_pfx = "packed_"; + string base_type = membertype.width == 16 ? "half" : "float"; + string td_line = "typedef "; + td_line += base_type + to_string(membertype.vecsize) + "x" + to_string(membertype.columns); + td_line += " " + pack_pfx; + td_line += base_type + to_string(membertype.columns) + "x" + to_string(membertype.vecsize); + td_line += ";"; + add_typedef_line(td_line); + } + else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct) + { + // A "packed" float array, but we pad here instead to 4-vector. + override_type = membertype; + override_type.vecsize = 4; + effective_membertype = &override_type; + } + else + pack_pfx = "packed_"; + } + + // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. + if (msl_options.is_ios() && membertype.basetype == SPIRType::Image && membertype.image.sampled == 2) + { + if (!has_decoration(orig_id, DecorationNonWritable)) + SPIRV_CROSS_THROW("Writable images are not allowed in argument buffers on iOS."); + } + + // Array information is baked into these types. + string array_type; + if (membertype.basetype != SPIRType::Image && membertype.basetype != SPIRType::Sampler && + membertype.basetype != SPIRType::SampledImage) + { + array_type = type_to_array_glsl(membertype); + } + + return join(pack_pfx, type_to_glsl(*effective_membertype, orig_id), " ", qualifier, to_member_name(type, index), + member_attribute_qualifier(type, index), array_type, ";"); +} + +// Emit a structure member, padding and packing to maintain the correct memeber alignments. +void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + statement(to_struct_member(type, member_type_id, index, qualifier)); +} + +// Return a MSL qualifier for the specified function attribute member +string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t index) +{ + auto &execution = get_entry_point(); + + uint32_t mbr_type_id = type.member_types[index]; + auto &mbr_type = get<SPIRType>(mbr_type_id); + + BuiltIn builtin = BuiltInMax; + bool is_builtin = is_member_builtin(type, index, &builtin); + + if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID)) + return join(" [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationArgumentBufferID), + ")]]"); + + // Vertex function inputs + if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInBaseVertex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInBaseInstance: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); + + default: + return ""; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + if (locn != k_unknown_location) + return string(" [[attribute(") + convert_to_string(locn) + ")]]"; + } + + // Vertex and tessellation evaluation function outputs + if ((execution.model == ExecutionModelVertex || execution.model == ExecutionModelTessellationEvaluation) && + type.storage == StorageClassOutput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInPointSize: + // Only mark the PointSize builtin if really rendering points. + // Some shaders may include a PointSize builtin even when used to render + // non-point topologies, and Metal will reject this builtin when compiling + // the shader into a render pipeline that uses a non-point topology. + return msl_options.enable_point_size_builtin ? (string(" [[") + builtin_qualifier(builtin) + "]]") : ""; + + case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + /* fallthrough */ + case BuiltInPosition: + case BuiltInLayer: + case BuiltInClipDistance: + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + + default: + return ""; + } + } + uint32_t comp; + uint32_t locn = get_ordered_member_location(type.self, index, &comp); + if (locn != k_unknown_location) + { + if (comp != k_unknown_component) + return string(" [[user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")]]"; + else + return string(" [[user(locn") + convert_to_string(locn) + ")]]"; + } + } + + // Tessellation control function inputs + if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInInvocationId: + case BuiltInPrimitiveId: + return string(" [[") + builtin_qualifier(builtin) + "]]" + (mbr_type.array.empty() ? "" : " "); + case BuiltInPatchVertices: + return ""; + // Others come from stage input. + default: + break; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + if (locn != k_unknown_location) + return string(" [[attribute(") + convert_to_string(locn) + ")]]"; + } + + // Tessellation control function outputs + if (execution.model == ExecutionModelTessellationControl && type.storage == StorageClassOutput) + { + // For this type of shader, we always arrange for it to capture its + // output to a buffer. For this reason, qualifiers are irrelevant here. + return ""; + } + + // Tessellation evaluation function inputs + if (execution.model == ExecutionModelTessellationEvaluation && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInPrimitiveId: + case BuiltInTessCoord: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + case BuiltInPatchVertices: + return ""; + // Others come from stage input. + default: + break; + } + } + // The special control point array must not be marked with an attribute. + if (get_type(type.member_types[index]).basetype == SPIRType::ControlPointArray) + return ""; + uint32_t locn = get_ordered_member_location(type.self, index); + if (locn != k_unknown_location) + return string(" [[attribute(") + convert_to_string(locn) + ")]]"; + } + + // Tessellation evaluation function outputs were handled above. + + // Fragment function inputs + if (execution.model == ExecutionModelFragment && type.storage == StorageClassInput) + { + string quals = ""; + if (is_builtin) + { + switch (builtin) + { + case BuiltInFrontFacing: + case BuiltInPointCoord: + case BuiltInFragCoord: + case BuiltInSampleId: + case BuiltInSampleMask: + case BuiltInLayer: + quals = builtin_qualifier(builtin); + + default: + break; + } + } + else + { + uint32_t comp; + uint32_t locn = get_ordered_member_location(type.self, index, &comp); + if (locn != k_unknown_location) + { + if (comp != k_unknown_component) + quals = string("user(locn") + convert_to_string(locn) + "_" + convert_to_string(comp) + ")"; + else + quals = string("user(locn") + convert_to_string(locn) + ")"; + } + } + // Don't bother decorating integers with the 'flat' attribute; it's + // the default (in fact, the only option). Also don't bother with the + // FragCoord builtin; it's always noperspective on Metal. + if (!type_is_integral(mbr_type) && (!is_builtin || builtin != BuiltInFragCoord)) + { + if (has_member_decoration(type.self, index, DecorationFlat)) + { + if (!quals.empty()) + quals += ", "; + quals += "flat"; + } + else if (has_member_decoration(type.self, index, DecorationCentroid)) + { + if (!quals.empty()) + quals += ", "; + if (has_member_decoration(type.self, index, DecorationNoPerspective)) + quals += "centroid_no_perspective"; + else + quals += "centroid_perspective"; + } + else if (has_member_decoration(type.self, index, DecorationSample)) + { + if (!quals.empty()) + quals += ", "; + if (has_member_decoration(type.self, index, DecorationNoPerspective)) + quals += "sample_no_perspective"; + else + quals += "sample_perspective"; + } + else if (has_member_decoration(type.self, index, DecorationNoPerspective)) + { + if (!quals.empty()) + quals += ", "; + quals += "center_no_perspective"; + } + } + if (!quals.empty()) + return " [[" + quals + "]]"; + } + + // Fragment function outputs + if (execution.model == ExecutionModelFragment && type.storage == StorageClassOutput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInSampleMask: + case BuiltInFragDepth: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + default: + return ""; + } + } + uint32_t locn = get_ordered_member_location(type.self, index); + if (locn != k_unknown_location && has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[color(", locn, "), index(", get_member_decoration(type.self, index, DecorationIndex), + ")]]"); + else if (locn != k_unknown_location) + return join(" [[color(", locn, ")]]"); + else if (has_member_decoration(type.self, index, DecorationIndex)) + return join(" [[index(", get_member_decoration(type.self, index, DecorationIndex), ")]]"); + else + return ""; + } + + // Compute function inputs + if (execution.model == ExecutionModelGLCompute && type.storage == StorageClassInput) + { + if (is_builtin) + { + switch (builtin) + { + case BuiltInGlobalInvocationId: + case BuiltInWorkgroupId: + case BuiltInNumWorkgroups: + case BuiltInLocalInvocationId: + case BuiltInLocalInvocationIndex: + return string(" [[") + builtin_qualifier(builtin) + "]]"; + + default: + return ""; + } + } + } + + return ""; +} + +// Returns the location decoration of the member with the specified index in the specified type. +// If the location of the member has been explicitly set, that location is used. If not, this +// function assumes the members are ordered in their location order, and simply returns the +// index as the location. +uint32_t CompilerMSL::get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp) +{ + auto &m = ir.meta[type_id]; + if (index < m.members.size()) + { + auto &dec = m.members[index]; + if (comp) + { + if (dec.decoration_flags.get(DecorationComponent)) + *comp = dec.component; + else + *comp = k_unknown_component; + } + if (dec.decoration_flags.get(DecorationLocation)) + return dec.location; + } + + return index; +} + +// Returns the type declaration for a function, including the +// entry type if the current function is the entry point function +string CompilerMSL::func_type_decl(SPIRType &type) +{ + // The regular function return type. If not processing the entry point function, that's all we need + string return_type = type_to_glsl(type) + type_to_array_glsl(type); + if (!processing_entry_point) + return return_type; + + // If an outgoing interface block has been defined, and it should be returned, override the entry point return type + bool ep_should_return_output = !get_is_rasterization_disabled(); + if (stage_out_var_id && ep_should_return_output) + return_type = type_to_glsl(get_stage_out_struct_type()) + type_to_array_glsl(type); + + // Prepend a entry type, based on the execution model + string entry_type; + auto &execution = get_entry_point(); + switch (execution.model) + { + case ExecutionModelVertex: + entry_type = "vertex"; + break; + case ExecutionModelTessellationEvaluation: + if (!msl_options.supports_msl_version(1, 2)) + SPIRV_CROSS_THROW("Tessellation requires Metal 1.2."); + if (execution.flags.get(ExecutionModeIsolines)) + SPIRV_CROSS_THROW("Metal does not support isoline tessellation."); + if (msl_options.is_ios()) + entry_type = + join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ") ]] vertex"); + else + entry_type = join("[[ patch(", execution.flags.get(ExecutionModeTriangles) ? "triangle" : "quad", ", ", + execution.output_vertices, ") ]] vertex"); + break; + case ExecutionModelFragment: + entry_type = + execution.flags.get(ExecutionModeEarlyFragmentTests) ? "[[ early_fragment_tests ]] fragment" : "fragment"; + break; + case ExecutionModelTessellationControl: + if (!msl_options.supports_msl_version(1, 2)) + SPIRV_CROSS_THROW("Tessellation requires Metal 1.2."); + if (execution.flags.get(ExecutionModeIsolines)) + SPIRV_CROSS_THROW("Metal does not support isoline tessellation."); + /* fallthrough */ + case ExecutionModelGLCompute: + case ExecutionModelKernel: + entry_type = "kernel"; + break; + default: + entry_type = "unknown"; + break; + } + + return entry_type + " " + return_type; +} + +// In MSL, address space qualifiers are required for all pointer or reference variables +string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) +{ + const auto &type = get<SPIRType>(argument.basetype); + + switch (type.storage) + { + case StorageClassWorkgroup: + return "threadgroup"; + + case StorageClassStorageBuffer: + { + // For arguments from variable pointers, we use the write count deduction, so + // we should not assume any constness here. Only for global SSBOs. + bool readonly = false; + if (has_decoration(type.self, DecorationBlock)) + readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); + + return readonly ? "const device" : "device"; + } + + case StorageClassUniform: + case StorageClassUniformConstant: + case StorageClassPushConstant: + if (type.basetype == SPIRType::Struct) + { + bool ssbo = has_decoration(type.self, DecorationBufferBlock); + if (ssbo) + { + bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable); + return readonly ? "const device" : "device"; + } + else + return "constant"; + } + break; + + case StorageClassFunction: + case StorageClassGeneric: + // No address space for plain values. + return type.pointer ? "thread" : ""; + + case StorageClassInput: + if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id) + return "threadgroup"; + break; + + case StorageClassOutput: + if (capture_output_to_buffer) + return "device"; + break; + + default: + break; + } + + return "thread"; +} + +string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id) +{ + switch (type.storage) + { + case StorageClassWorkgroup: + return "threadgroup"; + + case StorageClassStorageBuffer: + { + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) + flags = get_buffer_block_flags(id); + else + flags = get_decoration_bitset(id); + + return flags.get(DecorationNonWritable) ? "const device" : "device"; + } + + case StorageClassUniform: + case StorageClassUniformConstant: + case StorageClassPushConstant: + if (type.basetype == SPIRType::Struct) + { + bool ssbo = has_decoration(type.self, DecorationBufferBlock); + if (ssbo) + { + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock)) + flags = get_buffer_block_flags(id); + else + flags = get_decoration_bitset(id); + + return flags.get(DecorationNonWritable) ? "const device" : "device"; + } + else + return "constant"; + } + break; + + case StorageClassFunction: + case StorageClassGeneric: + // No address space for plain values. + return type.pointer ? "thread" : ""; + + case StorageClassOutput: + if (capture_output_to_buffer) + return "device"; + break; + + default: + break; + } + + return "thread"; +} + +string CompilerMSL::entry_point_arg_stage_in() +{ + string decl; + + // Stage-in structure + uint32_t stage_in_id; + if (get_execution_model() == ExecutionModelTessellationEvaluation) + stage_in_id = patch_stage_in_var_id; + else + stage_in_id = stage_in_var_id; + + if (stage_in_id) + { + auto &var = get<SPIRVariable>(stage_in_id); + auto &type = get_variable_data_type(var); + + add_resource_name(var.self); + decl = join(type_to_glsl(type), " ", to_name(var.self), " [[stage_in]]"); + } + + return decl; +} + +void CompilerMSL::entry_point_args_builtin(string &ep_args) +{ + // Builtin variables + ir.for_each_typed_id<SPIRVariable>([&](uint32_t var_id, SPIRVariable &var) { + BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; + + // Don't emit SamplePosition as a separate parameter. In the entry + // point, we get that by calling get_sample_position() on the sample ID. + if (var.storage == StorageClassInput && is_builtin_variable(var) && + get_variable_data_type(var).basetype != SPIRType::Struct && + get_variable_data_type(var).basetype != SPIRType::ControlPointArray) + { + if (bi_type != BuiltInSamplePosition && bi_type != BuiltInHelperInvocation && + bi_type != BuiltInPatchVertices && bi_type != BuiltInTessLevelInner && + bi_type != BuiltInTessLevelOuter && bi_type != BuiltInPosition && bi_type != BuiltInPointSize && + bi_type != BuiltInClipDistance && bi_type != BuiltInCullDistance) + { + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id); + ep_args += " [[" + builtin_qualifier(bi_type) + "]]"; + } + } + }); + + // Vertex and instance index built-ins + if (needs_vertex_idx_arg) + ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty()); + + if (needs_instance_idx_arg) + ep_args += built_in_func_arg(BuiltInInstanceIndex, !ep_args.empty()); + + if (capture_output_to_buffer) + { + // Add parameters to hold the indirect draw parameters and the shader output. This has to be handled + // specially because it needs to be a pointer, not a reference. + if (stage_out_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device ", type_to_glsl(get_stage_out_struct_type()), "* ", output_buffer_var_name, + " [[buffer(", msl_options.shader_output_buffer_index, ")]]"); + } + + if (stage_out_var_id || get_execution_model() == ExecutionModelTessellationControl) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("device uint* spvIndirectParams [[buffer(", msl_options.indirect_params_buffer_index, ")]]"); + } + + // Tessellation control shaders get three additional parameters: + // a buffer to hold the per-patch data, a buffer to hold the per-patch + // tessellation levels, and a block of workgroup memory to hold the + // input control point data. + if (get_execution_model() == ExecutionModelTessellationControl) + { + if (patch_stage_out_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += + join("device ", type_to_glsl(get_patch_stage_out_struct_type()), "* ", patch_output_buffer_var_name, + " [[buffer(", convert_to_string(msl_options.shader_patch_output_buffer_index), ")]]"); + } + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("device ", get_tess_factor_struct_name(), "* ", tess_factor_buffer_var_name, " [[buffer(", + convert_to_string(msl_options.shader_tess_factor_buffer_index), ")]]"); + if (stage_in_var_id) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join("threadgroup ", type_to_glsl(get_stage_in_struct_type()), "* ", input_wg_var_name, + " [[threadgroup(", convert_to_string(msl_options.shader_input_wg_index), ")]]"); + } + } + } +} + +string CompilerMSL::entry_point_args_argument_buffer(bool append_comma) +{ + string ep_args = entry_point_arg_stage_in(); + + for (uint32_t i = 0; i < kMaxArgumentBuffers; i++) + { + uint32_t id = argument_buffer_ids[i]; + if (id == 0) + continue; + + add_resource_name(id); + auto &var = get<SPIRVariable>(id); + auto &type = get_variable_data_type(var); + + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id); + ep_args += " [[buffer(" + convert_to_string(i) + ")]]"; + + // Makes it more practical for testing, since the push constant block can occupy the first available + // buffer slot if it's not bound explicitly. + next_metal_resource_index_buffer = i + 1; + } + + entry_point_args_discrete_descriptors(ep_args); + entry_point_args_builtin(ep_args); + + if (!ep_args.empty() && append_comma) + ep_args += ", "; + + return ep_args; +} + +void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) +{ + // Output resources, sorted by resource index & type + // We need to sort to work around a bug on macOS 10.13 with NVidia drivers where switching between shaders + // with different order of buffers can result in issues with buffer assignments inside the driver. + struct Resource + { + SPIRVariable *var; + string name; + SPIRType::BaseType basetype; + uint32_t index; + }; + + vector<Resource> resources; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + auto &type = get_variable_data_type(var); + uint32_t var_id = var.self; + + if (var.storage != StorageClassPushConstant) + { + uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); + if (descriptor_set_is_argument_buffer(desc_set)) + return; + } + + if (type.basetype == SPIRType::SampledImage) + { + add_resource_name(var_id); + resources.push_back( + { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) }); + + if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0) + { + resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, + get_metal_resource_index(var, SPIRType::Sampler) }); + } + } + else if (constexpr_samplers.count(var_id) == 0) + { + // constexpr samplers are not declared as resources. + add_resource_name(var_id); + resources.push_back( + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + } + } + }); + + sort(resources.begin(), resources.end(), [](const Resource &lhs, const Resource &rhs) { + return tie(lhs.basetype, lhs.index) < tie(rhs.basetype, rhs.index); + }); + + for (auto &r : resources) + { + auto &var = *r.var; + auto &type = get_variable_data_type(var); + + uint32_t var_id = var.self; + + switch (r.basetype) + { + case SPIRType::Struct: + { + auto &m = ir.meta[type.self]; + if (m.members.size() == 0) + break; + if (!type.array.empty()) + { + if (type.array.size() > 1) + SPIRV_CROSS_THROW("Arrays of arrays of buffers are not supported."); + + // Metal doesn't directly support this, so we must expand the + // array. We'll declare a local array to hold these elements + // later. + uint32_t array_size = to_array_size_literal(type); + + if (array_size == 0) + SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); + + buffer_arrays.push_back(var_id); + for (uint32_t i = 0; i < array_size; ++i) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" + + convert_to_string(i); + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]"; + } + } + else + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + } + break; + } + case SPIRType::Sampler: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += sampler_type(type) + " " + r.name; + ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; + break; + case SPIRType::Image: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += image_type_glsl(type, var_id) + " " + r.name; + ep_args += " [[texture(" + convert_to_string(r.index) + ")]]"; + break; + default: + SPIRV_CROSS_THROW("Unexpected resource type"); + break; + } + } +} + +// Returns a string containing a comma-delimited list of args for the entry point function +// This is the "classic" method of MSL 1 when we don't have argument buffer support. +string CompilerMSL::entry_point_args_classic(bool append_comma) +{ + string ep_args = entry_point_arg_stage_in(); + entry_point_args_discrete_descriptors(ep_args); + entry_point_args_builtin(ep_args); + + if (!ep_args.empty() && append_comma) + ep_args += ", "; + + return ep_args; +} + +void CompilerMSL::fix_up_shader_inputs_outputs() +{ + // Look for sampled images. Add hooks to set up the swizzle constants. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = get_variable_data_type(var); + + uint32_t var_id = var.self; + + if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + { + auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point); + entry_func.fixup_hooks_in.push_back([this, &var, var_id]() { + auto &aux_type = expression_type(aux_buffer_id); + statement("constant uint32_t& ", to_swizzle_expression(var_id), " = ", to_name(aux_buffer_id), ".", + to_member_name(aux_type, k_aux_mbr_idx_swizzle_const), "[", + convert_to_string(get_metal_resource_index(var, SPIRType::Image)), "];"); + }); + } + } + }); + + // Builtin variables + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + uint32_t var_id = var.self; + BuiltIn bi_type = ir.meta[var_id].decoration.builtin_type; + + if (var.storage == StorageClassInput && is_builtin_variable(var)) + { + auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point); + switch (bi_type) + { + case BuiltInSamplePosition: + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = get_sample_position(", + to_expression(builtin_sample_id_id), ");"); + }); + break; + case BuiltInHelperInvocation: + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("simd_is_helper_thread() is only supported on macOS."); + else if (msl_options.is_macos() && !msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("simd_is_helper_thread() requires version 2.1 on macOS."); + + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_is_helper_thread();"); + }); + break; + case BuiltInPatchVertices: + if (get_execution_model() == ExecutionModelTessellationEvaluation) + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ", + to_expression(patch_stage_in_var_id), ".gl_in.size();"); + }); + else + entry_func.fixup_hooks_in.push_back([=]() { + statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = spvIndirectParams[0];"); + }); + break; + case BuiltInTessCoord: + // Emit a fixup to account for the shifted domain. Don't do this for triangles; + // MoltenVK will just reverse the winding order instead. + if (msl_options.tess_domain_origin_lower_left && !get_entry_point().flags.get(ExecutionModeTriangles)) + { + string tc = to_expression(var_id); + entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); }); + } + break; + default: + break; + } + } + }); +} + +// Returns the Metal index of the resource of the specified type as used by the specified variable. +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype) +{ + auto &execution = get_entry_point(); + auto &var_dec = ir.meta[var.self].decoration; + uint32_t var_desc_set = (var.storage == StorageClassPushConstant) ? kPushConstDescSet : var_dec.set; + uint32_t var_binding = (var.storage == StorageClassPushConstant) ? kPushConstBinding : var_dec.binding; + + // If a matching binding has been specified, find and use it + auto itr = find_if(begin(resource_bindings), end(resource_bindings), + [&](const pair<MSLResourceBinding, bool> &resource) -> bool { + return var_desc_set == resource.first.desc_set && var_binding == resource.first.binding && + execution.model == resource.first.stage; + }); + + if (itr != end(resource_bindings)) + { + itr->second = true; + switch (basetype) + { + case SPIRType::Struct: + return itr->first.msl_buffer; + case SPIRType::Image: + return itr->first.msl_texture; + case SPIRType::Sampler: + return itr->first.msl_sampler; + default: + return 0; + } + } + + // If there is no explicit mapping of bindings to MSL, use the declared binding. + if (has_decoration(var.self, DecorationBinding)) + return get_decoration(var.self, DecorationBinding); + + uint32_t binding_stride = 1; + auto &type = get<SPIRType>(var.basetype); + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + binding_stride *= type.array_size_literal[i] ? type.array[i] : get<SPIRConstant>(type.array[i]).scalar(); + + // If a binding has not been specified, revert to incrementing resource indices + uint32_t resource_index; + switch (basetype) + { + case SPIRType::Struct: + resource_index = next_metal_resource_index_buffer; + next_metal_resource_index_buffer += binding_stride; + break; + case SPIRType::Image: + resource_index = next_metal_resource_index_texture; + next_metal_resource_index_texture += binding_stride; + break; + case SPIRType::Sampler: + resource_index = next_metal_resource_index_sampler; + next_metal_resource_index_sampler += binding_stride; + break; + default: + resource_index = 0; + break; + } + return resource_index; +} + +string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + auto &var = get<SPIRVariable>(arg.id); + auto &type = get_variable_data_type(var); + auto &var_type = get<SPIRType>(arg.type); + StorageClass storage = var_type.storage; + bool is_pointer = var_type.pointer; + + // If we need to modify the name of the variable, make sure we use the original variable. + // Our alias is just a shadow variable. + uint32_t name_id = var.self; + if (arg.alias_global_variable && var.basevariable) + name_id = var.basevariable; + + bool constref = !arg.alias_global_variable && is_pointer && arg.write_count == 0; + + bool type_is_image = type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::Sampler; + + // Arrays of images/samplers in MSL are always const. + if (!type.array.empty() && type_is_image) + constref = true; + + string decl; + if (constref) + decl += "const "; + + bool builtin = is_builtin_variable(var); + if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id) + decl += type_to_glsl(type, arg.id); + else if (builtin) + decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn))); + else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) + decl += join(type_to_glsl(type, arg.id), "*"); + else + decl += type_to_glsl(type, arg.id); + + bool opaque_handle = storage == StorageClassUniformConstant; + + string address_space = get_argument_address_space(var); + + if (!builtin && !opaque_handle && !is_pointer && + (storage == StorageClassFunction || storage == StorageClassGeneric)) + { + // If the argument is a pure value and not an opaque type, we will pass by value. + if (is_array(type)) + { + // We are receiving an array by value. This is problematic. + // We cannot be sure of the target address space since we are supposed to receive a copy, + // but this is not possible with MSL without some extra work. + // We will have to assume we're getting a reference in thread address space. + // If we happen to get a reference in constant address space, the caller must emit a copy and pass that. + // Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from + // non-constant arrays, but we can create thread const from constant. + decl = string("thread const ") + decl; + decl += " (&"; + decl += to_expression(name_id); + decl += ")"; + decl += type_to_array_glsl(type); + } + else + { + if (!address_space.empty()) + decl = join(address_space, " ", decl); + decl += " "; + decl += to_expression(name_id); + } + } + else if (is_array(type) && !type_is_image) + { + // Arrays of images and samplers are special cased. + if (!address_space.empty()) + decl = join(address_space, " ", decl); + + if (msl_options.argument_buffers) + { + // An awkward case where we need to emit *more* address space declarations (yay!). + // An example is where we pass down an array of buffer pointers to leaf functions. + // It's a constant array containing pointers to constants. + // The pointer array is always constant however. E.g. + // device SSBO * constant (&array)[N]. + // const device SSBO * constant (&array)[N]. + // constant SSBO * constant (&array)[N]. + // However, this only matters for argument buffers, since for MSL 1.0 style codegen, + // we emit the buffer array on stack instead, and that seems to work just fine apparently. + if (storage == StorageClassUniform || storage == StorageClassStorageBuffer) + decl += " constant"; + } + + decl += " (&"; + decl += to_expression(name_id); + decl += ")"; + decl += type_to_array_glsl(type); + } + else if (!opaque_handle) + { + // If this is going to be a reference to a variable pointer, the address space + // for the reference has to go before the '&', but after the '*'. + if (!address_space.empty()) + { + if (decl.back() == '*') + decl += join(" ", address_space, " "); + else + decl = join(address_space, " ", decl); + } + decl += "&"; + decl += " "; + decl += to_expression(name_id); + } + else + { + if (!address_space.empty()) + decl = join(address_space, " ", decl); + decl += " "; + decl += to_expression(name_id); + } + + return decl; +} + +// If we're currently in the entry point function, and the object +// has a qualified name, use it, otherwise use the standard name. +string CompilerMSL::to_name(uint32_t id, bool allow_alias) const +{ + if (current_function && (current_function->self == ir.default_entry_point)) + { + auto *m = ir.find_meta(id); + if (m && !m->decoration.qualified_alias.empty()) + return m->decoration.qualified_alias; + } + return Compiler::to_name(id, allow_alias); +} + +// Returns a name that combines the name of the struct with the name of the member, except for Builtins +string CompilerMSL::to_qualified_member_name(const SPIRType &type, uint32_t index) +{ + // Don't qualify Builtin names because they are unique and are treated as such when building expressions + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) + return builtin_to_glsl(builtin, type.storage); + + // Strip any underscore prefix from member name + string mbr_name = to_member_name(type, index); + size_t startPos = mbr_name.find_first_not_of("_"); + mbr_name = (startPos != string::npos) ? mbr_name.substr(startPos) : ""; + return join(to_name(type.self), "_", mbr_name); +} + +// Ensures that the specified name is permanently usable by prepending a prefix +// if the first chars are _ and a digit, which indicate a transient name. +string CompilerMSL::ensure_valid_name(string name, string pfx) +{ + return (name.size() >= 2 && name[0] == '_' && isdigit(name[1])) ? (pfx + name) : name; +} + +// Replace all names that match MSL keywords or Metal Standard Library functions. +void CompilerMSL::replace_illegal_names() +{ + // FIXME: MSL and GLSL are doing two different things here. + // Agree on convention and remove this override. + static const unordered_set<string> keywords = { + "kernel", + "vertex", + "fragment", + "compute", + "bias", + "assert", + "VARIABLE_TRACEPOINT", + "STATIC_DATA_TRACEPOINT", + "STATIC_DATA_TRACEPOINT_V", + "METAL_ALIGN", + "METAL_ASM", + "METAL_CONST", + "METAL_DEPRECATED", + "METAL_ENABLE_IF", + "METAL_FUNC", + "METAL_INTERNAL", + "METAL_NON_NULL_RETURN", + "METAL_NORETURN", + "METAL_NOTHROW", + "METAL_PURE", + "METAL_UNAVAILABLE", + "METAL_IMPLICIT", + "METAL_EXPLICIT", + "METAL_CONST_ARG", + "METAL_ARG_UNIFORM", + "METAL_ZERO_ARG", + "METAL_VALID_LOD_ARG", + "METAL_VALID_LEVEL_ARG", + "METAL_VALID_STORE_ORDER", + "METAL_VALID_LOAD_ORDER", + "METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER", + "METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS", + "METAL_VALID_RENDER_TARGET", + "is_function_constant_defined", + "CHAR_BIT", + "SCHAR_MAX", + "SCHAR_MIN", + "UCHAR_MAX", + "CHAR_MAX", + "CHAR_MIN", + "USHRT_MAX", + "SHRT_MAX", + "SHRT_MIN", + "UINT_MAX", + "INT_MAX", + "INT_MIN", + "FLT_DIG", + "FLT_MANT_DIG", + "FLT_MAX_10_EXP", + "FLT_MAX_EXP", + "FLT_MIN_10_EXP", + "FLT_MIN_EXP", + "FLT_RADIX", + "FLT_MAX", + "FLT_MIN", + "FLT_EPSILON", + "FP_ILOGB0", + "FP_ILOGBNAN", + "MAXFLOAT", + "HUGE_VALF", + "INFINITY", + "NAN", + "M_E_F", + "M_LOG2E_F", + "M_LOG10E_F", + "M_LN2_F", + "M_LN10_F", + "M_PI_F", + "M_PI_2_F", + "M_PI_4_F", + "M_1_PI_F", + "M_2_PI_F", + "M_2_SQRTPI_F", + "M_SQRT2_F", + "M_SQRT1_2_F", + "HALF_DIG", + "HALF_MANT_DIG", + "HALF_MAX_10_EXP", + "HALF_MAX_EXP", + "HALF_MIN_10_EXP", + "HALF_MIN_EXP", + "HALF_RADIX", + "HALF_MAX", + "HALF_MIN", + "HALF_EPSILON", + "MAXHALF", + "HUGE_VALH", + "M_E_H", + "M_LOG2E_H", + "M_LOG10E_H", + "M_LN2_H", + "M_LN10_H", + "M_PI_H", + "M_PI_2_H", + "M_PI_4_H", + "M_1_PI_H", + "M_2_PI_H", + "M_2_SQRTPI_H", + "M_SQRT2_H", + "M_SQRT1_2_H", + "DBL_DIG", + "DBL_MANT_DIG", + "DBL_MAX_10_EXP", + "DBL_MAX_EXP", + "DBL_MIN_10_EXP", + "DBL_MIN_EXP", + "DBL_RADIX", + "DBL_MAX", + "DBL_MIN", + "DBL_EPSILON", + "HUGE_VAL", + "M_E", + "M_LOG2E", + "M_LOG10E", + "M_LN2", + "M_LN10", + "M_PI", + "M_PI_2", + "M_PI_4", + "M_1_PI", + "M_2_PI", + "M_2_SQRTPI", + "M_SQRT2", + "M_SQRT1_2", + }; + + static const unordered_set<string> illegal_func_names = { + "main", + "saturate", + "assert", + "VARIABLE_TRACEPOINT", + "STATIC_DATA_TRACEPOINT", + "STATIC_DATA_TRACEPOINT_V", + "METAL_ALIGN", + "METAL_ASM", + "METAL_CONST", + "METAL_DEPRECATED", + "METAL_ENABLE_IF", + "METAL_FUNC", + "METAL_INTERNAL", + "METAL_NON_NULL_RETURN", + "METAL_NORETURN", + "METAL_NOTHROW", + "METAL_PURE", + "METAL_UNAVAILABLE", + "METAL_IMPLICIT", + "METAL_EXPLICIT", + "METAL_CONST_ARG", + "METAL_ARG_UNIFORM", + "METAL_ZERO_ARG", + "METAL_VALID_LOD_ARG", + "METAL_VALID_LEVEL_ARG", + "METAL_VALID_STORE_ORDER", + "METAL_VALID_LOAD_ORDER", + "METAL_VALID_COMPARE_EXCHANGE_FAILURE_ORDER", + "METAL_COMPATIBLE_COMPARE_EXCHANGE_ORDERS", + "METAL_VALID_RENDER_TARGET", + "is_function_constant_defined", + "CHAR_BIT", + "SCHAR_MAX", + "SCHAR_MIN", + "UCHAR_MAX", + "CHAR_MAX", + "CHAR_MIN", + "USHRT_MAX", + "SHRT_MAX", + "SHRT_MIN", + "UINT_MAX", + "INT_MAX", + "INT_MIN", + "FLT_DIG", + "FLT_MANT_DIG", + "FLT_MAX_10_EXP", + "FLT_MAX_EXP", + "FLT_MIN_10_EXP", + "FLT_MIN_EXP", + "FLT_RADIX", + "FLT_MAX", + "FLT_MIN", + "FLT_EPSILON", + "FP_ILOGB0", + "FP_ILOGBNAN", + "MAXFLOAT", + "HUGE_VALF", + "INFINITY", + "NAN", + "M_E_F", + "M_LOG2E_F", + "M_LOG10E_F", + "M_LN2_F", + "M_LN10_F", + "M_PI_F", + "M_PI_2_F", + "M_PI_4_F", + "M_1_PI_F", + "M_2_PI_F", + "M_2_SQRTPI_F", + "M_SQRT2_F", + "M_SQRT1_2_F", + "HALF_DIG", + "HALF_MANT_DIG", + "HALF_MAX_10_EXP", + "HALF_MAX_EXP", + "HALF_MIN_10_EXP", + "HALF_MIN_EXP", + "HALF_RADIX", + "HALF_MAX", + "HALF_MIN", + "HALF_EPSILON", + "MAXHALF", + "HUGE_VALH", + "M_E_H", + "M_LOG2E_H", + "M_LOG10E_H", + "M_LN2_H", + "M_LN10_H", + "M_PI_H", + "M_PI_2_H", + "M_PI_4_H", + "M_1_PI_H", + "M_2_PI_H", + "M_2_SQRTPI_H", + "M_SQRT2_H", + "M_SQRT1_2_H", + "DBL_DIG", + "DBL_MANT_DIG", + "DBL_MAX_10_EXP", + "DBL_MAX_EXP", + "DBL_MIN_10_EXP", + "DBL_MIN_EXP", + "DBL_RADIX", + "DBL_MAX", + "DBL_MIN", + "DBL_EPSILON", + "HUGE_VAL", + "M_E", + "M_LOG2E", + "M_LOG10E", + "M_LN2", + "M_LN10", + "M_PI", + "M_PI_2", + "M_PI_4", + "M_1_PI", + "M_2_PI", + "M_2_SQRTPI", + "M_SQRT2", + "M_SQRT1_2", + }; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &) { + auto &dec = ir.meta[self].decoration; + if (keywords.find(dec.alias) != end(keywords)) + dec.alias += "0"; + }); + + ir.for_each_typed_id<SPIRFunction>([&](uint32_t self, SPIRFunction &) { + auto &dec = ir.meta[self].decoration; + if (illegal_func_names.find(dec.alias) != end(illegal_func_names)) + dec.alias += "0"; + }); + + ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &) { + for (auto &mbr_dec : ir.meta[self].members) + if (keywords.find(mbr_dec.alias) != end(keywords)) + mbr_dec.alias += "0"; + }); + + for (auto &entry : ir.entry_points) + { + // Change both the entry point name and the alias, to keep them synced. + string &ep_name = entry.second.name; + if (illegal_func_names.find(ep_name) != end(illegal_func_names)) + ep_name += "0"; + + // Always write this because entry point might have been renamed earlier. + ir.meta[entry.first].decoration.alias = ep_name; + } + + CompilerGLSL::replace_illegal_names(); +} + +string CompilerMSL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) +{ + auto *var = maybe_get<SPIRVariable>(base); + // If this is a buffer array, we have to dereference the buffer pointers. + // Otherwise, if this is a pointer expression, dereference it. + + bool declared_as_pointer = false; + + if (var) + { + bool is_buffer_variable = var->storage == StorageClassUniform || var->storage == StorageClassStorageBuffer; + declared_as_pointer = is_buffer_variable && is_array(get<SPIRType>(var->basetype)); + } + + if (declared_as_pointer || (!ptr_chain && should_dereference(base))) + return join("->", to_member_name(type, index)); + else + return join(".", to_member_name(type, index)); +} + +string CompilerMSL::to_qualifiers_glsl(uint32_t id) +{ + string quals; + + auto &type = expression_type(id); + if (type.storage == StorageClassWorkgroup) + quals += "threadgroup "; + + return quals; +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + string type_name; + + // Pointer? + if (type.pointer) + { + type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id)); + switch (type.basetype) + { + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + // These are handles. + break; + default: + // Anything else can be a raw pointer. + type_name += "*"; + break; + } + return type_name; + } + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + return sampler_type(type); + + case SPIRType::Void: + return "void"; + + case SPIRType::AtomicCounter: + return "atomic_uint"; + + case SPIRType::ControlPointArray: + return join("patch_control_point<", type_to_glsl(get<SPIRType>(type.parent_type), id), ">"); + + // Scalars + case SPIRType::Boolean: + type_name = "bool"; + break; + case SPIRType::Char: + case SPIRType::SByte: + type_name = "char"; + break; + case SPIRType::UByte: + type_name = "uchar"; + break; + case SPIRType::Short: + type_name = "short"; + break; + case SPIRType::UShort: + type_name = "ushort"; + break; + case SPIRType::Int: + type_name = "int"; + break; + case SPIRType::UInt: + type_name = "uint"; + break; + case SPIRType::Int64: + type_name = "long"; // Currently unsupported + break; + case SPIRType::UInt64: + type_name = "size_t"; + break; + case SPIRType::Half: + type_name = "half"; + break; + case SPIRType::Float: + type_name = "float"; + break; + case SPIRType::Double: + type_name = "double"; // Currently unsupported + break; + + default: + return "unknown_type"; + } + + // Matrix? + if (type.columns > 1) + type_name += to_string(type.columns) + "x"; + + // Vector or Matrix? + if (type.vecsize > 1) + type_name += to_string(type.vecsize); + + return type_name; +} + +std::string CompilerMSL::sampler_type(const SPIRType &type) +{ + if (!type.array.empty()) + { + if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of samplers."); + + if (type.array.size() > 1) + SPIRV_CROSS_THROW("Arrays of arrays of samplers are not supported in MSL."); + + // Arrays of samplers in MSL must be declared with a special array<T, N> syntax ala C++11 std::array. + uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + SPIRV_CROSS_THROW("Unsized array of samplers is not supported in MSL."); + + auto &parent = get<SPIRType>(get_pointee_type(type).parent_type); + return join("array<", sampler_type(parent), ", ", array_size, ">"); + } + else + return "sampler"; +} + +// Returns an MSL string describing the SPIR-V image type +string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) +{ + auto *var = maybe_get<SPIRVariable>(id); + if (var && var->basevariable) + { + // For comparison images, check against the base variable, + // and not the fake ID which might have been generated for this variable. + id = var->basevariable; + } + + if (!type.array.empty()) + { + uint32_t major = 2, minor = 0; + if (msl_options.is_ios()) + { + major = 1; + minor = 2; + } + if (!msl_options.supports_msl_version(major, minor)) + { + if (msl_options.is_ios()) + SPIRV_CROSS_THROW("MSL 1.2 or greater is required for arrays of textures."); + else + SPIRV_CROSS_THROW("MSL 2.0 or greater is required for arrays of textures."); + } + + if (type.array.size() > 1) + SPIRV_CROSS_THROW("Arrays of arrays of textures are not supported in MSL."); + + // Arrays of images in MSL must be declared with a special array<T, N> syntax ala C++11 std::array. + uint32_t array_size = to_array_size_literal(type); + if (array_size == 0) + SPIRV_CROSS_THROW("Unsized array of images is not supported in MSL."); + + auto &parent = get<SPIRType>(get_pointee_type(type).parent_type); + return join("array<", image_type_glsl(parent, id), ", ", array_size, ">"); + } + + string img_type_name; + + // Bypass pointers because we need the real image struct + auto &img_type = get<SPIRType>(type.self).image; + if (image_is_comparison(type, id)) + { + switch (img_type.dim) + { + case Dim1D: + img_type_name += "depth1d_unsupported_by_metal"; + break; + case Dim2D: + if (img_type.ms && img_type.arrayed) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1."); + img_type_name += "depth2d_ms_array"; + } + else if (img_type.ms) + img_type_name += "depth2d_ms"; + else if (img_type.arrayed) + img_type_name += "depth2d_array"; + else + img_type_name += "depth2d"; + break; + case Dim3D: + img_type_name += "depth3d_unsupported_by_metal"; + break; + case DimCube: + img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + break; + default: + img_type_name += "unknown_depth_texture_type"; + break; + } + } + else + { + switch (img_type.dim) + { + case Dim1D: + img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); + break; + case DimBuffer: + case Dim2D: + case DimSubpassData: + if (img_type.ms && img_type.arrayed) + { + if (!msl_options.supports_msl_version(2, 1)) + SPIRV_CROSS_THROW("Multisampled array textures are supported from 2.1."); + img_type_name += "texture2d_ms_array"; + } + else if (img_type.ms) + img_type_name += "texture2d_ms"; + else if (img_type.arrayed) + img_type_name += "texture2d_array"; + else + img_type_name += "texture2d"; + break; + case Dim3D: + img_type_name += "texture3d"; + break; + case DimCube: + img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + break; + default: + img_type_name += "unknown_texture_type"; + break; + } + } + + // Append the pixel type + img_type_name += "<"; + img_type_name += type_to_glsl(get<SPIRType>(img_type.type)); + + // For unsampled images, append the sample/read/write access qualifier. + // For kernel images, the access qualifier my be supplied directly by SPIR-V. + // Otherwise it may be set based on whether the image is read from or written to within the shader. + if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) + { + switch (img_type.access) + { + case AccessQualifierReadOnly: + img_type_name += ", access::read"; + break; + + case AccessQualifierWriteOnly: + img_type_name += ", access::write"; + break; + + case AccessQualifierReadWrite: + img_type_name += ", access::read_write"; + break; + + default: + { + auto *p_var = maybe_get_backing_variable(id); + if (p_var && p_var->basevariable) + p_var = maybe_get<SPIRVariable>(p_var->basevariable); + if (p_var && !has_decoration(p_var->self, DecorationNonWritable)) + { + img_type_name += ", access::"; + + if (!has_decoration(p_var->self, DecorationNonReadable)) + img_type_name += "read_"; + + img_type_name += "write"; + } + break; + } + } + } + + img_type_name += ">"; + + return img_type_name; +} + +string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + if (integral_cast && same_size_cast) + { + // Trivial bitcast case, casts between integers. + return type_to_glsl(out_type); + } + else + { + // Fall back to the catch-all bitcast in MSL. + return "as_type<" + type_to_glsl(out_type) + ">"; + } +} + +// Returns an MSL string identifying the name of a SPIR-V builtin. +// Output builtins are qualified with the name of the stage out structure. +string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + + // Override GLSL compiler strictness + case BuiltInVertexId: + return "gl_VertexID"; + case BuiltInInstanceId: + return "gl_InstanceID"; + case BuiltInVertexIndex: + return "gl_VertexIndex"; + case BuiltInInstanceIndex: + return "gl_InstanceIndex"; + case BuiltInBaseVertex: + return "gl_BaseVertex"; + case BuiltInBaseInstance: + return "gl_BaseInstance"; + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); + + // When used in the entry function, output builtins are qualified with output struct name. + // Test storage class as NOT Input, as output builtins might be part of generic type. + // Also don't do this for tessellation control shaders. + case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + /* fallthrough */ + case BuiltInPosition: + case BuiltInPointSize: + case BuiltInClipDistance: + case BuiltInCullDistance: + case BuiltInLayer: + case BuiltInFragDepth: + case BuiltInSampleMask: + if (get_execution_model() == ExecutionModelTessellationControl) + break; + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return stage_out_var_name + "." + CompilerGLSL::builtin_to_glsl(builtin, storage); + + break; + + case BuiltInTessLevelOuter: + if (get_execution_model() == ExecutionModelTessellationEvaluation) + { + if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && + current_function && (current_function->self == ir.default_entry_point)) + return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); + else + break; + } + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "].edgeTessellationFactor"); + break; + + case BuiltInTessLevelInner: + if (get_execution_model() == ExecutionModelTessellationEvaluation) + { + if (storage != StorageClassOutput && !get_entry_point().flags.get(ExecutionModeTriangles) && + current_function && (current_function->self == ir.default_entry_point)) + return join(patch_stage_in_var_name, ".", CompilerGLSL::builtin_to_glsl(builtin, storage)); + else + break; + } + if (storage != StorageClassInput && current_function && (current_function->self == ir.default_entry_point)) + return join(tess_factor_buffer_var_name, "[", to_expression(builtin_primitive_id_id), + "].insideTessellationFactor"); + break; + + default: + break; + } + + return CompilerGLSL::builtin_to_glsl(builtin, storage); +} + +// Returns an MSL string attribute qualifer for a SPIR-V builtin +string CompilerMSL::builtin_qualifier(BuiltIn builtin) +{ + auto &execution = get_entry_point(); + + switch (builtin) + { + // Vertex function in + case BuiltInVertexId: + return "vertex_id"; + case BuiltInVertexIndex: + return "vertex_id"; + case BuiltInBaseVertex: + return "base_vertex"; + case BuiltInInstanceId: + return "instance_id"; + case BuiltInInstanceIndex: + return "instance_id"; + case BuiltInBaseInstance: + return "base_instance"; + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); + + // Vertex function out + case BuiltInClipDistance: + return "clip_distance"; + case BuiltInPointSize: + return "point_size"; + case BuiltInPosition: + return "position"; + case BuiltInLayer: + return "render_target_array_index"; + case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + return "viewport_array_index"; + + // Tess. control function in + case BuiltInInvocationId: + return "thread_index_in_threadgroup"; + case BuiltInPatchVertices: + // Shouldn't be reached. + SPIRV_CROSS_THROW("PatchVertices is derived from the auxiliary buffer in MSL."); + case BuiltInPrimitiveId: + switch (execution.model) + { + case ExecutionModelTessellationControl: + return "threadgroup_position_in_grid"; + case ExecutionModelTessellationEvaluation: + return "patch_id"; + default: + SPIRV_CROSS_THROW("PrimitiveId is not supported in this execution model."); + } + + // Tess. control function out + case BuiltInTessLevelOuter: + case BuiltInTessLevelInner: + // Shouldn't be reached. + SPIRV_CROSS_THROW("Tessellation levels are handled specially in MSL."); + + // Tess. evaluation function in + case BuiltInTessCoord: + return "position_in_patch"; + + // Fragment function in + case BuiltInFrontFacing: + return "front_facing"; + case BuiltInPointCoord: + return "point_coord"; + case BuiltInFragCoord: + return "position"; + case BuiltInSampleId: + return "sample_id"; + case BuiltInSampleMask: + return "sample_mask"; + case BuiltInSamplePosition: + // Shouldn't be reached. + SPIRV_CROSS_THROW("Sample position is retrieved by a function in MSL."); + + // Fragment function out + case BuiltInFragDepth: + if (execution.flags.get(ExecutionModeDepthGreater)) + return "depth(greater)"; + else if (execution.flags.get(ExecutionModeDepthLess)) + return "depth(less)"; + else + return "depth(any)"; + + // Compute function in + case BuiltInGlobalInvocationId: + return "thread_position_in_grid"; + + case BuiltInWorkgroupId: + return "threadgroup_position_in_grid"; + + case BuiltInNumWorkgroups: + return "threadgroups_per_grid"; + + case BuiltInLocalInvocationId: + return "thread_position_in_threadgroup"; + + case BuiltInLocalInvocationIndex: + return "thread_index_in_threadgroup"; + + default: + return "unsupported-built-in"; + } +} + +// Returns an MSL string type declaration for a SPIR-V builtin +string CompilerMSL::builtin_type_decl(BuiltIn builtin) +{ + const SPIREntryPoint &execution = get_entry_point(); + switch (builtin) + { + // Vertex function in + case BuiltInVertexId: + return "uint"; + case BuiltInVertexIndex: + return "uint"; + case BuiltInBaseVertex: + return "uint"; + case BuiltInInstanceId: + return "uint"; + case BuiltInInstanceIndex: + return "uint"; + case BuiltInBaseInstance: + return "uint"; + case BuiltInDrawIndex: + SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); + + // Vertex function out + case BuiltInClipDistance: + return "float"; + case BuiltInPointSize: + return "float"; + case BuiltInPosition: + return "float4"; + case BuiltInLayer: + return "uint"; + case BuiltInViewportIndex: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("ViewportIndex requires Metal 2.0."); + return "uint"; + + // Tess. control function in + case BuiltInInvocationId: + return "uint"; + case BuiltInPatchVertices: + return "uint"; + case BuiltInPrimitiveId: + return "uint"; + + // Tess. control function out + case BuiltInTessLevelInner: + if (execution.model == ExecutionModelTessellationEvaluation) + return !execution.flags.get(ExecutionModeTriangles) ? "float2" : "float"; + return "half"; + case BuiltInTessLevelOuter: + if (execution.model == ExecutionModelTessellationEvaluation) + return !execution.flags.get(ExecutionModeTriangles) ? "float4" : "float"; + return "half"; + + // Tess. evaluation function in + case BuiltInTessCoord: + return execution.flags.get(ExecutionModeTriangles) ? "float3" : "float2"; + + // Fragment function in + case BuiltInFrontFacing: + return "bool"; + case BuiltInPointCoord: + return "float2"; + case BuiltInFragCoord: + return "float4"; + case BuiltInSampleId: + return "uint"; + case BuiltInSampleMask: + return "uint"; + case BuiltInSamplePosition: + return "float2"; + + // Fragment function out + case BuiltInFragDepth: + return "float"; + + // Compute function in + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInNumWorkgroups: + case BuiltInWorkgroupId: + return "uint3"; + case BuiltInLocalInvocationIndex: + return "uint"; + + case BuiltInHelperInvocation: + return "bool"; + + default: + return "unsupported-built-in-type"; + } +} + +// Returns the declaration of a built-in argument to a function +string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) +{ + string bi_arg; + if (prefix_comma) + bi_arg += ", "; + + bi_arg += builtin_type_decl(builtin); + bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput); + bi_arg += " [[" + builtin_qualifier(builtin) + "]]"; + + return bi_arg; +} + +// Returns the byte size of a struct member. +size_t CompilerMSL::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const +{ + auto &type = get<SPIRType>(struct_type.member_types[index]); + + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying size of opaque object."); + + default: + { + // For arrays, we can use ArrayStride to get an easy check. + // Runtime arrays will have zero size so force to min of one. + if (!type.array.empty()) + { + uint32_t array_size = to_array_size_literal(type); + return type_struct_member_array_stride(struct_type, index) * max(array_size, 1u); + } + + if (type.basetype == SPIRType::Struct) + { + // The size of a struct in Metal is aligned up to its natural alignment. + auto size = get_declared_struct_size(type); + auto alignment = get_declared_struct_member_alignment(struct_type, index); + return (size + alignment - 1) & ~(alignment - 1); + } + + uint32_t component_size = type.width / 8; + uint32_t vecsize = type.vecsize; + uint32_t columns = type.columns; + + // An unpacked 3-element vector or matrix column is the same memory size as a 4-element. + if (vecsize == 3 && !has_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPacked)) + vecsize = 4; + + return component_size * vecsize * columns; + } + } +} + +// Returns the byte alignment of a struct member. +size_t CompilerMSL::get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const +{ + auto &type = get<SPIRType>(struct_type.member_types[index]); + + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying alignment of opaque object."); + + case SPIRType::Struct: + { + // In MSL, a struct's alignment is equal to the maximum alignment of any of its members. + uint32_t alignment = 1; + for (uint32_t i = 0; i < type.member_types.size(); i++) + alignment = max(alignment, uint32_t(get_declared_struct_member_alignment(type, i))); + return alignment; + } + + default: + { + // Alignment of packed type is the same as the underlying component or column size. + // Alignment of unpacked type is the same as the vector size. + // Alignment of 3-elements vector is the same as 4-elements (including packed using column). + if (member_is_packed_type(struct_type, index)) + { + // This is getting pretty complicated. + // The special case of array of float/float2 needs to be handled here. + uint32_t packed_type_id = + get_extended_member_decoration(struct_type.self, index, SPIRVCrossDecorationPackedType); + const SPIRType *packed_type = packed_type_id != 0 ? &get<SPIRType>(packed_type_id) : nullptr; + if (packed_type && is_array(*packed_type) && !is_matrix(*packed_type) && + packed_type->basetype != SPIRType::Struct) + return (packed_type->width / 8) * 4; + else + return (type.width / 8) * (type.columns == 3 ? 4 : type.columns); + } + else + return (type.width / 8) * (type.vecsize == 3 ? 4 : type.vecsize); + } + } +} + +bool CompilerMSL::skip_argument(uint32_t) const +{ + return false; +} + +void CompilerMSL::analyze_sampled_image_usage() +{ + if (msl_options.swizzle_texture_samples) + { + SampledImageScanner scanner(*this); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), scanner); + } +} + +bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *args, uint32_t length) +{ + switch (opcode) + { + case OpLoad: + case OpImage: + case OpSampledImage: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + auto &type = compiler.get<SPIRType>(result_type); + if ((type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage) || type.image.sampled != 1) + return true; + + uint32_t id = args[1]; + compiler.set<SPIRExpression>(id, "", result_type, true); + break; + } + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + compiler.has_sampled_images = + compiler.has_sampled_images || compiler.is_sampled_image_type(compiler.expression_type(args[2])); + compiler.needs_aux_buffer_def = compiler.needs_aux_buffer_def || compiler.has_sampled_images; + break; + default: + break; + } + return true; +} + +bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Since MSL exists in a single execution scope, function prototype declarations are not + // needed, and clutter the output. If secondary functions are output (either as a SPIR-V + // function implementation or as indicated by the presence of OpFunctionCall), then set + // suppress_missing_prototypes to suppress compiler warnings of missing function prototypes. + + // Mark if the input requires the implementation of an SPIR-V function that does not exist in Metal. + SPVFuncImpl spv_func = get_spv_func_impl(opcode, args); + if (spv_func != SPVFuncImplNone) + { + compiler.spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + } + + switch (opcode) + { + + case OpFunctionCall: + suppress_missing_prototypes = true; + break; + + case OpImageWrite: + uses_resource_write = true; + break; + + case OpStore: + check_resource_write(args[0]); + break; + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + uses_atomics = true; + check_resource_write(args[2]); + break; + + case OpAtomicLoad: + uses_atomics = true; + break; + + default: + break; + } + + // If it has one, keep track of the instruction's result type, mapped by ID + uint32_t result_type, result_id; + if (compiler.instruction_to_result_type(result_type, result_id, opcode, args, length)) + result_types[result_id] = result_type; + + return true; +} + +// If the variable is a Uniform or StorageBuffer, mark that a resource has been written to. +void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id) +{ + auto *p_var = compiler.maybe_get_backing_variable(var_id); + StorageClass sc = p_var ? p_var->storage : StorageClassMax; + if (sc == StorageClassUniform || sc == StorageClassStorageBuffer) + uses_resource_write = true; +} + +// Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes. +CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args) +{ + switch (opcode) + { + case OpFMod: + return SPVFuncImplMod; + + case OpFunctionCall: + { + auto &return_type = compiler.get<SPIRType>(args[0]); + if (return_type.array.size() > 1) + { + if (return_type.array.size() > SPVFuncImplArrayCopyMultidimMax) + SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); + return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + return_type.array.size()); + } + else if (return_type.array.size() > 0) + return SPVFuncImplArrayCopy; + + break; + } + + case OpStore: + { + // Get the result type of the RHS. Since this is run as a pre-processing stage, + // we must extract the result type directly from the Instruction, rather than the ID. + uint32_t id_lhs = args[0]; + uint32_t id_rhs = args[1]; + + const SPIRType *type = nullptr; + if (compiler.ir.ids[id_rhs].get_type() != TypeNone) + { + // Could be a constant, or similar. + type = &compiler.expression_type(id_rhs); + } + else + { + // Or ... an expression. + uint32_t tid = result_types[id_rhs]; + if (tid) + type = &compiler.get<SPIRType>(tid); + } + + auto *var = compiler.maybe_get<SPIRVariable>(id_lhs); + + // Are we simply assigning to a statically assigned variable which takes a constant? + // Don't bother emitting this function. + bool static_expression_lhs = + var && var->storage == StorageClassFunction && var->statically_assigned && var->remapped_variable; + if (type && compiler.is_array(*type) && !static_expression_lhs) + { + if (type->array.size() > 1) + { + if (type->array.size() > SPVFuncImplArrayCopyMultidimMax) + SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); + return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type->array.size()); + } + else + return SPVFuncImplArrayCopy; + } + + break; + } + + case OpImageFetch: + case OpImageRead: + case OpImageWrite: + { + // Retrieve the image type, and if it's a Buffer, emit a texel coordinate function + uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]]; + if (tid && compiler.get<SPIRType>(tid).image.dim == DimBuffer) + return SPVFuncImplTexelBufferCoords; + + if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples) + return SPVFuncImplTextureSwizzle; + + break; + } + + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageGather: + case OpImageDrefGather: + if (compiler.msl_options.swizzle_texture_samples) + return SPVFuncImplTextureSwizzle; + break; + + case OpCompositeConstruct: + { + auto &type = compiler.get<SPIRType>(args[0]); + if (type.array.size() > 1) // We need to use copies to build the composite. + return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size() - 1); + break; + } + + case OpExtInst: + { + uint32_t extension_set = args[2]; + if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL) + { + GLSLstd450 op_450 = static_cast<GLSLstd450>(args[3]); + switch (op_450) + { + case GLSLstd450Radians: + return SPVFuncImplRadians; + case GLSLstd450Degrees: + return SPVFuncImplDegrees; + case GLSLstd450FindILsb: + return SPVFuncImplFindILsb; + case GLSLstd450FindSMsb: + return SPVFuncImplFindSMsb; + case GLSLstd450FindUMsb: + return SPVFuncImplFindUMsb; + case GLSLstd450SSign: + return SPVFuncImplSSign; + case GLSLstd450MatrixInverse: + { + auto &mat_type = compiler.get<SPIRType>(args[0]); + switch (mat_type.columns) + { + case 2: + return SPVFuncImplInverse2x2; + case 3: + return SPVFuncImplInverse3x3; + case 4: + return SPVFuncImplInverse4x4; + default: + break; + } + break; + } + default: + break; + } + } + break; + } + + default: + break; + } + return SPVFuncImplNone; +} + +// Sort both type and meta member content based on builtin status (put builtins at end), +// then by the required sorting aspect. +void CompilerMSL::MemberSorter::sort() +{ + // Create a temporary array of consecutive member indices and sort it based on how + // the members should be reordered, based on builtin and sorting aspect meta info. + size_t mbr_cnt = type.member_types.size(); + vector<uint32_t> mbr_idxs(mbr_cnt); + iota(mbr_idxs.begin(), mbr_idxs.end(), 0); // Fill with consecutive indices + std::sort(mbr_idxs.begin(), mbr_idxs.end(), *this); // Sort member indices based on sorting aspect + + // Move type and meta member info to the order defined by the sorted member indices. + // This is done by creating temporary copies of both member types and meta, and then + // copying back to the original content at the sorted indices. + auto mbr_types_cpy = type.member_types; + auto mbr_meta_cpy = meta.members; + for (uint32_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + type.member_types[mbr_idx] = mbr_types_cpy[mbr_idxs[mbr_idx]]; + meta.members[mbr_idx] = mbr_meta_cpy[mbr_idxs[mbr_idx]]; + } +} + +// Sort first by builtin status (put builtins at end), then by the sorting aspect. +bool CompilerMSL::MemberSorter::operator()(uint32_t mbr_idx1, uint32_t mbr_idx2) +{ + auto &mbr_meta1 = meta.members[mbr_idx1]; + auto &mbr_meta2 = meta.members[mbr_idx2]; + if (mbr_meta1.builtin != mbr_meta2.builtin) + return mbr_meta2.builtin; + else + switch (sort_aspect) + { + case Location: + return mbr_meta1.location < mbr_meta2.location; + case LocationReverse: + return mbr_meta1.location > mbr_meta2.location; + case Offset: + return mbr_meta1.offset < mbr_meta2.offset; + case OffsetThenLocationReverse: + return (mbr_meta1.offset < mbr_meta2.offset) || + ((mbr_meta1.offset == mbr_meta2.offset) && (mbr_meta1.location > mbr_meta2.location)); + case Alphabetical: + return mbr_meta1.alias < mbr_meta2.alias; + default: + return false; + } +} + +CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) + : type(t) + , meta(m) + , sort_aspect(sa) +{ + // Ensure enough meta info is available + meta.members.resize(max(type.member_types.size(), meta.members.size())); +} + +void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) +{ + auto &type = get<SPIRType>(get<SPIRVariable>(id).basetype); + if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) + SPIRV_CROSS_THROW("Can only remap SampledImage and Sampler type."); + if (!type.array.empty()) + SPIRV_CROSS_THROW("Can not remap array of samplers."); + constexpr_samplers[id] = sampler; +} + +void CompilerMSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(source_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + switch (builtin) + { + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + case BuiltInLayer: + case BuiltInViewportIndex: + expected_type = SPIRType::UInt; + break; + + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + if (get_execution_model() == ExecutionModelTessellationControl) + expected_type = SPIRType::Half; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + expr = bitcast_expression(expr_type, expected_type, expr); + + if (builtin == BuiltInTessCoord && get_entry_point().flags.get(ExecutionModeQuads) && expr_type.vecsize == 3) + { + // In SPIR-V, this is always a vec3, even for quads. In Metal, though, it's a float2 for quads. + // The code is expecting a float3, so we need to widen this. + expr = join("float3(", expr, ", 0)"); + } +} + +void CompilerMSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(target_id); + if (var) + target_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + switch (builtin) + { + case BuiltInLayer: + case BuiltInViewportIndex: + expected_type = SPIRType::UInt; + break; + + case BuiltInTessLevelInner: + case BuiltInTessLevelOuter: + expected_type = SPIRType::Half; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + { + if (expected_type == SPIRType::Half && expr_type.basetype == SPIRType::Float) + { + // These are of different widths, so we cannot do a straight bitcast. + expr = join("half(", expr, ")"); + } + else + { + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); + } + } +} + +std::string CompilerMSL::to_initializer_expression(const SPIRVariable &var) +{ + // We risk getting an array initializer here with MSL. If we have an array. + // FIXME: We cannot handle non-constant arrays being initialized. + // We will need to inject spvArrayCopy here somehow ... + auto &type = get<SPIRType>(var.basetype); + if (ir.ids[var.initializer].get_type() == TypeConstant && + (!type.array.empty() || type.basetype == SPIRType::Struct)) + return constant_expression(get<SPIRConstant>(var.initializer)); + else + return CompilerGLSL::to_initializer_expression(var); +} + +bool CompilerMSL::descriptor_set_is_argument_buffer(uint32_t desc_set) const +{ + if (!msl_options.argument_buffers) + return false; + if (desc_set >= kMaxArgumentBuffers) + return false; + + return (argument_buffer_discrete_mask & (1u << desc_set)) == 0; +} + +void CompilerMSL::analyze_argument_buffers() +{ + // Gather all used resources and sort them out into argument buffers. + // Each argument buffer corresponds to a descriptor set in SPIR-V. + // The [[id(N)]] values used correspond to the resource mapping we have for MSL. + // Otherwise, the binding number is used, but this is generally not safe some types like + // combined image samplers and arrays of resources. Metal needs different indices here, + // while SPIR-V can have one descriptor set binding. To use argument buffers in practice, + // you will need to use the remapping from the API. + for (auto &id : argument_buffer_ids) + id = 0; + + // Output resources, sorted by resource index & type. + struct Resource + { + SPIRVariable *var; + string name; + SPIRType::BaseType basetype; + uint32_t index; + }; + vector<Resource> resources_in_set[kMaxArgumentBuffers]; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t self, SPIRVariable &var) { + if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + uint32_t desc_set = get_decoration(self, DecorationDescriptorSet); + // Ignore if it's part of a push descriptor set. + if (!descriptor_set_is_argument_buffer(desc_set)) + return; + + uint32_t var_id = var.self; + auto &type = get_variable_data_type(var); + + if (desc_set >= kMaxArgumentBuffers) + SPIRV_CROSS_THROW("Descriptor set index is out of range."); + + if (type.basetype == SPIRType::SampledImage) + { + add_resource_name(var_id); + + uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image); + uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); + + // Avoid trivial conflicts where we didn't remap. + // This will let us at least compile test cases without having to instrument remaps. + if (sampler_resource_index == image_resource_index) + sampler_resource_index += type.array.empty() ? 1 : to_array_size_literal(type); + + resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index }); + + if (type.image.dim != DimBuffer && constexpr_samplers.count(var_id) == 0) + { + resources_in_set[desc_set].push_back( + { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index }); + } + } + else if (constexpr_samplers.count(var_id) == 0) + { + // constexpr samplers are not declared as resources. + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + } + } + }); + + for (uint32_t desc_set = 0; desc_set < kMaxArgumentBuffers; desc_set++) + { + auto &resources = resources_in_set[desc_set]; + if (resources.empty()) + continue; + + assert(descriptor_set_is_argument_buffer(desc_set)); + + uint32_t next_id = ir.increase_bound_by(3); + uint32_t type_id = next_id + 1; + uint32_t ptr_type_id = next_id + 2; + argument_buffer_ids[desc_set] = next_id; + + auto &buffer_type = set<SPIRType>(type_id); + buffer_type.storage = StorageClassUniform; + buffer_type.basetype = SPIRType::Struct; + set_name(type_id, join("spvDescriptorSetBuffer", desc_set)); + + auto &ptr_type = set<SPIRType>(ptr_type_id); + ptr_type = buffer_type; + ptr_type.pointer = true; + ptr_type.pointer_depth = 1; + ptr_type.parent_type = type_id; + + uint32_t buffer_variable_id = next_id; + set<SPIRVariable>(buffer_variable_id, ptr_type_id, StorageClassUniform); + set_name(buffer_variable_id, join("spvDescriptorSet", desc_set)); + + // Ids must be emitted in ID order. + sort(begin(resources), end(resources), [&](const Resource &lhs, const Resource &rhs) -> bool { + return tie(lhs.index, lhs.basetype) < tie(rhs.index, rhs.basetype); + }); + + uint32_t member_index = 0; + for (auto &resource : resources) + { + auto &var = *resource.var; + auto &type = get_variable_data_type(var); + string mbr_name = ensure_valid_name(resource.name, "m"); + set_member_name(buffer_type.self, member_index, mbr_name); + + if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler) + { + // Have to synthesize a sampler type here. + + bool type_is_array = !type.array.empty(); + uint32_t sampler_type_id = ir.increase_bound_by(type_is_array ? 2 : 1); + auto &new_sampler_type = set<SPIRType>(sampler_type_id); + new_sampler_type.basetype = SPIRType::Sampler; + new_sampler_type.storage = StorageClassUniformConstant; + + if (type_is_array) + { + uint32_t sampler_type_array_id = sampler_type_id + 1; + auto &sampler_type_array = set<SPIRType>(sampler_type_array_id); + sampler_type_array = new_sampler_type; + sampler_type_array.array = type.array; + sampler_type_array.array_size_literal = type.array_size_literal; + sampler_type_array.parent_type = sampler_type_id; + buffer_type.member_types.push_back(sampler_type_array_id); + } + else + buffer_type.member_types.push_back(sampler_type_id); + } + else + { + if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || + resource.basetype == SPIRType::SampledImage) + { + // Drop pointer information when we emit the resources into a struct. + buffer_type.member_types.push_back(get_variable_data_type_id(var)); + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else + { + // Resources will be declared as pointers not references, so automatically dereference as appropriate. + buffer_type.member_types.push_back(var.basetype); + if (type.array.empty()) + set_qualified_name(var.self, join("(*", to_name(buffer_variable_id), ".", mbr_name, ")")); + else + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + } + + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationArgumentBufferID, + resource.index); + set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationInterfaceOrigID, + var.self); + member_index++; + } + } +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp new file mode 100644 index 0000000..38610e1 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp @@ -0,0 +1,600 @@ +/* + * Copyright 2016-2019 The Brenwill Workshop Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_MSL_HPP +#define SPIRV_CROSS_MSL_HPP + +#include "spirv_glsl.hpp" +#include <map> +#include <set> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +namespace spirv_cross +{ + +// Indicates the format of the vertex attribute. Currently limited to specifying +// if the attribute is an 8-bit unsigned integer, 16-bit unsigned integer, or +// some other format. +enum MSLVertexFormat +{ + MSL_VERTEX_FORMAT_OTHER = 0, + MSL_VERTEX_FORMAT_UINT8 = 1, + MSL_VERTEX_FORMAT_UINT16 = 2, + MSL_VERTEX_FORMAT_INT_MAX = 0x7fffffff +}; + +// Defines MSL characteristics of a vertex attribute at a particular location. +// After compilation, it is possible to query whether or not this location was used. +struct MSLVertexAttr +{ + uint32_t location = 0; + uint32_t msl_buffer = 0; + uint32_t msl_offset = 0; + uint32_t msl_stride = 0; + bool per_instance = false; + MSLVertexFormat format = MSL_VERTEX_FORMAT_OTHER; + spv::BuiltIn builtin = spv::BuiltInMax; +}; + +// Matches the binding index of a MSL resource for a binding within a descriptor set. +// Taken together, the stage, desc_set and binding combine to form a reference to a resource +// descriptor used in a particular shading stage. +// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set, +// the binding reference we remap to will become an [[id(N)]] attribute within +// the "descriptor set" argument buffer structure. +// For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a +// [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. +struct MSLResourceBinding +{ + spv::ExecutionModel stage = spv::ExecutionModelMax; + uint32_t desc_set = 0; + uint32_t binding = 0; + uint32_t msl_buffer = 0; + uint32_t msl_texture = 0; + uint32_t msl_sampler = 0; +}; + +enum MSLSamplerCoord +{ + MSL_SAMPLER_COORD_NORMALIZED = 0, + MSL_SAMPLER_COORD_PIXEL = 1, + MSL_SAMPLER_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerFilter +{ + MSL_SAMPLER_FILTER_NEAREST = 0, + MSL_SAMPLER_FILTER_LINEAR = 1, + MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerMipFilter +{ + MSL_SAMPLER_MIP_FILTER_NONE = 0, + MSL_SAMPLER_MIP_FILTER_NEAREST = 1, + MSL_SAMPLER_MIP_FILTER_LINEAR = 2, + MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerAddress +{ + MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, + MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, + MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, + MSL_SAMPLER_ADDRESS_REPEAT = 3, + MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, + MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerCompareFunc +{ + MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, + MSL_SAMPLER_COMPARE_FUNC_LESS = 1, + MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, + MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, + MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, + MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, + MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, + MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, + MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerBorderColor +{ + MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, + MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, + MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, + MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff +}; + +struct MSLConstexprSampler +{ + MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; + MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE; + MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; + MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; + MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; + MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER; + MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK; + float lod_clamp_min = 0.0f; + float lod_clamp_max = 1000.0f; + int max_anisotropy = 1; + + bool compare_enable = false; + bool lod_clamp_enable = false; + bool anisotropy_enable = false; +}; + +// Tracks the type ID and member index of a struct member +using MSLStructMemberKey = uint64_t; + +// Special constant used in a MSLResourceBinding desc_set +// element to indicate the bindings for the push constants. +static const uint32_t kPushConstDescSet = ~(0u); + +// Special constant used in a MSLResourceBinding binding +// element to indicate the bindings for the push constants. +static const uint32_t kPushConstBinding = 0; + +static const uint32_t kMaxArgumentBuffers = 8; + +// The current version of the aux buffer structure. It must be incremented any time a +// new field is added to the aux buffer. +#define SPIRV_CROSS_MSL_AUX_BUFFER_STRUCT_VERSION 1 + +// Decompiles SPIR-V to Metal Shading Language +class CompilerMSL : public CompilerGLSL +{ +public: + // Options for compiling to Metal Shading Language + struct Options + { + typedef enum + { + iOS = 0, + macOS = 1 + } Platform; + + Platform platform = macOS; + uint32_t msl_version = make_msl_version(1, 2); + uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers + uint32_t aux_buffer_index = 30; + uint32_t indirect_params_buffer_index = 29; + uint32_t shader_output_buffer_index = 28; + uint32_t shader_patch_output_buffer_index = 27; + uint32_t shader_tess_factor_buffer_index = 26; + uint32_t shader_input_wg_index = 0; + bool enable_point_size_builtin = true; + bool disable_rasterization = false; + bool capture_output_to_buffer = false; + bool swizzle_texture_samples = false; + bool tess_domain_origin_lower_left = false; + + // Enable use of MSL 2.0 indirect argument buffers. + // MSL 2.0 must also be enabled. + bool argument_buffers = false; + + // Fragment output in MSL must have at least as many components as the render pass. + // Add support to explicit pad out components. + bool pad_fragment_output_components = false; + + bool is_ios() + { + return platform == iOS; + } + + bool is_macos() + { + return platform == macOS; + } + + void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + msl_version = make_msl_version(major, minor, patch); + } + + bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + return msl_version >= make_msl_version(major, minor, patch); + } + + static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + return (major * 10000) + (minor * 100) + patch; + } + }; + + const Options &get_msl_options() const + { + return msl_options; + } + + void set_msl_options(const Options &opts) + { + msl_options = opts; + } + + // Provide feedback to calling API to allow runtime to disable pipeline + // rasterization if vertex shader requires rasterization to be disabled. + bool get_is_rasterization_disabled() const + { + return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || + get_entry_point().model == spv::ExecutionModelTessellationControl || + get_entry_point().model == spv::ExecutionModelTessellationEvaluation); + } + + // Provide feedback to calling API to allow it to pass an auxiliary + // buffer if the shader needs it. + bool needs_aux_buffer() const + { + return used_aux_buffer; + } + + // Provide feedback to calling API to allow it to pass an output + // buffer if the shader needs it. + bool needs_output_buffer() const + { + return capture_output_to_buffer && stage_out_var_id != 0; + } + + // Provide feedback to calling API to allow it to pass a patch output + // buffer if the shader needs it. + bool needs_patch_output_buffer() const + { + return capture_output_to_buffer && patch_stage_out_var_id != 0; + } + + // Provide feedback to calling API to allow it to pass an input threadgroup + // buffer if the shader needs it. + bool needs_input_threadgroup_mem() const + { + return capture_output_to_buffer && stage_in_var_id != 0; + } + + explicit CompilerMSL(std::vector<uint32_t> spirv); + CompilerMSL(const uint32_t *ir, size_t word_count); + explicit CompilerMSL(const ParsedIR &ir); + explicit CompilerMSL(ParsedIR &&ir); + + // attr is a vertex attribute binding used to match + // vertex content locations to MSL attributes. If vertex attributes are provided, + // is_msl_vertex_attribute_used() will return true after calling ::compile() if + // the location was used by the MSL code. + void add_msl_vertex_attribute(const MSLVertexAttr &attr); + + // resource is a resource binding to indicate the MSL buffer, + // texture or sampler index to use for a particular SPIR-V description set + // and binding. If resource bindings are provided, + // is_msl_resource_binding_used() will return true after calling ::compile() if + // the set/binding combination was used by the MSL code. + void add_msl_resource_binding(const MSLResourceBinding &resource); + + // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. + // This corresponds to VK_KHR_push_descriptor in Vulkan. + void add_discrete_descriptor_set(uint32_t desc_set); + + // Query after compilation is done. This allows you to check if a location or set/binding combination was used by the shader. + bool is_msl_vertex_attribute_used(uint32_t location); + bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding); + + // Compiles the SPIR-V code into Metal Shading Language. + std::string compile() override; + + // Remap a sampler with ID to a constexpr sampler. + // Older iOS targets must use constexpr samplers in certain cases (PCF), + // so a static sampler must be used. + // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. + // This can be used on both combined image/samplers (sampler2D) or standalone samplers. + // The remapped sampler must not be an array of samplers. + void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler); + + // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect + // to use for a particular location. The default is 4 if number of components is not overridden. + void set_fragment_output_components(uint32_t location, uint32_t components); + +protected: + // An enum of SPIR-V functions that are implemented in additional + // source code that is added to the shader if necessary. + enum SPVFuncImpl + { + SPVFuncImplNone, + SPVFuncImplMod, + SPVFuncImplRadians, + SPVFuncImplDegrees, + SPVFuncImplFindILsb, + SPVFuncImplFindSMsb, + SPVFuncImplFindUMsb, + SPVFuncImplSSign, + SPVFuncImplArrayCopyMultidimBase, + // Unfortunately, we cannot use recursive templates in the MSL compiler properly, + // so stamp out variants up to some arbitrary maximum. + SPVFuncImplArrayCopy = SPVFuncImplArrayCopyMultidimBase + 1, + SPVFuncImplArrayOfArrayCopy2Dim = SPVFuncImplArrayCopyMultidimBase + 2, + SPVFuncImplArrayOfArrayCopy3Dim = SPVFuncImplArrayCopyMultidimBase + 3, + SPVFuncImplArrayOfArrayCopy4Dim = SPVFuncImplArrayCopyMultidimBase + 4, + SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, + SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, + SPVFuncImplTexelBufferCoords, + SPVFuncImplInverse4x4, + SPVFuncImplInverse3x3, + SPVFuncImplInverse2x2, + SPVFuncImplRowMajor2x3, + SPVFuncImplRowMajor2x4, + SPVFuncImplRowMajor3x2, + SPVFuncImplRowMajor3x4, + SPVFuncImplRowMajor4x2, + SPVFuncImplRowMajor4x3, + SPVFuncImplTextureSwizzle, + SPVFuncImplArrayCopyMultidimMax = 6 + }; + + void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + void emit_instruction(const Instruction &instr) override; + void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count) override; + void emit_header() override; + void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; + void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; + void emit_fixup() override; + std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const std::string &qualifier = ""); + void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const std::string &qualifier = "", uint32_t base_offset = 0) override; + std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; + std::string sampler_type(const SPIRType &type); + std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; + size_t get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const override; + std::string to_func_call_arg(uint32_t id) override; + std::string to_name(uint32_t id, bool allow_alias = true) const override; + std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, + uint32_t lod) override; + std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, + uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, + uint32_t comp, uint32_t sample, bool *p_forward) override; + std::string to_initializer_expression(const SPIRVariable &var) override; + std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t packed_type_id) override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + bool skip_argument(uint32_t id) const override; + std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; + std::string to_qualifiers_glsl(uint32_t id) override; + void replace_illegal_names() override; + void declare_undefined_values() override; + void declare_constant_arrays(); + bool is_patch_block(const SPIRType &type); + bool is_non_native_row_major_matrix(uint32_t id) override; + bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; + std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, bool is_packed) override; + + void preprocess_op_codes(); + void localize_global_variables(); + void extract_global_variables_from_functions(); + void mark_packable_structs(); + void mark_as_packable(SPIRType &type); + + std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars; + void extract_global_variables_from_function(uint32_t func_id, std::set<uint32_t> &added_arg_ids, + std::unordered_set<uint32_t> &global_var_ids, + std::unordered_set<uint32_t> &processed_func_ids); + uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); + uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); + + void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, + SPIRVariable &var, bool strip_array); + void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, bool strip_array); + void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, bool strip_array); + void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, uint32_t index, + bool strip_array); + void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, + SPIRType &ib_type, SPIRVariable &var, uint32_t index, + bool strip_array); + uint32_t get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array); + void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); + + void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); + + void mark_location_as_used_by_shader(uint32_t location, spv::StorageClass storage); + uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); + uint32_t ensure_correct_attribute_type(uint32_t type_id, uint32_t location); + + void emit_custom_functions(); + void emit_resources(); + void emit_specialization_constants_and_structs(); + void emit_interface_block(uint32_t ib_var_id); + bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); + void add_convert_row_major_matrix_function(uint32_t cols, uint32_t rows); + void fix_up_shader_inputs_outputs(); + + std::string func_type_decl(SPIRType &type); + std::string entry_point_args_classic(bool append_comma); + std::string entry_point_args_argument_buffer(bool append_comma); + std::string entry_point_arg_stage_in(); + void entry_point_args_builtin(std::string &args); + void entry_point_args_discrete_descriptors(std::string &args); + std::string to_qualified_member_name(const SPIRType &type, uint32_t index); + std::string ensure_valid_name(std::string name, std::string pfx); + std::string to_sampler_expression(uint32_t id); + std::string to_swizzle_expression(uint32_t id); + std::string builtin_qualifier(spv::BuiltIn builtin); + std::string builtin_type_decl(spv::BuiltIn builtin); + std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); + std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); + std::string argument_decl(const SPIRFunction::Parameter &arg); + std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); + uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype); + uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); + size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const; + std::string to_component_argument(uint32_t id); + void align_struct(SPIRType &ib_type); + bool is_member_packable(SPIRType &ib_type, uint32_t index); + MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index); + std::string get_argument_address_space(const SPIRVariable &argument); + std::string get_type_address_space(const SPIRType &type, uint32_t id); + SPIRType &get_stage_in_struct_type(); + SPIRType &get_stage_out_struct_type(); + SPIRType &get_patch_stage_in_struct_type(); + SPIRType &get_patch_stage_out_struct_type(); + std::string get_tess_factor_struct_name(); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1, + uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, + bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); + const char *get_memory_order(uint32_t spv_mem_sem); + void add_pragma_line(const std::string &line); + void add_typedef_line(const std::string &line); + void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); + void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override; + void build_implicit_builtins(); + void emit_entry_point_declarations() override; + uint32_t builtin_frag_coord_id = 0; + uint32_t builtin_sample_id_id = 0; + uint32_t builtin_vertex_idx_id = 0; + uint32_t builtin_base_vertex_id = 0; + uint32_t builtin_instance_idx_id = 0; + uint32_t builtin_base_instance_id = 0; + uint32_t builtin_invocation_id_id = 0; + uint32_t builtin_primitive_id_id = 0; + uint32_t aux_buffer_id = 0; + + void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; + void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; + void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; + + void analyze_sampled_image_usage(); + + bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); + bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); + + Options msl_options; + std::set<SPVFuncImpl> spv_function_implementations; + std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_location; + std::unordered_map<uint32_t, MSLVertexAttr> vtx_attrs_by_builtin; + std::unordered_set<uint32_t> vtx_attrs_in_use; + std::unordered_map<uint32_t, uint32_t> fragment_output_components; + std::unordered_map<MSLStructMemberKey, uint32_t> struct_member_padding; + std::set<std::string> pragma_lines; + std::set<std::string> typedef_lines; + std::vector<uint32_t> vars_needing_early_declaration; + + std::vector<std::pair<MSLResourceBinding, bool>> resource_bindings; + uint32_t next_metal_resource_index_buffer = 0; + uint32_t next_metal_resource_index_texture = 0; + uint32_t next_metal_resource_index_sampler = 0; + + uint32_t stage_in_var_id = 0; + uint32_t stage_out_var_id = 0; + uint32_t patch_stage_in_var_id = 0; + uint32_t patch_stage_out_var_id = 0; + uint32_t stage_in_ptr_var_id = 0; + uint32_t stage_out_ptr_var_id = 0; + bool has_sampled_images = false; + bool needs_vertex_idx_arg = false; + bool needs_instance_idx_arg = false; + bool is_rasterization_disabled = false; + bool capture_output_to_buffer = false; + bool needs_aux_buffer_def = false; + bool used_aux_buffer = false; + bool added_builtin_tess_level = false; + std::string qual_pos_var_name; + std::string stage_in_var_name = "in"; + std::string stage_out_var_name = "out"; + std::string patch_stage_in_var_name = "patchIn"; + std::string patch_stage_out_var_name = "patchOut"; + std::string sampler_name_suffix = "Smplr"; + std::string swizzle_name_suffix = "Swzl"; + std::string input_wg_var_name = "gl_in"; + std::string output_buffer_var_name = "spvOut"; + std::string patch_output_buffer_var_name = "spvPatchOut"; + std::string tess_factor_buffer_var_name = "spvTessLevel"; + spv::Op previous_instruction_opcode = spv::OpNop; + + std::unordered_map<uint32_t, MSLConstexprSampler> constexpr_samplers; + std::vector<uint32_t> buffer_arrays; + + uint32_t argument_buffer_ids[kMaxArgumentBuffers]; + uint32_t argument_buffer_discrete_mask = 0; + void analyze_argument_buffers(); + bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; + + uint32_t get_target_components_for_fragment_location(uint32_t location) const; + uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components); + + // OpcodeHandler that handles several MSL preprocessing operations. + struct OpCodePreprocessor : OpcodeHandler + { + OpCodePreprocessor(CompilerMSL &compiler_) + : compiler(compiler_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args); + void check_resource_write(uint32_t var_id); + + CompilerMSL &compiler; + std::unordered_map<uint32_t, uint32_t> result_types; + bool suppress_missing_prototypes = false; + bool uses_atomics = false; + bool uses_resource_write = false; + }; + + // OpcodeHandler that scans for uses of sampled images + struct SampledImageScanner : OpcodeHandler + { + SampledImageScanner(CompilerMSL &compiler_) + : compiler(compiler_) + { + } + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override; + + CompilerMSL &compiler; + }; + + // Sorts the members of a SPIRType and associated Meta info based on a settable sorting + // aspect, which defines which aspect of the struct members will be used to sort them. + // Regardless of the sorting aspect, built-in members always appear at the end of the struct. + struct MemberSorter + { + enum SortAspect + { + Location, + LocationReverse, + Offset, + OffsetThenLocationReverse, + Alphabetical + }; + + void sort(); + bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2); + MemberSorter(SPIRType &t, Meta &m, SortAspect sa); + + SPIRType &type; + Meta &meta; + SortAspect sort_aspect; + }; +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_parser.cpp b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp new file mode 100644 index 0000000..fa87fa3 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_parser.cpp @@ -0,0 +1,1121 @@ +/* + * Copyright 2018-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_parser.hpp" +#include <assert.h> + +using namespace std; +using namespace spv; + +namespace spirv_cross +{ +Parser::Parser(std::vector<uint32_t> spirv) +{ + ir.spirv = move(spirv); +} + +Parser::Parser(const uint32_t *spirv_data, size_t word_count) +{ + ir.spirv = vector<uint32_t>(spirv_data, spirv_data + word_count); +} + +static bool decoration_is_string(Decoration decoration) +{ + switch (decoration) + { + case DecorationHlslSemanticGOOGLE: + return true; + + default: + return false; + } +} + +static inline uint32_t swap_endian(uint32_t v) +{ + return ((v >> 24) & 0x000000ffu) | ((v >> 8) & 0x0000ff00u) | ((v << 8) & 0x00ff0000u) | ((v << 24) & 0xff000000u); +} + +static bool is_valid_spirv_version(uint32_t version) +{ + switch (version) + { + // Allow v99 since it tends to just work. + case 99: + case 0x10000: // SPIR-V 1.0 + case 0x10100: // SPIR-V 1.1 + case 0x10200: // SPIR-V 1.2 + case 0x10300: // SPIR-V 1.3 + return true; + + default: + return false; + } +} + +void Parser::parse() +{ + auto &spirv = ir.spirv; + + auto len = spirv.size(); + if (len < 5) + SPIRV_CROSS_THROW("SPIRV file too small."); + + auto s = spirv.data(); + + // Endian-swap if we need to. + if (s[0] == swap_endian(MagicNumber)) + transform(begin(spirv), end(spirv), begin(spirv), [](uint32_t c) { return swap_endian(c); }); + + if (s[0] != MagicNumber || !is_valid_spirv_version(s[1])) + SPIRV_CROSS_THROW("Invalid SPIRV format."); + + uint32_t bound = s[3]; + ir.set_id_bounds(bound); + + uint32_t offset = 5; + + vector<Instruction> instructions; + while (offset < len) + { + Instruction instr = {}; + instr.op = spirv[offset] & 0xffff; + instr.count = (spirv[offset] >> 16) & 0xffff; + + if (instr.count == 0) + SPIRV_CROSS_THROW("SPIR-V instructions cannot consume 0 words. Invalid SPIR-V file."); + + instr.offset = offset + 1; + instr.length = instr.count - 1; + + offset += instr.count; + + if (offset > spirv.size()) + SPIRV_CROSS_THROW("SPIR-V instruction goes out of bounds."); + + instructions.push_back(instr); + } + + for (auto &i : instructions) + parse(i); + + if (current_function) + SPIRV_CROSS_THROW("Function was not terminated."); + if (current_block) + SPIRV_CROSS_THROW("Block was not terminated."); +} + +const uint32_t *Parser::stream(const Instruction &instr) const +{ + // If we're not going to use any arguments, just return nullptr. + // We want to avoid case where we return an out of range pointer + // that trips debug assertions on some platforms. + if (!instr.length) + return nullptr; + + if (instr.offset + instr.length > ir.spirv.size()) + SPIRV_CROSS_THROW("Compiler::stream() out of range."); + return &ir.spirv[instr.offset]; +} + +static string extract_string(const vector<uint32_t> &spirv, uint32_t offset) +{ + string ret; + for (uint32_t i = offset; i < spirv.size(); i++) + { + uint32_t w = spirv[i]; + + for (uint32_t j = 0; j < 4; j++, w >>= 8) + { + char c = w & 0xff; + if (c == '\0') + return ret; + ret += c; + } + } + + SPIRV_CROSS_THROW("String was not terminated before EOF"); +} + +void Parser::parse(const Instruction &instruction) +{ + auto *ops = stream(instruction); + auto op = static_cast<Op>(instruction.op); + uint32_t length = instruction.length; + + switch (op) + { + case OpMemoryModel: + case OpSourceContinued: + case OpSourceExtension: + case OpNop: + case OpLine: + case OpNoLine: + case OpString: + case OpModuleProcessed: + break; + + case OpSource: + { + auto lang = static_cast<SourceLanguage>(ops[0]); + switch (lang) + { + case SourceLanguageESSL: + ir.source.es = true; + ir.source.version = ops[1]; + ir.source.known = true; + ir.source.hlsl = false; + break; + + case SourceLanguageGLSL: + ir.source.es = false; + ir.source.version = ops[1]; + ir.source.known = true; + ir.source.hlsl = false; + break; + + case SourceLanguageHLSL: + // For purposes of cross-compiling, this is GLSL 450. + ir.source.es = false; + ir.source.version = 450; + ir.source.known = true; + ir.source.hlsl = true; + break; + + default: + ir.source.known = false; + break; + } + break; + } + + case OpUndef: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + set<SPIRUndef>(id, result_type); + break; + } + + case OpCapability: + { + uint32_t cap = ops[0]; + if (cap == CapabilityKernel) + SPIRV_CROSS_THROW("Kernel capability not supported."); + + ir.declared_capabilities.push_back(static_cast<Capability>(ops[0])); + break; + } + + case OpExtension: + { + auto ext = extract_string(ir.spirv, instruction.offset); + ir.declared_extensions.push_back(move(ext)); + break; + } + + case OpExtInstImport: + { + uint32_t id = ops[0]; + auto ext = extract_string(ir.spirv, instruction.offset + 1); + if (ext == "GLSL.std.450") + set<SPIRExtension>(id, SPIRExtension::GLSL); + else if (ext == "SPV_AMD_shader_ballot") + set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot); + else if (ext == "SPV_AMD_shader_explicit_vertex_parameter") + set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter); + else if (ext == "SPV_AMD_shader_trinary_minmax") + set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_trinary_minmax); + else if (ext == "SPV_AMD_gcn_shader") + set<SPIRExtension>(id, SPIRExtension::SPV_AMD_gcn_shader); + else + set<SPIRExtension>(id, SPIRExtension::Unsupported); + + // Other SPIR-V extensions which have ExtInstrs are currently not supported. + + break; + } + + case OpEntryPoint: + { + auto itr = + ir.entry_points.insert(make_pair(ops[1], SPIREntryPoint(ops[1], static_cast<ExecutionModel>(ops[0]), + extract_string(ir.spirv, instruction.offset + 2)))); + auto &e = itr.first->second; + + // Strings need nul-terminator and consume the whole word. + uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); + e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length); + + // Set the name of the entry point in case OpName is not provided later. + ir.set_name(ops[1], e.name); + + // If we don't have an entry, make the first one our "default". + if (!ir.default_entry_point) + ir.default_entry_point = ops[1]; + break; + } + + case OpExecutionMode: + { + auto &execution = ir.entry_points[ops[0]]; + auto mode = static_cast<ExecutionMode>(ops[1]); + execution.flags.set(mode); + + switch (mode) + { + case ExecutionModeInvocations: + execution.invocations = ops[2]; + break; + + case ExecutionModeLocalSize: + execution.workgroup_size.x = ops[2]; + execution.workgroup_size.y = ops[3]; + execution.workgroup_size.z = ops[4]; + break; + + case ExecutionModeOutputVertices: + execution.output_vertices = ops[2]; + break; + + default: + break; + } + break; + } + + case OpName: + { + uint32_t id = ops[0]; + ir.set_name(id, extract_string(ir.spirv, instruction.offset + 1)); + break; + } + + case OpMemberName: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + ir.set_member_name(id, member, extract_string(ir.spirv, instruction.offset + 2)); + break; + } + + case OpDecorationGroup: + { + // Noop, this simply means an ID should be a collector of decorations. + // The meta array is already a flat array of decorations which will contain the relevant decorations. + break; + } + + case OpGroupDecorate: + { + uint32_t group_id = ops[0]; + auto &decorations = ir.meta[group_id].decoration; + auto &flags = decorations.decoration_flags; + + // Copies decorations from one ID to another. Only copy decorations which are set in the group, + // i.e., we cannot just copy the meta structure directly. + for (uint32_t i = 1; i < length; i++) + { + uint32_t target = ops[i]; + flags.for_each_bit([&](uint32_t bit) { + auto decoration = static_cast<Decoration>(bit); + + if (decoration_is_string(decoration)) + { + ir.set_decoration_string(target, decoration, ir.get_decoration_string(group_id, decoration)); + } + else + { + ir.meta[target].decoration_word_offset[decoration] = + ir.meta[group_id].decoration_word_offset[decoration]; + ir.set_decoration(target, decoration, ir.get_decoration(group_id, decoration)); + } + }); + } + break; + } + + case OpGroupMemberDecorate: + { + uint32_t group_id = ops[0]; + auto &flags = ir.meta[group_id].decoration.decoration_flags; + + // Copies decorations from one ID to another. Only copy decorations which are set in the group, + // i.e., we cannot just copy the meta structure directly. + for (uint32_t i = 1; i + 1 < length; i += 2) + { + uint32_t target = ops[i + 0]; + uint32_t index = ops[i + 1]; + flags.for_each_bit([&](uint32_t bit) { + auto decoration = static_cast<Decoration>(bit); + + if (decoration_is_string(decoration)) + ir.set_member_decoration_string(target, index, decoration, + ir.get_decoration_string(group_id, decoration)); + else + ir.set_member_decoration(target, index, decoration, ir.get_decoration(group_id, decoration)); + }); + } + break; + } + + case OpDecorate: + case OpDecorateId: + { + // OpDecorateId technically supports an array of arguments, but our only supported decorations are single uint, + // so merge decorate and decorate-id here. + uint32_t id = ops[0]; + + auto decoration = static_cast<Decoration>(ops[1]); + if (length >= 3) + { + ir.meta[id].decoration_word_offset[decoration] = uint32_t(&ops[2] - ir.spirv.data()); + ir.set_decoration(id, decoration, ops[2]); + } + else + ir.set_decoration(id, decoration); + + break; + } + + case OpDecorateStringGOOGLE: + { + uint32_t id = ops[0]; + auto decoration = static_cast<Decoration>(ops[1]); + ir.set_decoration_string(id, decoration, extract_string(ir.spirv, instruction.offset + 2)); + break; + } + + case OpMemberDecorate: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + auto decoration = static_cast<Decoration>(ops[2]); + if (length >= 4) + ir.set_member_decoration(id, member, decoration, ops[3]); + else + ir.set_member_decoration(id, member, decoration); + break; + } + + case OpMemberDecorateStringGOOGLE: + { + uint32_t id = ops[0]; + uint32_t member = ops[1]; + auto decoration = static_cast<Decoration>(ops[2]); + ir.set_member_decoration_string(id, member, decoration, extract_string(ir.spirv, instruction.offset + 3)); + break; + } + + // Build up basic types. + case OpTypeVoid: + { + uint32_t id = ops[0]; + auto &type = set<SPIRType>(id); + type.basetype = SPIRType::Void; + break; + } + + case OpTypeBool: + { + uint32_t id = ops[0]; + auto &type = set<SPIRType>(id); + type.basetype = SPIRType::Boolean; + type.width = 1; + break; + } + + case OpTypeFloat: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + auto &type = set<SPIRType>(id); + if (width == 64) + type.basetype = SPIRType::Double; + else if (width == 32) + type.basetype = SPIRType::Float; + else if (width == 16) + type.basetype = SPIRType::Half; + else + SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type."); + type.width = width; + break; + } + + case OpTypeInt: + { + uint32_t id = ops[0]; + uint32_t width = ops[1]; + bool signedness = ops[2] != 0; + auto &type = set<SPIRType>(id); + type.basetype = signedness ? to_signed_basetype(width) : to_unsigned_basetype(width); + type.width = width; + break; + } + + // Build composite types by "inheriting". + // NOTE: The self member is also copied! For pointers and array modifiers this is a good thing + // since we can refer to decorations on pointee classes which is needed for UBO/SSBO, I/O blocks in geometry/tess etc. + case OpTypeVector: + { + uint32_t id = ops[0]; + uint32_t vecsize = ops[2]; + + auto &base = get<SPIRType>(ops[1]); + auto &vecbase = set<SPIRType>(id); + + vecbase = base; + vecbase.vecsize = vecsize; + vecbase.self = id; + vecbase.parent_type = ops[1]; + break; + } + + case OpTypeMatrix: + { + uint32_t id = ops[0]; + uint32_t colcount = ops[2]; + + auto &base = get<SPIRType>(ops[1]); + auto &matrixbase = set<SPIRType>(id); + + matrixbase = base; + matrixbase.columns = colcount; + matrixbase.self = id; + matrixbase.parent_type = ops[1]; + break; + } + + case OpTypeArray: + { + uint32_t id = ops[0]; + auto &arraybase = set<SPIRType>(id); + + uint32_t tid = ops[1]; + auto &base = get<SPIRType>(tid); + + arraybase = base; + arraybase.parent_type = tid; + + uint32_t cid = ops[2]; + ir.mark_used_as_array_length(cid); + auto *c = maybe_get<SPIRConstant>(cid); + bool literal = c && !c->specialization; + + arraybase.array_size_literal.push_back(literal); + arraybase.array.push_back(literal ? c->scalar() : cid); + // Do NOT set arraybase.self! + break; + } + + case OpTypeRuntimeArray: + { + uint32_t id = ops[0]; + + auto &base = get<SPIRType>(ops[1]); + auto &arraybase = set<SPIRType>(id); + + arraybase = base; + arraybase.array.push_back(0); + arraybase.array_size_literal.push_back(true); + arraybase.parent_type = ops[1]; + // Do NOT set arraybase.self! + break; + } + + case OpTypeImage: + { + uint32_t id = ops[0]; + auto &type = set<SPIRType>(id); + type.basetype = SPIRType::Image; + type.image.type = ops[1]; + type.image.dim = static_cast<Dim>(ops[2]); + type.image.depth = ops[3] == 1; + type.image.arrayed = ops[4] != 0; + type.image.ms = ops[5] != 0; + type.image.sampled = ops[6]; + type.image.format = static_cast<ImageFormat>(ops[7]); + type.image.access = (length >= 9) ? static_cast<AccessQualifier>(ops[8]) : AccessQualifierMax; + + if (type.image.sampled == 0) + SPIRV_CROSS_THROW("OpTypeImage Sampled parameter must not be zero."); + + break; + } + + case OpTypeSampledImage: + { + uint32_t id = ops[0]; + uint32_t imagetype = ops[1]; + auto &type = set<SPIRType>(id); + type = get<SPIRType>(imagetype); + type.basetype = SPIRType::SampledImage; + type.self = id; + break; + } + + case OpTypeSampler: + { + uint32_t id = ops[0]; + auto &type = set<SPIRType>(id); + type.basetype = SPIRType::Sampler; + break; + } + + case OpTypePointer: + { + uint32_t id = ops[0]; + + auto &base = get<SPIRType>(ops[2]); + auto &ptrbase = set<SPIRType>(id); + + ptrbase = base; + ptrbase.pointer = true; + ptrbase.pointer_depth++; + ptrbase.storage = static_cast<StorageClass>(ops[1]); + + if (ptrbase.storage == StorageClassAtomicCounter) + ptrbase.basetype = SPIRType::AtomicCounter; + + ptrbase.parent_type = ops[2]; + + // Do NOT set ptrbase.self! + break; + } + + case OpTypeStruct: + { + uint32_t id = ops[0]; + auto &type = set<SPIRType>(id); + type.basetype = SPIRType::Struct; + for (uint32_t i = 1; i < length; i++) + type.member_types.push_back(ops[i]); + + // Check if we have seen this struct type before, with just different + // decorations. + // + // Add workaround for issue #17 as well by looking at OpName for the struct + // types, which we shouldn't normally do. + // We should not normally have to consider type aliases like this to begin with + // however ... glslang issues #304, #307 cover this. + + // For stripped names, never consider struct type aliasing. + // We risk declaring the same struct multiple times, but type-punning is not allowed + // so this is safe. + bool consider_aliasing = !ir.get_name(type.self).empty(); + if (consider_aliasing) + { + for (auto &other : global_struct_cache) + { + if (ir.get_name(type.self) == ir.get_name(other) && + types_are_logically_equivalent(type, get<SPIRType>(other))) + { + type.type_alias = other; + break; + } + } + + if (type.type_alias == 0) + global_struct_cache.push_back(id); + } + break; + } + + case OpTypeFunction: + { + uint32_t id = ops[0]; + uint32_t ret = ops[1]; + + auto &func = set<SPIRFunctionPrototype>(id, ret); + for (uint32_t i = 2; i < length; i++) + func.parameter_types.push_back(ops[i]); + break; + } + + case OpTypeAccelerationStructureNV: + { + uint32_t id = ops[0]; + auto &type = set<SPIRType>(id); + type.basetype = SPIRType::AccelerationStructureNV; + break; + } + + // Variable declaration + // All variables are essentially pointers with a storage qualifier. + case OpVariable: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + auto storage = static_cast<StorageClass>(ops[2]); + uint32_t initializer = length == 4 ? ops[3] : 0; + + if (storage == StorageClassFunction) + { + if (!current_function) + SPIRV_CROSS_THROW("No function currently in scope"); + current_function->add_local_variable(id); + } + + set<SPIRVariable>(id, type, storage, initializer); + + // hlsl based shaders don't have those decorations. force them and then reset when reading/writing images + auto &ttype = get<SPIRType>(type); + if (ttype.basetype == SPIRType::BaseType::Image) + { + ir.set_decoration(id, DecorationNonWritable); + ir.set_decoration(id, DecorationNonReadable); + } + + break; + } + + // OpPhi + // OpPhi is a fairly magical opcode. + // It selects temporary variables based on which parent block we *came from*. + // In high-level languages we can "de-SSA" by creating a function local, and flush out temporaries to this function-local + // variable to emulate SSA Phi. + case OpPhi: + { + if (!current_function) + SPIRV_CROSS_THROW("No function currently in scope"); + if (!current_block) + SPIRV_CROSS_THROW("No block currently in scope"); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Instead of a temporary, create a new function-wide temporary with this ID instead. + auto &var = set<SPIRVariable>(id, result_type, spv::StorageClassFunction); + var.phi_variable = true; + + current_function->add_local_variable(id); + + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->phi_variables.push_back({ ops[i], ops[i + 1], id }); + break; + } + + // Constants + case OpSpecConstant: + case OpConstant: + { + uint32_t id = ops[1]; + auto &type = get<SPIRType>(ops[0]); + + if (type.width > 32) + set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant); + else + set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant); + break; + } + + case OpSpecConstantFalse: + case OpConstantFalse: + { + uint32_t id = ops[1]; + set<SPIRConstant>(id, ops[0], uint32_t(0), op == OpSpecConstantFalse); + break; + } + + case OpSpecConstantTrue: + case OpConstantTrue: + { + uint32_t id = ops[1]; + set<SPIRConstant>(id, ops[0], uint32_t(1), op == OpSpecConstantTrue); + break; + } + + case OpConstantNull: + { + uint32_t id = ops[1]; + uint32_t type = ops[0]; + make_constant_null(id, type); + break; + } + + case OpSpecConstantComposite: + case OpConstantComposite: + { + uint32_t id = ops[1]; + uint32_t type = ops[0]; + + auto &ctype = get<SPIRType>(type); + + // We can have constants which are structs and arrays. + // In this case, our SPIRConstant will be a list of other SPIRConstant ids which we + // can refer to. + if (ctype.basetype == SPIRType::Struct || !ctype.array.empty()) + { + set<SPIRConstant>(id, type, ops + 2, length - 2, op == OpSpecConstantComposite); + } + else + { + uint32_t elements = length - 2; + if (elements > 4) + SPIRV_CROSS_THROW("OpConstantComposite only supports 1, 2, 3 and 4 elements."); + + SPIRConstant remapped_constant_ops[4]; + const SPIRConstant *c[4]; + for (uint32_t i = 0; i < elements; i++) + { + // Specialization constants operations can also be part of this. + // We do not know their value, so any attempt to query SPIRConstant later + // will fail. We can only propagate the ID of the expression and use to_expression on it. + auto *constant_op = maybe_get<SPIRConstantOp>(ops[2 + i]); + auto *undef_op = maybe_get<SPIRUndef>(ops[2 + i]); + if (constant_op) + { + if (op == OpConstantComposite) + SPIRV_CROSS_THROW("Specialization constant operation used in OpConstantComposite."); + + remapped_constant_ops[i].make_null(get<SPIRType>(constant_op->basetype)); + remapped_constant_ops[i].self = constant_op->self; + remapped_constant_ops[i].constant_type = constant_op->basetype; + remapped_constant_ops[i].specialization = true; + c[i] = &remapped_constant_ops[i]; + } + else if (undef_op) + { + // Undefined, just pick 0. + remapped_constant_ops[i].make_null(get<SPIRType>(undef_op->basetype)); + remapped_constant_ops[i].constant_type = undef_op->basetype; + c[i] = &remapped_constant_ops[i]; + } + else + c[i] = &get<SPIRConstant>(ops[2 + i]); + } + set<SPIRConstant>(id, type, c, elements, op == OpSpecConstantComposite); + } + break; + } + + // Functions + case OpFunction: + { + uint32_t res = ops[0]; + uint32_t id = ops[1]; + // Control + uint32_t type = ops[3]; + + if (current_function) + SPIRV_CROSS_THROW("Must end a function before starting a new one!"); + + current_function = &set<SPIRFunction>(id, res, type); + break; + } + + case OpFunctionParameter: + { + uint32_t type = ops[0]; + uint32_t id = ops[1]; + + if (!current_function) + SPIRV_CROSS_THROW("Must be in a function!"); + + current_function->add_parameter(type, id); + set<SPIRVariable>(id, type, StorageClassFunction); + break; + } + + case OpFunctionEnd: + { + if (current_block) + { + // Very specific error message, but seems to come up quite often. + SPIRV_CROSS_THROW( + "Cannot end a function before ending the current block.\n" + "Likely cause: If this SPIR-V was created from glslang HLSL, make sure the entry point is valid."); + } + current_function = nullptr; + break; + } + + // Blocks + case OpLabel: + { + // OpLabel always starts a block. + if (!current_function) + SPIRV_CROSS_THROW("Blocks cannot exist outside functions!"); + + uint32_t id = ops[0]; + + current_function->blocks.push_back(id); + if (!current_function->entry_block) + current_function->entry_block = id; + + if (current_block) + SPIRV_CROSS_THROW("Cannot start a block before ending the current block."); + + current_block = &set<SPIRBlock>(id); + break; + } + + // Branch instructions end blocks. + case OpBranch: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + uint32_t target = ops[0]; + current_block->terminator = SPIRBlock::Direct; + current_block->next_block = target; + current_block = nullptr; + break; + } + + case OpBranchConditional: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + current_block->condition = ops[0]; + current_block->true_block = ops[1]; + current_block->false_block = ops[2]; + + current_block->terminator = SPIRBlock::Select; + current_block = nullptr; + break; + } + + case OpSwitch: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + + current_block->terminator = SPIRBlock::MultiSelect; + + current_block->condition = ops[0]; + current_block->default_block = ops[1]; + + for (uint32_t i = 2; i + 2 <= length; i += 2) + current_block->cases.push_back({ ops[i], ops[i + 1] }); + + // If we jump to next block, make it break instead since we're inside a switch case block at that point. + ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT; + + current_block = nullptr; + break; + } + + case OpKill: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Kill; + current_block = nullptr; + break; + } + + case OpReturn: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Return; + current_block = nullptr; + break; + } + + case OpReturnValue: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Return; + current_block->return_value = ops[0]; + current_block = nullptr; + break; + } + + case OpUnreachable: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::Unreachable; + current_block = nullptr; + break; + } + + case OpSelectionMerge: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to modify a non-existing block."); + + current_block->next_block = ops[0]; + current_block->merge = SPIRBlock::MergeSelection; + ir.block_meta[current_block->next_block] |= ParsedIR::BLOCK_META_SELECTION_MERGE_BIT; + + if (length >= 2) + { + if (ops[1] & SelectionControlFlattenMask) + current_block->hint = SPIRBlock::HintFlatten; + else if (ops[1] & SelectionControlDontFlattenMask) + current_block->hint = SPIRBlock::HintDontFlatten; + } + break; + } + + case OpLoopMerge: + { + if (!current_block) + SPIRV_CROSS_THROW("Trying to modify a non-existing block."); + + current_block->merge_block = ops[0]; + current_block->continue_block = ops[1]; + current_block->merge = SPIRBlock::MergeLoop; + + ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT; + ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT; + + ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self; + + // Don't add loop headers to continue blocks, + // which would make it impossible branch into the loop header since + // they are treated as continues. + if (current_block->continue_block != current_block->self) + ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT; + + if (length >= 3) + { + if (ops[2] & LoopControlUnrollMask) + current_block->hint = SPIRBlock::HintUnroll; + else if (ops[2] & LoopControlDontUnrollMask) + current_block->hint = SPIRBlock::HintDontUnroll; + } + break; + } + + case OpSpecConstantOp: + { + if (length < 3) + SPIRV_CROSS_THROW("OpSpecConstantOp not enough arguments."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto spec_op = static_cast<Op>(ops[2]); + + set<SPIRConstantOp>(id, result_type, spec_op, ops + 3, length - 3); + break; + } + + // Actual opcodes. + default: + { + if (!current_block) + SPIRV_CROSS_THROW("Currently no block to insert opcode."); + + current_block->ops.push_back(instruction); + break; + } + } +} + +bool Parser::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const +{ + if (a.basetype != b.basetype) + return false; + if (a.width != b.width) + return false; + if (a.vecsize != b.vecsize) + return false; + if (a.columns != b.columns) + return false; + if (a.array.size() != b.array.size()) + return false; + + size_t array_count = a.array.size(); + if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) + return false; + + if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) + { + if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) + return false; + } + + if (a.member_types.size() != b.member_types.size()) + return false; + + size_t member_types = a.member_types.size(); + for (size_t i = 0; i < member_types; i++) + { + if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i]))) + return false; + } + + return true; +} + +bool Parser::variable_storage_is_aliased(const SPIRVariable &v) const +{ + auto &type = get<SPIRType>(v.basetype); + + auto *type_meta = ir.find_meta(type.self); + + bool ssbo = v.storage == StorageClassStorageBuffer || + (type_meta && type_meta->decoration.decoration_flags.get(DecorationBufferBlock)); + bool image = type.basetype == SPIRType::Image; + bool counter = type.basetype == SPIRType::AtomicCounter; + + bool is_restrict; + if (ssbo) + is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict); + else + is_restrict = ir.has_decoration(v.self, DecorationRestrict); + + return !is_restrict && (ssbo || image || counter); +} + +void Parser::make_constant_null(uint32_t id, uint32_t type) +{ + auto &constant_type = get<SPIRType>(type); + + if (constant_type.pointer) + { + auto &constant = set<SPIRConstant>(id, type); + constant.make_null(constant_type); + } + else if (!constant_type.array.empty()) + { + assert(constant_type.parent_type); + uint32_t parent_id = ir.increase_bound_by(1); + make_constant_null(parent_id, constant_type.parent_type); + + if (!constant_type.array_size_literal.back()) + SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); + + vector<uint32_t> elements(constant_type.array.back()); + for (uint32_t i = 0; i < constant_type.array.back(); i++) + elements[i] = parent_id; + set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false); + } + else if (!constant_type.member_types.empty()) + { + uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size())); + vector<uint32_t> elements(constant_type.member_types.size()); + for (uint32_t i = 0; i < constant_type.member_types.size(); i++) + { + make_constant_null(member_ids + i, constant_type.member_types[i]); + elements[i] = member_ids + i; + } + set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false); + } + else + { + auto &constant = set<SPIRConstant>(id, type); + constant.make_null(constant_type); + } +} + +} // namespace spirv_cross diff --git a/src/3rdparty/SPIRV-Cross/spirv_parser.hpp b/src/3rdparty/SPIRV-Cross/spirv_parser.hpp new file mode 100644 index 0000000..cc15315 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_parser.hpp @@ -0,0 +1,95 @@ +/* + * Copyright 2018-2019 Arm Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_PARSER_HPP +#define SPIRV_CROSS_PARSER_HPP + +#include "spirv_cross_parsed_ir.hpp" +#include <stdint.h> +#include <vector> + +namespace spirv_cross +{ +class Parser +{ +public: + Parser(const uint32_t *spirv_data, size_t word_count); + Parser(std::vector<uint32_t> spirv); + + void parse(); + + ParsedIR &get_parsed_ir() + { + return ir; + } + +private: + ParsedIR ir; + SPIRFunction *current_function = nullptr; + SPIRBlock *current_block = nullptr; + + void parse(const Instruction &instr); + const uint32_t *stream(const Instruction &instr) const; + + template <typename T, typename... P> + T &set(uint32_t id, P &&... args) + { + ir.add_typed_id(static_cast<Types>(T::type), id); + auto &var = variant_set<T>(ir.ids[id], std::forward<P>(args)...); + var.self = id; + return var; + } + + template <typename T> + T &get(uint32_t id) + { + return variant_get<T>(ir.ids[id]); + } + + template <typename T> + T *maybe_get(uint32_t id) + { + if (ir.ids[id].get_type() == static_cast<Types>(T::type)) + return &get<T>(id); + else + return nullptr; + } + + template <typename T> + const T &get(uint32_t id) const + { + return variant_get<T>(ir.ids[id]); + } + + template <typename T> + const T *maybe_get(uint32_t id) const + { + if (ir.ids[id].get_type() == T::type) + return &get<T>(id); + else + return nullptr; + } + + // This must be an ordered data structure so we always pick the same type aliases. + std::vector<uint32_t> global_struct_cache; + + bool types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const; + bool variable_storage_is_aliased(const SPIRVariable &v) const; + void make_constant_null(uint32_t id, uint32_t type); +}; +} // namespace spirv_cross + +#endif diff --git a/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp new file mode 100644 index 0000000..c6cd3be --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_reflect.cpp @@ -0,0 +1,594 @@ +/* + * Copyright 2018-2019 Bradley Austin Davis + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "spirv_reflect.hpp" +#include "spirv_glsl.hpp" +#include <iomanip> + +using namespace spv; +using namespace spirv_cross; +using namespace std; + +namespace simple_json +{ +enum class Type +{ + Object, + Array, +}; + +using State = std::pair<Type, bool>; +using Stack = std::stack<State>; + +class Stream +{ + Stack stack; + std::ostringstream buffer; + uint32_t indent{ 0 }; + +public: + void begin_json_object(); + void end_json_object(); + void emit_json_key(const std::string &key); + void emit_json_key_value(const std::string &key, const std::string &value); + void emit_json_key_value(const std::string &key, bool value); + void emit_json_key_value(const std::string &key, uint32_t value); + void emit_json_key_value(const std::string &key, int32_t value); + void emit_json_key_value(const std::string &key, float value); + void emit_json_key_object(const std::string &key); + void emit_json_key_array(const std::string &key); + + void begin_json_array(); + void end_json_array(); + void emit_json_array_value(const std::string &value); + void emit_json_array_value(uint32_t value); + + std::string str() const + { + return buffer.str(); + } + +private: + inline void statement_indent() + { + for (uint32_t i = 0; i < indent; i++) + buffer << " "; + } + + template <typename T> + inline void statement_inner(T &&t) + { + buffer << std::forward<T>(t); + } + + template <typename T, typename... Ts> + inline void statement_inner(T &&t, Ts &&... ts) + { + buffer << std::forward<T>(t); + statement_inner(std::forward<Ts>(ts)...); + } + + template <typename... Ts> + inline void statement(Ts &&... ts) + { + statement_indent(); + statement_inner(std::forward<Ts>(ts)...); + buffer << '\n'; + } + + template <typename... Ts> + void statement_no_return(Ts &&... ts) + { + statement_indent(); + statement_inner(std::forward<Ts>(ts)...); + } +}; +} // namespace simple_json + +using namespace simple_json; + +// Hackery to emit JSON without using nlohmann/json C++ library (which requires a +// higher level of compiler compliance than is required by SPIRV-Cross +void Stream::begin_json_array() +{ + if (!stack.empty() && stack.top().second) + { + statement_inner(",\n"); + } + statement("["); + ++indent; + stack.emplace(Type::Array, false); +} + +void Stream::end_json_array() +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + { + statement_inner("\n"); + } + --indent; + statement_no_return("]"); + stack.pop(); + if (!stack.empty()) + { + stack.top().second = true; + } +} + +void Stream::emit_json_array_value(const std::string &value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + + if (stack.top().second) + statement_inner(",\n"); + + statement_no_return("\"", value, "\""); + stack.top().second = true; +} + +void Stream::emit_json_array_value(uint32_t value) +{ + if (stack.empty() || stack.top().first != Type::Array) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + statement_inner(",\n"); + statement_no_return(std::to_string(value)); + stack.top().second = true; +} + +void Stream::begin_json_object() +{ + if (!stack.empty() && stack.top().second) + { + statement_inner(",\n"); + } + statement("{"); + ++indent; + stack.emplace(Type::Object, false); +} + +void Stream::end_json_object() +{ + if (stack.empty() || stack.top().first != Type::Object) + SPIRV_CROSS_THROW("Invalid JSON state"); + if (stack.top().second) + { + statement_inner("\n"); + } + --indent; + statement_no_return("}"); + stack.pop(); + if (!stack.empty()) + { + stack.top().second = true; + } +} + +void Stream::emit_json_key(const std::string &key) +{ + if (stack.empty() || stack.top().first != Type::Object) + SPIRV_CROSS_THROW("Invalid JSON state"); + + if (stack.top().second) + statement_inner(",\n"); + statement_no_return("\"", key, "\" : "); + stack.top().second = true; +} + +void Stream::emit_json_key_value(const std::string &key, const std::string &value) +{ + emit_json_key(key); + statement_inner("\"", value, "\""); +} + +void Stream::emit_json_key_value(const std::string &key, uint32_t value) +{ + emit_json_key(key); + statement_inner(value); +} + +void Stream::emit_json_key_value(const std::string &key, int32_t value) +{ + emit_json_key(key); + statement_inner(value); +} + +void Stream::emit_json_key_value(const std::string &key, float value) +{ + emit_json_key(key); + statement_inner(value); +} + +void Stream::emit_json_key_value(const std::string &key, bool value) +{ + emit_json_key(key); + statement_inner(value ? "true" : "false"); +} + +void Stream::emit_json_key_object(const std::string &key) +{ + emit_json_key(key); + statement_inner("{\n"); + ++indent; + stack.emplace(Type::Object, false); +} + +void Stream::emit_json_key_array(const std::string &key) +{ + emit_json_key(key); + statement_inner("[\n"); + ++indent; + stack.emplace(Type::Array, false); +} + +void CompilerReflection::set_format(const std::string &format) +{ + if (format != "json") + { + SPIRV_CROSS_THROW("Unsupported format"); + } +} + +string CompilerReflection::compile() +{ + // Move constructor for this type is broken on GCC 4.9 ... + json_stream = std::make_shared<simple_json::Stream>(); + json_stream->begin_json_object(); + emit_entry_points(); + emit_types(); + emit_resources(); + emit_specialization_constants(); + json_stream->end_json_object(); + return json_stream->str(); +} + +void CompilerReflection::emit_types() +{ + bool emitted_open_tag = false; + + ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && !type.pointer && type.array.empty()) + emit_type(type, emitted_open_tag); + }); + + if (emitted_open_tag) + { + json_stream->end_json_object(); + } +} + +void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag) +{ + auto name = type_to_glsl(type); + + if (type.type_alias != 0) + return; + + if (!emitted_open_tag) + { + json_stream->emit_json_key_object("types"); + emitted_open_tag = true; + } + json_stream->emit_json_key_object("_" + std::to_string(type.self)); + json_stream->emit_json_key_value("name", name); + json_stream->emit_json_key_array("members"); + // FIXME ideally we'd like to emit the size of a structure as a + // convenience to people parsing the reflected JSON. The problem + // is that there's no implicit size for a type. It's final size + // will be determined by the top level declaration in which it's + // included. So there might be one size for the struct if it's + // included in a std140 uniform block and another if it's included + // in a std430 uniform block. + // The solution is to include *all* potential sizes as a map of + // layout type name to integer, but that will probably require + // some additional logic being written in this class, or in the + // parent CompilerGLSL class. + auto size = type.member_types.size(); + for (uint32_t i = 0; i < size; ++i) + { + emit_type_member(type, i); + } + json_stream->end_json_array(); + json_stream->end_json_object(); +} + +void CompilerReflection::emit_type_member(const SPIRType &type, uint32_t index) +{ + auto &membertype = get<SPIRType>(type.member_types[index]); + json_stream->begin_json_object(); + auto name = to_member_name(type, index); + // FIXME we'd like to emit the offset of each member, but such offsets are + // context dependent. See the comment above regarding structure sizes + json_stream->emit_json_key_value("name", name); + if (membertype.basetype == SPIRType::Struct) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(membertype.self)); + } + else + { + json_stream->emit_json_key_value("type", type_to_glsl(membertype)); + } + emit_type_member_qualifiers(type, index); + json_stream->end_json_object(); +} + +void CompilerReflection::emit_type_array(const SPIRType &type) +{ + if (!type.array.empty()) + { + json_stream->emit_json_key_array("array"); + // Note that we emit the zeros here as a means of identifying + // unbounded arrays. This is necessary as otherwise there would + // be no way of differentiating between float[4] and float[4][] + for (const auto &value : type.array) + json_stream->emit_json_array_value(value); + json_stream->end_json_array(); + } +} + +void CompilerReflection::emit_type_member_qualifiers(const SPIRType &type, uint32_t index) +{ + auto flags = combined_decoration_for_member(type, index); + if (flags.get(DecorationRowMajor)) + json_stream->emit_json_key_value("row_major", true); + + auto &membertype = get<SPIRType>(type.member_types[index]); + emit_type_array(membertype); + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + { + auto &dec = memb[index]; + if (dec.decoration_flags.get(DecorationLocation)) + json_stream->emit_json_key_value("location", dec.location); + if (dec.decoration_flags.get(DecorationOffset)) + json_stream->emit_json_key_value("offset", dec.offset); + } +} + +string CompilerReflection::execution_model_to_str(spv::ExecutionModel model) +{ + switch (model) + { + case ExecutionModelVertex: + return "vert"; + case ExecutionModelTessellationControl: + return "tesc"; + case ExecutionModelTessellationEvaluation: + return "tese"; + case ExecutionModelGeometry: + return "geom"; + case ExecutionModelFragment: + return "frag"; + case ExecutionModelGLCompute: + return "comp"; + case ExecutionModelRayGenerationNV: + return "rgen"; + case ExecutionModelIntersectionNV: + return "rint"; + case ExecutionModelAnyHitNV: + return "rahit"; + case ExecutionModelClosestHitNV: + return "rchit"; + case ExecutionModelMissNV: + return "rmiss"; + case ExecutionModelCallableNV: + return "rcall"; + default: + return "???"; + } +} + +// FIXME include things like the local_size dimensions, geometry output vertex count, etc +void CompilerReflection::emit_entry_points() +{ + auto entries = get_entry_points_and_stages(); + if (!entries.empty()) + { + // Needed to make output deterministic. + sort(begin(entries), end(entries), [](const EntryPoint &a, const EntryPoint &b) -> bool { + if (a.execution_model < b.execution_model) + return true; + else if (a.execution_model > b.execution_model) + return false; + else + return a.name < b.name; + }); + + json_stream->emit_json_key_array("entryPoints"); + for (auto &e : entries) + { + json_stream->begin_json_object(); + json_stream->emit_json_key_value("name", e.name); + json_stream->emit_json_key_value("mode", execution_model_to_str(e.execution_model)); + json_stream->end_json_object(); + } + json_stream->end_json_array(); + } +} + +void CompilerReflection::emit_resources() +{ + auto res = get_shader_resources(); + emit_resources("subpass_inputs", res.subpass_inputs); + emit_resources("inputs", res.stage_inputs); + emit_resources("outputs", res.stage_outputs); + emit_resources("textures", res.sampled_images); + emit_resources("separate_images", res.separate_images); + emit_resources("separate_samplers", res.separate_samplers); + emit_resources("images", res.storage_images); + emit_resources("ssbos", res.storage_buffers); + emit_resources("ubos", res.uniform_buffers); + emit_resources("push_constants", res.push_constant_buffers); + emit_resources("counters", res.atomic_counters); + emit_resources("acceleration_structures", res.acceleration_structures); +} + +void CompilerReflection::emit_resources(const char *tag, const vector<Resource> &resources) +{ + if (resources.empty()) + { + return; + } + + json_stream->emit_json_key_array(tag); + for (auto &res : resources) + { + auto &type = get_type(res.type_id); + auto typeflags = ir.meta[type.self].decoration.decoration_flags; + auto &mask = get_decoration_bitset(res.id); + + // If we don't have a name, use the fallback for the type instead of the variable + // for SSBOs and UBOs since those are the only meaningful names to use externally. + // Push constant blocks are still accessed by name and not block name, even though they are technically Blocks. + bool is_push_constant = get_storage_class(res.id) == StorageClassPushConstant; + bool is_block = get_decoration_bitset(type.self).get(DecorationBlock) || + get_decoration_bitset(type.self).get(DecorationBufferBlock); + + uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id; + + json_stream->begin_json_object(); + + if (type.basetype == SPIRType::Struct) + { + json_stream->emit_json_key_value("type", "_" + std::to_string(res.base_type_id)); + } + else + { + json_stream->emit_json_key_value("type", type_to_glsl(type)); + } + + json_stream->emit_json_key_value("name", !res.name.empty() ? res.name : get_fallback_name(fallback_id)); + { + bool ssbo_block = type.storage == StorageClassStorageBuffer || + (type.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); + if (ssbo_block) + { + auto buffer_flags = get_buffer_block_flags(res.id); + if (buffer_flags.get(DecorationNonReadable)) + json_stream->emit_json_key_value("writeonly", true); + if (buffer_flags.get(DecorationNonWritable)) + json_stream->emit_json_key_value("readonly", true); + if (buffer_flags.get(DecorationRestrict)) + json_stream->emit_json_key_value("restrict", true); + if (buffer_flags.get(DecorationCoherent)) + json_stream->emit_json_key_value("coherent", true); + } + } + + emit_type_array(type); + + { + bool is_sized_block = is_block && (get_storage_class(res.id) == StorageClassUniform || + get_storage_class(res.id) == StorageClassUniformConstant || + get_storage_class(res.id) == StorageClassStorageBuffer); + if (is_sized_block) + { + uint32_t block_size = uint32_t(get_declared_struct_size(get_type(res.base_type_id))); + json_stream->emit_json_key_value("block_size", block_size); + } + } + + if (type.storage == StorageClassPushConstant) + json_stream->emit_json_key_value("push_constant", true); + if (mask.get(DecorationLocation)) + json_stream->emit_json_key_value("location", get_decoration(res.id, DecorationLocation)); + if (mask.get(DecorationRowMajor)) + json_stream->emit_json_key_value("row_major", true); + if (mask.get(DecorationColMajor)) + json_stream->emit_json_key_value("column_major", true); + if (mask.get(DecorationIndex)) + json_stream->emit_json_key_value("index", get_decoration(res.id, DecorationIndex)); + if (type.storage != StorageClassPushConstant && mask.get(DecorationDescriptorSet)) + json_stream->emit_json_key_value("set", get_decoration(res.id, DecorationDescriptorSet)); + if (mask.get(DecorationBinding)) + json_stream->emit_json_key_value("binding", get_decoration(res.id, DecorationBinding)); + if (mask.get(DecorationInputAttachmentIndex)) + json_stream->emit_json_key_value("input_attachment_index", + get_decoration(res.id, DecorationInputAttachmentIndex)); + if (mask.get(DecorationOffset)) + json_stream->emit_json_key_value("offset", get_decoration(res.id, DecorationOffset)); + + // For images, the type itself adds a layout qualifer. + // Only emit the format for storage images. + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + const char *fmt = format_to_glsl(type.image.format); + if (fmt != nullptr) + json_stream->emit_json_key_value("format", std::string(fmt)); + } + json_stream->end_json_object(); + } + json_stream->end_json_array(); +} + +void CompilerReflection::emit_specialization_constants() +{ + auto specialization_constants = get_specialization_constants(); + if (specialization_constants.empty()) + return; + + json_stream->emit_json_key_array("specialization_constants"); + for (const auto spec_const : specialization_constants) + { + auto &c = get<SPIRConstant>(spec_const.id); + auto type = get<SPIRType>(c.constant_type); + json_stream->begin_json_object(); + json_stream->emit_json_key_value("id", spec_const.constant_id); + json_stream->emit_json_key_value("type", type_to_glsl(type)); + switch (type.basetype) + { + case SPIRType::UInt: + json_stream->emit_json_key_value("default_value", c.scalar()); + break; + + case SPIRType::Int: + json_stream->emit_json_key_value("default_value", c.scalar_i32()); + break; + + case SPIRType::Float: + json_stream->emit_json_key_value("default_value", c.scalar_f32()); + break; + + case SPIRType::Boolean: + json_stream->emit_json_key_value("default_value", c.scalar() != 0); + break; + + default: + break; + } + json_stream->end_json_object(); + } + json_stream->end_json_array(); +} + +string CompilerReflection::to_member_name(const SPIRType &type, uint32_t index) const +{ + auto *type_meta = ir.find_meta(type.self); + + if (type_meta) + { + auto &memb = type_meta->members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); + } + else + return join("_m", index); +} diff --git a/src/3rdparty/SPIRV-Cross/spirv_reflect.hpp b/src/3rdparty/SPIRV-Cross/spirv_reflect.hpp new file mode 100644 index 0000000..13b5b43 --- /dev/null +++ b/src/3rdparty/SPIRV-Cross/spirv_reflect.hpp @@ -0,0 +1,84 @@ +/* + * Copyright 2018-2019 Bradley Austin Davis + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SPIRV_CROSS_REFLECT_HPP +#define SPIRV_CROSS_REFLECT_HPP + +#include "spirv_glsl.hpp" +#include <utility> +#include <vector> + +namespace simple_json +{ +class Stream; +} + +namespace spirv_cross +{ +class CompilerReflection : public CompilerGLSL +{ + using Parent = CompilerGLSL; + +public: + explicit CompilerReflection(std::vector<uint32_t> spirv_) + : Parent(move(spirv_)) + { + options.vulkan_semantics = true; + } + + CompilerReflection(const uint32_t *ir_, size_t word_count) + : Parent(ir_, word_count) + { + options.vulkan_semantics = true; + } + + explicit CompilerReflection(const ParsedIR &ir_) + : CompilerGLSL(ir_) + { + options.vulkan_semantics = true; + } + + explicit CompilerReflection(ParsedIR &&ir_) + : CompilerGLSL(std::move(ir_)) + { + options.vulkan_semantics = true; + } + + void set_format(const std::string &format); + std::string compile() override; + +private: + static std::string execution_model_to_str(spv::ExecutionModel model); + + void emit_entry_points(); + void emit_types(); + void emit_resources(); + void emit_specialization_constants(); + + void emit_type(const SPIRType &type, bool &emitted_open_tag); + void emit_type_member(const SPIRType &type, uint32_t index); + void emit_type_member_qualifiers(const SPIRType &type, uint32_t index); + void emit_type_array(const SPIRType &type); + void emit_resources(const char *tag, const std::vector<Resource> &resources); + + std::string to_member_name(const SPIRType &type, uint32_t index) const; + + std::shared_ptr<simple_json::Stream> json_stream; +}; + +} // namespace spirv_cross + +#endif |