author     Laszlo Agocs <laszlo.agocs@qt.io>    2019-11-11 14:11:25 +0100
committer  Laszlo Agocs <laszlo.agocs@qt.io>    2019-11-11 14:09:20 +0000
commit     0feaa5fb18b750616ded2d841471a895599bcc91
tree       61e8a073fdd26e89fba864b81244c58c553e06e6 /src
parent     13115ddaf1fa200ef79085c493276ceeabd19787
Update SPIRV-Cross
Task-number: QTBUG-78591
Change-Id: I03d2b49408267d18aaa54fe163663325ee7672d0
Reviewed-by: Laszlo Agocs <laszlo.agocs@qt.io>
Diffstat (limited to 'src')
-rw-r--r--  src/3rdparty/SPIRV-Cross/qt_attribution.json                 2
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_common.hpp                    5
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross.cpp                    78
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross.hpp                     3
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp                  48
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_c.h                    29
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_cross_error_handling.hpp      4
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_glsl.cpp                    163
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_glsl.hpp                     17
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp                     82
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp                     32
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_msl.cpp                    1880
-rw-r--r--  src/3rdparty/SPIRV-Cross/spirv_msl.hpp                      66
13 files changed, 1916 insertions, 493 deletions
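The spirv_cross_c.h/spirv_cross_c.cpp hunks below bump the C API minor version from 19 to 21 and add six new MSL options plus HLSL binding-flag control. A minimal sketch of driving the new MSL options through the C API; the option enums come from the diff below, while the surrounding spvc_* entry points are the library's usual boilerplate and the spirv_words/word_count inputs are assumed:

#include "spirv_cross_c.h"

// Sketch: compile a SPIR-V module to MSL with two of the newly added
// options enabled. Error handling omitted; the returned string is owned
// by the context and lives until the context is destroyed.
const char *compile_to_msl(const SpvId *spirv_words, size_t word_count)
{
    spvc_context context = nullptr;
    spvc_parsed_ir ir = nullptr;
    spvc_compiler compiler = nullptr;
    spvc_compiler_options options = nullptr;
    const char *source = nullptr;

    spvc_context_create(&context);
    spvc_context_parse_spirv(context, spirv_words, word_count, &ir);
    spvc_context_create_compiler(context, SPVC_BACKEND_MSL, ir,
                                 SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler);

    spvc_compiler_create_compiler_options(compiler, &options);
    // New in C API minor version 21 (see the spirv_cross_c.h hunks below):
    spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D, 1);
    spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH, 1);
    spvc_compiler_install_compiler_options(compiler, options);

    spvc_compiler_compile(compiler, &source);
    return source;
}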
diff --git a/src/3rdparty/SPIRV-Cross/qt_attribution.json b/src/3rdparty/SPIRV-Cross/qt_attribution.json index 43847a0..e4a06c5 100644 --- a/src/3rdparty/SPIRV-Cross/qt_attribution.json +++ b/src/3rdparty/SPIRV-Cross/qt_attribution.json @@ -7,7 +7,7 @@ "QtUsage": "Shader code generation.", "Homepage": "https://github.com/KhronosGroup/SPIRV-Cross", - "Version": "ff1897ae0e1fc1e37c604933694477f335ca8e44", + "Version": "0b95cbdea394753137537e41d55e6795e5d14dac", "License": "Apache License 2.0", "LicenseId": "Apache-2.0", "LicenseFile": "LICENSE", diff --git a/src/3rdparty/SPIRV-Cross/spirv_common.hpp b/src/3rdparty/SPIRV-Cross/spirv_common.hpp index c1c6fc8..e6a29a1 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_common.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_common.hpp @@ -938,11 +938,6 @@ struct SPIRFunction : IVariant // Intentionally not a small vector, this one is rare, and std::function can be large. Vector<std::function<void()>> fixup_hooks_in; - // On function entry, make sure to copy a constant array into thread addr space to work around - // the case where we are passing a constant array by value to a function on backends which do not - // consider arrays value types. - SmallVector<ID> constant_arrays_needed_on_stack; - bool active = false; bool flush_undeclared = true; bool do_combined_parameters = true; diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp index c73ecdf..286b450 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross.cpp @@ -2729,6 +2729,12 @@ void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_ if (id == 0) return; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. + auto itr = access_chain_children.find(id); + if (itr != end(access_chain_children)) + for (auto child_id : itr->second) + notify_variable_access(child_id, block); + if (id_is_phi_variable(id)) accessed_variables_to_block[id].insert(block); else if (id_is_potential_temporary(id)) @@ -2793,14 +2799,21 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 3) return false; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. uint32_t ptr = args[2]; auto *var = compiler.maybe_get<SPIRVariable>(ptr); if (var) + { accessed_variables_to_block[var->self].insert(current_block->self); + access_chain_children[args[1]].insert(var->self); + } // args[2] might be another access chain we have to track use of. for (uint32_t i = 2; i < length; i++) + { notify_variable_access(args[i], current_block->self); + access_chain_children[args[1]].insert(args[i]); + } // Also keep track of the access chain pointer itself. // In exceptionally rare cases, we can end up with a case where @@ -2889,6 +2902,10 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 3) return false; + // Return value may be a temporary. + if (compiler.get_type(args[0]).basetype != SPIRType::Void) + notify_variable_access(args[1], current_block->self); + length -= 3; args += 3; @@ -2909,9 +2926,6 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // Might try to copy a Phi variable here. notify_variable_access(args[i], current_block->self); } - - // Return value may be a temporary. 
- notify_variable_access(args[1], current_block->self); break; } @@ -3290,6 +3304,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA DominatorBuilder builder(cfg); bool force_temporary = false; + bool used_in_header_hoisted_continue_block = false; // Figure out which block is dominating all accesses of those temporaries. auto &blocks = var.second; @@ -3304,10 +3319,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA // This is moot for complex loops however. auto &loop_header_block = get<SPIRBlock>(ir.continue_block_to_loop_header[block]); assert(loop_header_block.merge == SPIRBlock::MergeLoop); - - // Only relevant if the loop is not marked as complex. - if (!loop_header_block.complex_continue) - builder.add_block(loop_header_block.self); + builder.add_block(loop_header_block.self); + used_in_header_hoisted_continue_block = true; } } @@ -3332,11 +3345,22 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { // Exceptionally rare case. // We cannot declare temporaries of access chains (except on MSL perhaps with pointers). - // Rather than do that, we force a complex loop to make sure access chains are created and consumed - // in expected order. - auto &loop_header_block = get<SPIRBlock>(dominating_block); - assert(loop_header_block.merge == SPIRBlock::MergeLoop); - loop_header_block.complex_continue = true; + // Rather than do that, we force the indexing expressions to be declared in the right scope by + // tracking their usage to that end. There is no temporary to hoist. + // However, we still need to observe declaration order of the access chain. + + if (used_in_header_hoisted_continue_block) + { + // For this scenario, we used an access chain inside a continue block where we also registered an access to header block. + // This is a problem as we need to declare an access chain properly first with full definition. + // We cannot use temporaries for these expressions, + // so we must make sure the access chain is declared ahead of time. + // Force a complex for loop to deal with this. + // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option. + auto &loop_header_block = get<SPIRBlock>(dominating_block); + assert(loop_header_block.merge == SPIRBlock::MergeLoop); + loop_header_block.complex_continue = true; + } } else { @@ -3773,6 +3797,13 @@ void Compiler::analyze_image_and_sampler_usage() CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers); traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + + // Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions + // down to main(). + // In the second pass, we can propagate up forced depth state coming from main() up into leaf functions. + handler.dependency_hierarchy.clear(); + traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); + comparison_ids = move(handler.comparison_ids); need_subpass_input = handler.need_subpass_input; @@ -3888,6 +3919,14 @@ bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) return false; } +void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src) +{ + dependency_hierarchy[dst].insert(src); + // Propagate up any comparison state if we're loading from one such variable. 
+ if (comparison_ids.count(src)) + comparison_ids.insert(dst); +} + bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) { if (length < 3) @@ -3900,7 +3939,7 @@ bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint for (uint32_t i = 0; i < length; i++) { auto &argument = func.arguments[i]; - dependency_hierarchy[argument.id].insert(arg[i]); + add_dependency(argument.id, arg[i]); } return true; @@ -3910,6 +3949,7 @@ void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids { // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. comparison_ids.insert(id); + for (auto &dep_id : dependency_hierarchy[id]) add_hierarchy_to_comparison_ids(dep_id); } @@ -3925,7 +3965,8 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ { if (length < 3) return false; - dependency_hierarchy[args[1]].insert(args[2]); + + add_dependency(args[1], args[2]); // Ideally defer this to OpImageRead, but then we'd need to track loaded IDs. // If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord. @@ -3947,14 +3988,17 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ uint32_t result_type = args[0]; uint32_t result_id = args[1]; auto &type = compiler.get<SPIRType>(result_type); + + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too. + // This image must be a depth image. + uint32_t image = args[2]; + uint32_t sampler = args[3]; + if (type.image.depth || dref_combined_samplers.count(result_id) != 0) { - // This image must be a depth image. - uint32_t image = args[2]; add_hierarchy_to_comparison_ids(image); // This sampler must be a SamplerComparisonState, and not a regular SamplerState. - uint32_t sampler = args[3]; add_hierarchy_to_comparison_ids(sampler); // Mark the OpSampledImage itself as being comparison state. diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp index 7385a6c..73d9085 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross.hpp @@ -888,6 +888,7 @@ protected: void add_hierarchy_to_comparison_ids(uint32_t ids); bool need_subpass_input = false; + void add_dependency(uint32_t dst, uint32_t src); }; void build_function_control_flow_graphs_and_analyze(); @@ -925,6 +926,8 @@ protected: std::unordered_map<uint32_t, std::unordered_set<uint32_t>> complete_write_variables_to_block; std::unordered_map<uint32_t, std::unordered_set<uint32_t>> partial_write_variables_to_block; std::unordered_set<uint32_t> access_chain_expressions; + // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. 
+ std::unordered_map<uint32_t, std::unordered_set<uint32_t>> access_chain_children; const SPIRBlock *current_block = nullptr; }; diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp index f6e63b4..5d9e802 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.cpp @@ -569,6 +569,30 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX: options->msl.dynamic_offsets_buffer_index = value; break; + + case SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D: + options->msl.texture_1D_as_2D = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO: + options->msl.enable_base_index_zero = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS: + options->msl.ios_use_framebuffer_fetch_subpasses = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH: + options->msl.invariant_float_math = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY: + options->msl.emulate_cube_array = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING: + options->msl.enable_decoration_binding = value != 0; + break; #endif default: @@ -745,6 +769,25 @@ spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler c #endif } +spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags) +{ +#if SPIRV_CROSS_C_API_HLSL + if (compiler->backend != SPVC_BACKEND_HLSL) + { + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &hlsl = *static_cast<CompilerHLSL *>(compiler->compiler.get()); + hlsl.set_resource_binding_flags(flags); + return SPVC_SUCCESS; +#else + compiler->context->report_error("HLSL function used on a non-HLSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler) { #if SPIRV_CROSS_C_API_MSL @@ -1639,6 +1682,11 @@ spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id) SPVC_END_SAFE_SCOPE(compiler->context, nullptr) } +spvc_type_id spvc_type_get_base_type_id(spvc_type type) +{ + return type->self; +} + static spvc_basetype convert_basetype(SPIRType::BaseType type) { // For now the enums match up. diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h index f950803..a3ad84b 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_c.h +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_c.h @@ -33,7 +33,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 19 +#define SPVC_C_API_VERSION_MINOR 21 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -466,6 +466,18 @@ typedef struct spvc_msl_sampler_ycbcr_conversion */ SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); +/* Maps to C++ API. 
*/ +typedef enum spvc_hlsl_binding_flag_bits +{ + SPVC_HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + SPVC_HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + SPVC_HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + SPVC_HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + SPVC_HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + SPVC_HLSL_BINDING_AUTO_ALL = 0x7fffffff +} spvc_hlsl_binding_flag_bits; +typedef unsigned spvc_hlsl_binding_flags; + /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */ typedef enum spvc_compiler_option { @@ -527,6 +539,12 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; @@ -600,6 +618,9 @@ SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_c size_t remaps); SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); +SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags); + /* * MSL specifics. * Maps to C++ API. @@ -713,6 +734,12 @@ SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compile */ SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id); +/* Pulls out SPIRType::self. This effectively gives the type ID without array or pointer qualifiers. + * This is necessary when reflecting decoration/name information on members of a struct, + * which are placed in the base type, not the qualified type. + * This is similar to spvc_reflected_resource::base_type_id. 
*/ +SPVC_PUBLIC_API spvc_type_id spvc_type_get_base_type_id(spvc_type type); + SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type); SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type); SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type); diff --git a/src/3rdparty/SPIRV-Cross/spirv_cross_error_handling.hpp b/src/3rdparty/SPIRV-Cross/spirv_cross_error_handling.hpp index e821c04..153b07d 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_cross_error_handling.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_cross_error_handling.hpp @@ -17,10 +17,12 @@ #ifndef SPIRV_CROSS_ERROR_HANDLING #define SPIRV_CROSS_ERROR_HANDLING -#include <stdexcept> #include <stdio.h> #include <stdlib.h> #include <string> +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS +#include <stdexcept> +#endif #ifdef SPIRV_CROSS_NAMESPACE_OVERRIDE #define SPIRV_CROSS_NAMESPACE SPIRV_CROSS_NAMESPACE_OVERRIDE diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp index bcdcd5f..e992516 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.cpp @@ -324,6 +324,9 @@ void CompilerGLSL::reset() forwarded_temporaries.clear(); suppressed_usage_tracking.clear(); + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + flushed_phi_variables.clear(); + reset_name_caches(); ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) { @@ -502,6 +505,7 @@ string CompilerGLSL::compile() backend.allow_precision_qualifiers = true; backend.force_gl_in_out_block = true; backend.supports_extensions = true; + backend.use_array_constructor = true; // Scan the SPIR-V to find trivial uses of extensions. fixup_type_alias(); @@ -1332,7 +1336,8 @@ uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &f } bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, - uint32_t start_offset, uint32_t end_offset) + uint32_t *failed_validation_index, uint32_t start_offset, + uint32_t end_offset) { // This is very tricky and error prone, but try to be exhaustive and correct here. // SPIR-V doesn't directly say if we're using std430 or std140. @@ -1413,18 +1418,28 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin if (!packing_has_flexible_offset(packing)) { if (actual_offset != offset) // This cannot be the packing we're looking for. + { + if (failed_validation_index) + *failed_validation_index = i; return false; + } } else if ((actual_offset & (alignment - 1)) != 0) { // We still need to verify that alignment rules are observed, even if we have explicit offset. + if (failed_validation_index) + *failed_validation_index = i; return false; } // Verify array stride rules. if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i)) + { + if (failed_validation_index) + *failed_validation_index = i; return false; + } // Verify that sub-structs also follow packing rules. // We cannot use enhanced layouts on substructs, so they better be up to spec. 
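The failed_validation_index out-parameter added to buffer_is_packing_standard() above lets callers report exactly which struct member broke a packing check; the HLSL hunks further below use it to build precise error messages. An illustrative diagnostic in the join() format used there — the buffer ID and names are hypothetical:

cbuffer ID 34 (name: UBO), member index 2 (name: lightDir) cannot be expressed with either HLSL packing layout or packoffset.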
@@ -1433,6 +1448,8 @@ bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackin if (!memb_type.pointer && !memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing)) { + if (failed_validation_index) + *failed_validation_index = i; return false; } } @@ -3394,11 +3411,19 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c) { // Handles Arrays and structures. string res; + + // Allow Metal to use the array<T> template to make arrays a value type + bool needs_trailing_tracket = false; if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && type.array.empty()) { res = type_to_glsl_constructor(type) + "{ "; } + else if (backend.use_initializer_list && backend.use_typed_initializer_list && !type.array.empty()) + { + res = type_to_glsl_constructor(type) + "({ "; + needs_trailing_tracket = true; + } else if (backend.use_initializer_list) { res = "{ "; @@ -3421,8 +3446,23 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c) } res += backend.use_initializer_list ? " }" : ")"; + if (needs_trailing_tracket) + res += ")"; + return res; } + else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) + { + // Metal tessellation likes empty structs which are then constant expressions. + if (backend.supports_empty_struct) + return "{ }"; + else if (backend.use_typed_initializer_list) + return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }"); + else if (backend.use_initializer_list) + return "{ 0 }"; + else + return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)"); + } else if (c.columns() == 1) { return constant_expression_vector(c, 0); @@ -6586,6 +6626,36 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index) } } +void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType *type, + AccessChainFlags flags, bool & /*access_chain_is_arrayed*/, + uint32_t index) +{ + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + + expr += "["; + + // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. + bool nonuniform_index = + has_decoration(index, DecorationNonUniformEXT) && + (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); + if (nonuniform_index) + { + expr += backend.nonuniform_qualifier; + expr += "("; + } + + if (index_is_literal) + expr += convert_to_string(index); + else + expr += to_expression(index, register_expression_read); + + if (nonuniform_index) + expr += ")"; + + expr += "]"; +} + string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta) { @@ -6637,27 +6707,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice bool dimension_flatten = false; const auto append_index = [&](uint32_t index) { - expr += "["; - - // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. 
- bool nonuniform_index = - has_decoration(index, DecorationNonUniformEXT) && - (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); - if (nonuniform_index) - { - expr += backend.nonuniform_qualifier; - expr += "("; - } - - if (index_is_literal) - expr += convert_to_string(index); - else - expr += to_expression(index, register_expression_read); - - if (nonuniform_index) - expr += ")"; - - expr += "]"; + access_chain_internal_append_index(expr, base, type, flags, access_chain_is_arrayed, index); }; for (uint32_t i = 0; i < count; i++) @@ -6780,7 +6830,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!pending_array_enclose) expr += "]"; } - else + // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. + // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. + else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) { append_index(index); } @@ -7502,23 +7554,29 @@ string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) { - if (var.allocate_temporary_copy) + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self)) { auto &type = get<SPIRType>(var.basetype); auto &flags = get_decoration_bitset(var.self); statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + flushed_phi_variables.insert(var.self); } } void CompilerGLSL::flush_variable_declaration(uint32_t id) { + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred auto *var = maybe_get<SPIRVariable>(id); if (var && var->deferred_declaration) { statement(variable_decl_function_local(*var), ";"); - emit_variable_temporary_copies(*var); var->deferred_declaration = false; } + if (var) + { + emit_variable_temporary_copies(*var); + } } bool CompilerGLSL::remove_duplicate_swizzle(string &op) @@ -8293,11 +8351,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) string constructor_op; if (backend.use_initializer_list && composite) { + bool needs_trailing_tracket = false; // Only use this path if we are building composites. // This path cannot be used for arithmetic. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)); + else if (backend.use_typed_initializer_list && !out_type.array.empty()) + { + // MSL path. Array constructor is baked into type here, do not use _constructor variant. 
+ constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "("; + needs_trailing_tracket = true; + } constructor_op += "{ "; + if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) @@ -8305,6 +8371,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += " }"; + if (needs_trailing_tracket) + constructor_op += ")"; } else if (swizzle_splat && !composite) { @@ -9650,11 +9718,18 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", coord_expr), result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); break; } @@ -10700,8 +10775,10 @@ string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. // Opt for unsized as it's the more "correct" variant to use. - if (type.storage == StorageClassInput && (get_entry_point().model == ExecutionModelTessellationControl || - get_entry_point().model == ExecutionModelTessellationEvaluation)) + if (type.storage == StorageClassInput && + (get_entry_point().model == ExecutionModelTessellationControl || + get_entry_point().model == ExecutionModelTessellationEvaluation) && + index == uint32_t(type.array.size() - 1)) return ""; auto &size = type.array[index]; @@ -10870,7 +10947,7 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) { - if (type.array.size() > 1) + if (backend.use_array_constructor && type.array.size() > 1) { if (options.flatten_multidimensional_arrays) SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. 
float[][]()."); @@ -10881,8 +10958,11 @@ string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) } auto e = type_to_glsl(type); - for (uint32_t i = 0; i < type.array.size(); i++) - e += "[]"; + if (backend.use_array_constructor) + { + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + } return e; } @@ -11121,6 +11201,11 @@ void CompilerGLSL::flatten_buffer_block(VariableID id) flattened_buffer_blocks.insert(id); } +bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const +{ + return false; // GLSL itself does not need to translate array builtin types to non-array builtin types +} + bool CompilerGLSL::check_atomic_image(uint32_t id) { auto &type = expression_type(id); @@ -11310,14 +11395,6 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) current_function = &func; auto &entry_block = get<SPIRBlock>(func.entry_block); - sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); - for (auto &array : func.constant_arrays_needed_on_stack) - { - auto &c = get<SPIRConstant>(array); - auto &type = get<SPIRType>(c.constant_type); - statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); - } - for (auto &v : func.local_variables) { auto &var = get<SPIRVariable>(v); @@ -12703,14 +12780,14 @@ void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t s auto new_expr = join("_", target_id, "_unrolled"); statement(variable_decl(type, new_expr, target_id), ";"); string array_expr; - if (type.array_size_literal.front()) + if (type.array_size_literal.back()) { - array_expr = convert_to_string(type.array.front()); - if (type.array.front() == 0) + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); } else - array_expr = to_expression(type.array.front()); + array_expr = to_expression(type.array.back()); // The array size might be a specialization constant, so use a for-loop instead. 
statement("for (int i = 0; i < int(", array_expr, "); i++)"); diff --git a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp index 6f59bd8..3326a24 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_glsl.hpp @@ -273,6 +273,8 @@ protected: virtual std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, bool packed_type, bool row_major); + virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const; + StringStream<> buffer; template <typename T> @@ -338,11 +340,12 @@ protected: Options options; - std::string type_to_array_glsl(const SPIRType &type); + virtual std::string type_to_array_glsl( + const SPIRType &type); // Allow Metal to use the array<T> template to make arrays a value type std::string to_array_size(const SPIRType &type, uint32_t index); uint32_t to_array_size_literal(const SPIRType &type, uint32_t index) const; uint32_t to_array_size_literal(const SPIRType &type) const; - std::string variable_decl(const SPIRVariable &variable); + virtual std::string variable_decl(const SPIRVariable &variable); // Threadgroup arrays can't have a wrapper type std::string variable_decl_function_local(SPIRVariable &variable); void add_local_variable_name(uint32_t id); @@ -414,6 +417,7 @@ protected: bool native_pointers = false; bool support_small_type_sampling_result = false; bool support_case_fallthrough = true; + bool use_array_constructor = false; } backend; void emit_struct(SPIRType &type); @@ -488,6 +492,9 @@ protected: SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool suppress_usage_tracking = false); + void access_chain_internal_append_index(std::string &expr, uint32_t base, const SPIRType *type, + AccessChainFlags flags, bool &access_chain_is_arrayed, uint32_t index); + std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta); @@ -551,7 +558,8 @@ protected: virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); - bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset = 0, + bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t *failed_index = nullptr, uint32_t start_offset = 0, uint32_t end_offset = ~(0u)); std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout); @@ -586,6 +594,9 @@ protected: std::unordered_set<uint32_t> emitted_functions; + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + std::unordered_set<uint32_t> flushed_phi_variables; + std::unordered_set<uint32_t> flattened_buffer_blocks; std::unordered_set<uint32_t> flattened_structs; diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp index 4d4e276..ae7a4d5 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.cpp @@ -1867,11 +1867,6 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) { if (type.array.empty()) { - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset)) - set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); - else - SPIRV_CROSS_THROW("cbuffer cannot be expressed with either HLSL packing layout or packoffset."); - // Flatten the top-level struct so we can use 
packoffset, // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. flattened_structs.insert(var.self); @@ -1892,6 +1887,16 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) if (buffer_name.empty()) buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", + failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } + block_names.insert(buffer_name); // Save for post-reflection later. @@ -1927,13 +1932,18 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) SPIRV_CROSS_THROW( "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1."); - // ConstantBuffer<T> does not support packoffset, so it is unuseable unless everything aligns as we expect. - if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer)) - SPIRV_CROSS_THROW("HLSL ConstantBuffer<T> cannot be expressed with normal HLSL packing rules."); - add_resource_name(type.self); add_resource_name(var.self); + // ConstantBuffer<T> does not support packoffset, so it is unuseable unless everything aligns as we expect. + uint32_t failed_index = 0; + if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index)) + { + SPIRV_CROSS_THROW(join("HLSL ConstantBuffer<T> ID ", var.self, " (name: ", to_name(type.self), + "), member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with normal HLSL packing rules.")); + } + emit_struct(get<SPIRType>(type.self)); statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); @@ -1953,11 +1963,16 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) { auto &type = get<SPIRType>(var.basetype); - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, layout.start, layout.end)) + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, + layout.end)) set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); else - SPIRV_CROSS_THROW( - "root constant cbuffer cannot be expressed with either HLSL packing layout or packoffset."); + { + SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", + ", member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } flattened_structs.insert(var.self); type.member_name_cache.clear(); @@ -1965,7 +1980,7 @@ void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) auto &memb = ir.meta[type.self].members; statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), - to_resource_register('b', layout.binding, layout.space)); + to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); begin_scope(); // Index of the next field in the generated root constant constant buffer @@ -2928,21 +2943,31 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) const auto &type = get<SPIRType>(var.basetype); char space = '\0'; + HLSLBindingFlags 
resource_flags = 0; + switch (type.basetype) { case SPIRType::SampledImage: space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; break; case SPIRType::Image: if (type.image.sampled == 2 && type.image.dim != DimSubpassData) + { space = 'u'; // UAV + resource_flags = HLSL_BINDING_AUTO_UAV_BIT; + } else + { space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } break; case SPIRType::Sampler: space = 's'; + resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT; break; case SPIRType::Struct: @@ -2955,18 +2980,26 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) Bitset flags = ir.get_buffer_block_flags(var); bool is_readonly = flags.get(DecorationNonWritable); space = is_readonly ? 't' : 'u'; // UAV + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; } else if (has_decoration(type.self, DecorationBlock)) + { space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_CBV_BIT; + } } else if (storage == StorageClassPushConstant) + { space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + } else if (storage == StorageClassStorageBuffer) { // UAV or SRV depending on readonly flag. Bitset flags = ir.get_buffer_block_flags(var); bool is_readonly = flags.get(DecorationNonWritable); space = is_readonly ? 't' : 'u'; + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; } break; @@ -2978,7 +3011,7 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) if (!space) return ""; - return to_resource_register(space, get_decoration(var.self, DecorationBinding), + return to_resource_register(resource_flags, space, get_decoration(var.self, DecorationBinding), get_decoration(var.self, DecorationDescriptorSet)); } @@ -2988,16 +3021,21 @@ string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) if (!has_decoration(var.self, DecorationBinding)) return ""; - return to_resource_register('s', get_decoration(var.self, DecorationBinding), + return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding), get_decoration(var.self, DecorationDescriptorSet)); } -string CompilerHLSL::to_resource_register(char space, uint32_t binding, uint32_t space_set) +string CompilerHLSL::to_resource_register(uint32_t flags, char space, uint32_t binding, uint32_t space_set) { - if (hlsl_options.shader_model >= 51) - return join(" : register(", space, binding, ", space", space_set, ")"); + if ((flags & resource_binding_flags) == 0) + { + if (hlsl_options.shader_model >= 51) + return join(" : register(", space, binding, ", space", space_set, ")"); + else + return join(" : register(", space, binding, ")"); + } else - return join(" : register(", space, binding, ")"); + return ""; } void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) @@ -4562,6 +4600,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); break; } @@ -4877,6 +4916,11 @@ VariableID CompilerHLSL::remap_num_workgroups_builtin() return variable_id; } +void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags) +{ + resource_binding_flags = flags; +} + void CompilerHLSL::validate_shader_model() { // Check for nonuniform qualifier. 
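The new HLSL binding flags used above (and declared in the spirv_hlsl.hpp diff that follows) let callers suppress the automatic register() assignment per resource class. A minimal C++ sketch, assuming spirv_binary holds a valid SPIR-V module:

#include "spirv_hlsl.hpp"

// Sketch: emit HLSL where cbuffers and samplers carry no register()
// declaration, leaving register assignment to the D3D toolchain.
std::string compile_without_cbv_sampler_registers(std::vector<uint32_t> spirv_binary)
{
    spirv_cross::CompilerHLSL hlsl(std::move(spirv_binary));
    hlsl.set_resource_binding_flags(spirv_cross::HLSL_BINDING_AUTO_CBV_BIT |
                                    spirv_cross::HLSL_BINDING_AUTO_SAMPLER_BIT);
    return hlsl.compile(); // these resources are declared without ": register(...)"
}

SRVs and UAVs keep their explicit registers in this configuration; passing HLSL_BINDING_AUTO_ALL would suppress register() for every resource class.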
diff --git a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp index eb968f0..b0db688 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_hlsl.hpp @@ -41,6 +41,32 @@ struct RootConstants uint32_t space; }; +// For finer control, decorations may be removed from specific resources instead with unset_decoration(). +enum HLSLBindingFlagBits +{ + // Push constant (root constant) resources will be declared as CBVs (b-space) without a register() declaration. + // A register will be automatically assigned by the D3D compiler, but must therefore be reflected in D3D-land. + // Push constants do not normally have a DecorationBinding set, but if they do, this can be used to ignore it. + HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + + // cbuffer resources will be declared as CBVs (b-space) without a register() declaration. + // A register will be automatically assigned, but must be reflected in D3D-land. + HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + + // All SRVs (t-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + + // All UAVs (u-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + + // All samplers (s-space) will be declared without a register() declaration. + HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + + // No resources will be declared with register(). + HLSL_BINDING_AUTO_ALL = 0x7fffffff +}; +using HLSLBindingFlags = uint32_t; + class CompilerHLSL : public CompilerGLSL { public: @@ -116,6 +142,9 @@ public: // so the calling application should declare explicit bindings on this ID before calling compile(). VariableID remap_num_workgroups_builtin(); + // Controls how resource bindings are declared in the output HLSL. + void set_resource_binding_flags(HLSLBindingFlags flags); + private: std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; std::string image_type_hlsl(const SPIRType &type, uint32_t id); @@ -149,7 +178,7 @@ private: std::string to_sampler_expression(uint32_t id); std::string to_resource_binding(const SPIRVariable &var); std::string to_resource_binding_sampler(const SPIRVariable &var); - std::string to_resource_register(char space, uint32_t binding, uint32_t set); + std::string to_resource_register(HLSLBindingFlags flags, char space, uint32_t binding, uint32_t set); void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_access_chain(const Instruction &instruction); void emit_load(const Instruction &instruction); @@ -221,6 +250,7 @@ private: std::string to_semantic(uint32_t location, spv::ExecutionModel em, spv::StorageClass sc); uint32_t num_workgroups_builtin = 0; + HLSLBindingFlags resource_binding_flags = 0; // Custom root constant layout, which should be emitted // when translating push constant ranges. 
diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp index d7cb138..b16e1e8 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_msl.cpp +++ b/src/3rdparty/SPIRV-Cross/spirv_msl.cpp @@ -27,6 +27,7 @@ using namespace std; static const uint32_t k_unknown_location = ~0u; static const uint32_t k_unknown_component = ~0u; +static const char *force_inline = "static inline __attribute__((always_inline))"; CompilerMSL::CompilerMSL(std::vector<uint32_t> spirv_) : CompilerGLSL(move(spirv_)) @@ -121,6 +122,11 @@ void CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t com fragment_output_components[location] = components; } +bool CompilerMSL::builtin_translates_to_nonarray(spv::BuiltIn builtin) const +{ + return (builtin == BuiltInSampleMask); +} + void CompilerMSL::build_implicit_builtins() { bool need_sample_pos = active_input_builtins.get(BuiltInSamplePosition); @@ -157,8 +163,10 @@ void CompilerMSL::build_implicit_builtins() if (var.storage != StorageClassInput || !ir.meta[var.self].decoration.builtin) return; + // Use Metal's native frame-buffer fetch API for subpass inputs. BuiltIn builtin = ir.meta[var.self].decoration.builtin_type; - if (need_subpass_input && builtin == BuiltInFragCoord) + if (need_subpass_input && (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses) && + builtin == BuiltInFragCoord) { builtin_frag_coord_id = var.self; has_frag_coord = true; @@ -226,17 +234,19 @@ void CompilerMSL::build_implicit_builtins() if (need_multiview) { - if (builtin == BuiltInInstanceIndex) + switch (builtin) { + case BuiltInInstanceIndex: // The view index here is derived from the instance index. builtin_instance_idx_id = var.self; has_instance_idx = true; - } - - if (builtin == BuiltInViewIndex) - { + break; + case BuiltInViewIndex: builtin_view_idx_id = var.self; has_view_idx = true; + break; + default: + break; } } @@ -248,7 +258,9 @@ void CompilerMSL::build_implicit_builtins() workgroup_id_type = var.basetype; }); - if (!has_frag_coord && need_subpass_input) + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (!has_frag_coord && (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses) && + need_subpass_input) { uint32_t offset = ir.increase_bound_by(3); uint32_t type_id = offset; @@ -579,6 +591,35 @@ void CompilerMSL::build_implicit_builtins() } } +// Checks if the specified builtin variable (e.g. gl_InstanceIndex) is marked as active. +// If not, it marks it as active and forces a recompilation. +// This might be used when the optimization of inactive builtins was too optimistic (e.g. when "spvOut" is emitted). +void CompilerMSL::ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin) +{ + Bitset *active_builtins = nullptr; + switch (storage) + { + case StorageClassInput: + active_builtins = &active_input_builtins; + break; + + case StorageClassOutput: + active_builtins = &active_output_builtins; + break; + + default: + break; + } + + // At this point, the specified builtin variable must have already been declared in the entry point. + // If not, mark as active and force recompile. 
+ if (active_builtins != nullptr && !active_builtins->get(builtin)) + { + active_builtins->set(builtin); + force_recompile(); + } +} + void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) { Bitset *active_builtins = nullptr; @@ -685,6 +726,8 @@ std::string CompilerMSL::get_tess_factor_struct_name() void CompilerMSL::emit_entry_point_declarations() { // FIXME: Get test coverage here ... + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries + declare_complex_constant_arrays(); // Emit constexpr samplers here. for (auto &samp : constexpr_samplers_by_id) @@ -841,8 +884,10 @@ void CompilerMSL::emit_entry_point_declarations() SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); else { + use_builtin_array = true; statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name, type_to_array_glsl(type), " ="); + uint32_t dim = uint32_t(type.array.size()); uint32_t j = 0; for (SmallVector<uint32_t> indices(type.array.size()); @@ -870,6 +915,7 @@ void CompilerMSL::emit_entry_point_declarations() } end_scope_decl(); statement_no_indent(""); + use_builtin_array = false; } } else @@ -886,8 +932,10 @@ void CompilerMSL::emit_entry_point_declarations() { const auto &var = get<SPIRVariable>(array_id); const auto &type = get_variable_data_type(var); + const auto &buffer_type = get_variable_element_type(var); string name = to_name(array_id); - statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] ="); + statement(get_argument_address_space(var), " ", type_to_glsl(buffer_type), "* ", to_restrict(array_id), name, + "[] ="); begin_scope(); for (uint32_t i = 0; i < to_array_size_literal(type); ++i) statement(name, "_", i, ","); @@ -925,13 +973,14 @@ string CompilerMSL::compile() backend.native_row_major_matrix = false; backend.unsized_array_supported = false; backend.can_declare_arrays_inline = false; - backend.can_return_array = false; + backend.can_return_array = true; // <-- Allow Metal to use the array<T> template backend.allow_truncated_access_chain = true; - backend.array_is_value_type = false; + backend.array_is_value_type = true; // <-- Allow Metal to use the array<T> template to make arrays a value type backend.comparison_image_samples_scalar = true; backend.native_pointers = true; backend.nonuniform_qualifier = ""; backend.support_small_type_sampling_result = true; + backend.supports_empty_struct = true; capture_output_to_buffer = msl_options.capture_output_to_buffer; is_rasterization_disabled = msl_options.disable_rasterization || capture_output_to_buffer; @@ -1025,6 +1074,7 @@ string CompilerMSL::compile() buffer.reset(); emit_header(); + emit_custom_templates(); emit_specialization_constants_and_structs(); emit_resources(); emit_custom_functions(); @@ -1153,8 +1203,10 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: if (global_var_ids.find(base_id) != global_var_ids.end()) added_arg_ids.insert(base_id); + // Use Metal's native frame-buffer fetch API for subpass inputs. auto &type = get<SPIRType>(ops[0]); - if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + (!msl_options.is_ios() || !msl_options.ios_use_framebuffer_fetch_subpasses)) { // Implicitly reads gl_FragCoord. 
assert(builtin_frag_coord_id != 0); @@ -1202,6 +1254,20 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: break; } + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + uint32_t base_id = ops[2]; + auto *var = maybe_get_backing_variable(base_id); + if (var && atomic_image_vars.count(var->self)) + { + if (global_var_ids.find(base_id) != global_var_ids.end()) + added_arg_ids.insert(base_id); + } + break; + } + default: break; } @@ -1545,10 +1611,13 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage if (is_builtin) set_name(var.self, builtin_to_glsl(builtin, StorageClassFunction)); - entry_func.add_local_variable(var.self); - - // We need to declare the variable early and at entry-point scope. - vars_needing_early_declaration.push_back(var.self); + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. + if (!strip_array) + { + entry_func.add_local_variable(var.self); + // We need to declare the variable early and at entry-point scope. + vars_needing_early_declaration.push_back(var.self); + } for (uint32_t i = 0; i < elem_cnt; i++) { @@ -1618,6 +1687,7 @@ void CompilerMSL::add_composite_variable_to_interface_block(StorageClass storage set_extended_member_decoration(ib_type.self, ib_mbr_idx, SPIRVCrossDecorationInterfaceOrigID, var.self); + // Only flatten/unflatten IO composites for non-tessellation cases where arrays are not stripped. if (!strip_array) { switch (storage) @@ -2125,60 +2195,37 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) { // Only needed for tessellation shaders. + // Need to redirect interface indices back to variables themselves. + // For structs, each member of the struct need a separate instance. if (get_execution_model() != ExecutionModelTessellationControl && !(get_execution_model() == ExecutionModelTessellationEvaluation && storage == StorageClassInput)) return; - bool in_array = false; - for (uint32_t i = 0; i < ir.meta[ib_type_id].members.size(); i++) + auto mbr_cnt = uint32_t(ir.meta[ib_type_id].members.size()); + for (uint32_t i = 0; i < mbr_cnt; i++) { uint32_t var_id = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceOrigID); if (!var_id) continue; auto &var = get<SPIRVariable>(var_id); - // Unfortunately, all this complexity is needed to handle flattened structs and/or - // arrays. 
- if (storage == StorageClassInput) + auto &type = get_variable_element_type(var); + if (storage == StorageClassInput && type.basetype == SPIRType::Struct) { - auto &type = get_variable_element_type(var); - if (is_array(type) || is_matrix(type)) - { - if (in_array) - continue; - in_array = true; - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - else - { - if (type.basetype == SPIRType::Struct) - { - uint32_t mbr_idx = - get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); - auto &mbr_type = get<SPIRType>(type.member_types[mbr_idx]); + uint32_t mbr_idx = get_extended_member_decoration(ib_type_id, i, SPIRVCrossDecorationInterfaceMemberIndex); - if (is_array(mbr_type) || is_matrix(mbr_type)) - { - if (in_array) - continue; - in_array = true; - set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - else - { - in_array = false; - set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - } - else - { - in_array = false; - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); - } - } + // Only set the lowest InterfaceMemberIndex for each variable member. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_member_decoration(var_id, mbr_idx, SPIRVCrossDecorationInterfaceMemberIndex, i); } else - set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + { + // Only set the lowest InterfaceMemberIndex for each variable. + // IB struct members will be emitted in-order w.r.t. interface member index. + if (!has_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex)) + set_extended_decoration(var_id, SPIRVCrossDecorationInterfaceMemberIndex, i); + } } } @@ -2314,11 +2361,21 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) { // The first member of the indirect buffer is always the number of vertices // to draw. 
- statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, " = ", - output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), " - ", - to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", - to_expression(builtin_vertex_idx_id), " - ", to_expression(builtin_base_vertex_id), - "];"); + // We zero-base the InstanceID & VertexID variables for HLSL emulation elsewhere, so don't do it twice + if (msl_options.enable_base_index_zero) + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[", to_expression(builtin_instance_idx_id), + " * spvIndirectParams[0] + ", to_expression(builtin_vertex_idx_id), "];"); + } + else + { + statement("device ", to_name(ir.default_entry_point), "_", ib_var_ref, "& ", ib_var_ref, + " = ", output_buffer_var_name, "[(", to_expression(builtin_instance_idx_id), + " - ", to_expression(builtin_base_instance_id), ") * spvIndirectParams[0] + ", + to_expression(builtin_vertex_idx_id), " - ", + to_expression(builtin_base_vertex_id), "];"); + } } }); break; @@ -3108,6 +3165,14 @@ void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_exp } } +static bool expression_ends_with(const string &expr_str, const std::string &ending) +{ + if (expr_str.length() >= ending.length()) + return (expr_str.compare(expr_str.length() - ending.length(), ending.length(), ending) == 0); + else + return false; +} + // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. // Also, handle special physical ID remapping scenarios, similar to emit_store_statement(). @@ -3128,9 +3193,17 @@ string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type ".xyz", }; - // std140 array cases for vectors. - if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && physical_type->vecsize > type.vecsize) + if (physical_type && is_vector(*physical_type) && is_array(*physical_type) && + physical_type->vecsize > type.vecsize && !expression_ends_with(expr_str, swizzle_lut[type.vecsize - 1])) + { + // std140 array cases for vectors. + assert(type.vecsize >= 1 && type.vecsize <= 3); + return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; + } + else if (physical_type && is_matrix(*physical_type) && is_vector(type) && + physical_type->vecsize > type.vecsize) { + // Extract column from padded matrix. assert(type.vecsize >= 1 && type.vecsize <= 3); return enclose_expression(expr_str) + swizzle_lut[type.vecsize - 1]; } @@ -3172,7 +3245,9 @@ string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type return unpack_expr; } else + { return join(type_to_glsl(type), "(", expr_str, ")"); + } } // Emits the file header info @@ -3181,6 +3256,11 @@ void CompilerMSL::emit_header() // This particular line can be overridden during compilation, so make it a flag and not a pragma line. 
if (suppress_missing_prototypes) statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + + // Disable warning about missing braces for array<T> template to make arrays a value type + if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) + statement("#pragma clang diagnostic ignored \"-Wmissing-braces\""); + for (auto &pragma : pragma_lines) statement(pragma); @@ -3218,7 +3298,63 @@ void CompilerMSL::add_typedef_line(const string &line) force_recompile(); } +// Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared +void CompilerMSL::emit_custom_templates() +{ + for (const auto &spv_func : spv_function_implementations) + { + switch (spv_func) + { + case SPVFuncImplUnsafeArray: + statement("template<typename T, size_t Num>"); + statement("struct spvUnsafeArray"); + begin_scope(); + statement("T elements[Num ? Num : 1];"); + statement(""); + statement("thread T& operator [] (size_t pos) thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const thread T& operator [] (size_t pos) const thread"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("device T& operator [] (size_t pos) device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const device T& operator [] (size_t pos) const device"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("constexpr const constant T& operator [] (size_t pos) const constant"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement(""); + statement("threadgroup T& operator [] (size_t pos) threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + statement("constexpr const threadgroup T& operator [] (size_t pos) const threadgroup"); + begin_scope(); + statement("return elements[pos];"); + end_scope(); + end_scope_decl(); + statement(""); + break; + + default: + break; + } + } +} + // Emits any needed custom function bodies. +// Metal helper functions must be static force-inline, i.e. static inline __attribute__((always_inline)) +// otherwise they will cause problems when linked together in a single Metallib. void CompilerMSL::emit_custom_functions() { for (uint32_t i = SPVFuncImplArrayCopyMultidimMax; i >= 2; i--) @@ -3258,7 +3394,7 @@ void CompilerMSL::emit_custom_functions() spv_function_implementations.insert(SPVFuncImplGetSwizzle); } - for (auto &spv_func : spv_function_implementations) + for (const auto &spv_func : spv_function_implementations) { switch (spv_func) { @@ -3390,32 +3526,159 @@ void CompilerMSL::emit_custom_functions() end_scope(); statement(""); } - break; } + // Support for Metal 2.1's new texture_buffer type. 
case SPVFuncImplTexelBufferCoords: { - string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); - statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); - statement("inline uint2 spvTexelBufferCoord(uint tc)"); + if (msl_options.texel_buffer_texture_width > 0) + { + string tex_width_str = convert_to_string(msl_options.texel_buffer_texture_width); + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement(force_inline); + statement("uint2 spvTexelBufferCoord(uint tc)"); + begin_scope(); + statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); + end_scope(); + statement(""); + } + else + { + statement("// Returns 2D texture coords corresponding to 1D texel buffer coords"); + statement( + "#define spvTexelBufferCoord(tc, tex) uint2((tc) % (tex).get_width(), (tc) / (tex).get_width())"); + statement(""); + } + break; + } + + // Emulate texture2D atomic operations + case SPVFuncImplImage2DAtomicCoords: + { + statement("// Returns buffer coords corresponding to 2D texture coords for emulating 2D texture atomics"); + statement("#define spvImage2DAtomicCoord(tc, tex) (((tex).get_width() * (tc).x) + (tc).y)"); + statement(""); + break; + } + + // "fadd" intrinsic support + case SPVFuncImplFAdd: + statement("template<typename T>"); + statement("T spvFAdd(T l, T r)"); begin_scope(); - statement(join("return uint2(tc % ", tex_width_str, ", tc / ", tex_width_str, ");")); + statement("return fma(T(1), l, r);"); + end_scope(); + statement(""); + break; + + // "fmul' intrinsic support + case SPVFuncImplFMul: + statement("template<typename T>"); + statement("T spvFMul(T l, T r)"); + begin_scope(); + statement("return fma(l, r, T(0));"); + end_scope(); + statement(""); + + statement("template<typename T, int Cols, int Rows>"); + statement("vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)"); + begin_scope(); + statement("vec<T, Cols> res = vec<T, Cols>(0);"); + statement("for (uint i = Rows; i > 0; --i)"); + begin_scope(); + statement("vec<T, Cols> tmp(0);"); + statement("for (uint j = 0; j < Cols; ++j)"); + begin_scope(); + statement("tmp[j] = m[j][i - 1];"); + end_scope(); + statement("res = fma(tmp, vec<T, Cols>(v[i - 1]), res);"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template<typename T, int Cols, int Rows>"); + statement("vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)"); + begin_scope(); + statement("vec<T, Rows> res = vec<T, Rows>(0);"); + statement("for (uint i = Cols; i > 0; --i)"); + begin_scope(); + statement("res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + + statement("template<typename T, int LCols, int LRows, int RCols, int RRows>"); + statement( + "matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)"); + begin_scope(); + statement("matrix<T, RCols, LRows> res;"); + statement("for (uint i = 0; i < RCols; i++)"); + begin_scope(); + statement("vec<T, RCols> tmp(0);"); + statement("for (uint j = 0; j < LCols; j++)"); + begin_scope(); + statement("tmp = fma(vec<T, RCols>(r[i][j]), l[j], tmp);"); + end_scope(); + statement("res[i] = tmp;"); + end_scope(); + statement("return res;"); + end_scope(); + statement(""); + break; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + case 
SPVFuncImplCubemapTo2DArrayFace: + statement(force_inline); + statement("float3 spvCubemapTo2DArrayFace(float3 P)"); + begin_scope(); + statement("float3 Coords = abs(P.xyz);"); + statement("float CubeFace = 0;"); + statement("float ProjectionAxis = 0;"); + statement("float u = 0;"); + statement("float v = 0;"); + statement("if (Coords.x >= Coords.y && Coords.x >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.x >= 0 ? 0 : 1;"); + statement("ProjectionAxis = Coords.x;"); + statement("u = P.x >= 0 ? -P.z : P.z;"); + statement("v = -P.y;"); + end_scope(); + statement("else if (Coords.y >= Coords.x && Coords.y >= Coords.z)"); + begin_scope(); + statement("CubeFace = P.y >= 0 ? 2 : 3;"); + statement("ProjectionAxis = Coords.y;"); + statement("u = P.x;"); + statement("v = P.y >= 0 ? P.z : -P.z;"); + end_scope(); + statement("else"); + begin_scope(); + statement("CubeFace = P.z >= 0 ? 4 : 5;"); + statement("ProjectionAxis = Coords.z;"); + statement("u = P.z >= 0 ? P.x : -P.x;"); + statement("v = -P.y;"); + end_scope(); + statement("u = 0.5 * (u/ProjectionAxis + 1);"); + statement("v = 0.5 * (v/ProjectionAxis + 1);"); + statement("return float3(u, v, CubeFace);"); end_scope(); statement(""); break; - } case SPVFuncImplInverse4x4: statement("// Returns the determinant of a 2x2 matrix."); - statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); statement(""); statement("// Returns the determinant of a 3x3 matrix."); - statement("inline float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + statement(force_inline); + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " "float c2, float c3)"); begin_scope(); statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * spvDet2x2(a2, a3, " @@ -3424,7 +3687,8 @@ void CompilerMSL::emit_custom_functions() statement(""); statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("inline float4x4 spvInverse4x4(float4x4 m)"); + statement(force_inline); + statement("float4x4 spvInverse4x4(float4x4 m)"); begin_scope(); statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -3480,7 +3744,8 @@ void CompilerMSL::emit_custom_functions() if (spv_function_implementations.count(SPVFuncImplInverse4x4) == 0) { statement("// Returns the determinant of a 2x2 matrix."); - statement("inline float spvDet2x2(float a1, float a2, float b1, float b2)"); + statement(force_inline); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); begin_scope(); statement("return a1 * b2 - b1 * a2;"); end_scope(); @@ -3489,7 +3754,8 @@ void CompilerMSL::emit_custom_functions() statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("inline float3x3 spvInverse3x3(float3x3 m)"); + statement(force_inline); + statement("float3x3 spvInverse3x3(float3x3 m)"); begin_scope(); statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -3519,7 +3785,8 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplInverse2x2: statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("inline float2x2 spvInverse2x2(float2x2 m)"); + statement(force_inline); + statement("float2x2 spvInverse2x2(float2x2 m)"); begin_scope(); statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); statement_no_indent(""); @@ -4530,6 +4797,8 @@ void CompilerMSL::declare_undefined_values() void CompilerMSL::declare_constant_arrays() { + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to // global constants directly, so we are able to use constants as variable expressions. bool emitted = false; @@ -4539,7 +4808,11 @@ void CompilerMSL::declare_constant_arrays() return; auto &type = this->get<SPIRType>(c.constant_type); - if (!type.array.empty()) + // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries. + // FIXME: However, hoisting constants to main() means we need to pass down constant arrays to leaf functions if they are used there. + // If there are multiple functions in the module, drop this case to avoid breaking use cases which do not need to + // link into Metal libraries. This is hacky. + if (!type.array.empty() && (!fully_inlined || is_scalar(type) || is_vector(type))) { auto name = to_name(c.self); statement("constant ", variable_decl(type, name), " = ", constant_expression(c), ";"); @@ -4551,6 +4824,36 @@ void CompilerMSL::declare_constant_arrays() statement(""); } +// Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries +void CompilerMSL::declare_complex_constant_arrays() +{ + // If we do not have a fully inlined module, we did not opt in to + // declaring constant arrays of complex types. See CompilerMSL::declare_constant_arrays(). + bool fully_inlined = ir.ids_for_type[TypeFunction].size() == 1; + if (!fully_inlined) + return; + + // MSL cannot declare arrays inline (except when declaring a variable), so we must move them out to + // global constants directly, so we are able to use constants as variable expressions. 
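Together with declare_constant_arrays() above, the split works out roughly as follows (IDs and values invented for illustration): scalar and vector arrays remain hoisted as globals,

    constant float _42[3] = { 1.0, 2.0, 3.0 };

while arrays of matrices and other composites are emitted by this function without the constant qualifier, inside the entry point, per the FIXME above:

    float2x2 _47[2] = { float2x2(1.0), float2x2(2.0) };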
+ bool emitted = false; + + ir.for_each_typed_id<SPIRConstant>([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; + + auto &type = this->get<SPIRType>(c.constant_type); + if (!type.array.empty() && !(is_scalar(type) || is_vector(type))) + { + auto name = to_name(c.self); + statement("", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); + + if (emitted) + statement(""); +} + void CompilerMSL::emit_resources() { declare_constant_arrays(); @@ -4716,15 +5019,301 @@ void CompilerMSL::emit_binary_unord_op(uint32_t result_type, uint32_t result_id, inherit_expression_dependencies(result_id, op1); } +bool CompilerMSL::emit_tessellation_io_load(uint32_t result_type_id, uint32_t id, uint32_t ptr) +{ + auto &ptr_type = expression_type(ptr); + auto &result_type = get<SPIRType>(result_type_id); + if (ptr_type.storage != StorageClassInput && ptr_type.storage != StorageClassOutput) + return false; + if (ptr_type.storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationEvaluation) + return false; + + bool flat_data_type = is_matrix(result_type) || is_array(result_type) || result_type.basetype == SPIRType::Struct; + if (!flat_data_type) + return false; + + if (has_decoration(ptr, DecorationPatch)) + return false; + + // Now, we must unflatten a composite type and take care of interleaving array access with gl_in/gl_out. + // Lots of painful code duplication since we *really* should not unroll these kinds of loads in entry point fixup + // unless we're forced to do this when the code is emitting inoptimal OpLoads. + string expr; + + uint32_t interface_index = get_extended_decoration(ptr, SPIRVCrossDecorationInterfaceMemberIndex); + auto *var = maybe_get_backing_variable(ptr); + bool ptr_is_io_variable = ir.ids[ptr].get_type() == TypeVariable; + + const auto &iface_type = expression_type(stage_in_ptr_var_id); + + if (result_type.array.size() > 2) + { + SPIRV_CROSS_THROW("Cannot load tessellation IO variables with more than 2 dimensions."); + } + else if (result_type.array.size() == 2) + { + if (!ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading an array-of-array must be loaded directly from an IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. 
Cannot continue."); + if (result_type.basetype == SPIRType::Struct || is_matrix(result_type)) + SPIRV_CROSS_THROW("Cannot load array-of-array of composite type in tessellation IO."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 1); + uint32_t base_interface_index = interface_index; + + auto &sub_type = get<SPIRType>(result_type.parent_type); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(sub_type) + "({ "; + interface_index = base_interface_index; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t j = 0; j < array_size; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + + if (j + 1 < array_size) + expr += ", "; + } + expr += " })"; + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else if (result_type.basetype == SPIRType::Struct) + { + bool is_array_of_struct = is_array(result_type); + if (is_array_of_struct && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of struct from IO variable must come directly from IO variable."); + + uint32_t num_control_points = 1; + if (is_array_of_struct) + { + num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; + } + + auto &struct_type = is_array_of_struct ? get<SPIRType>(result_type.parent_type) : result_type; + assert(struct_type.array.empty()); + + for (uint32_t i = 0; i < num_control_points; i++) + { + expr += type_to_glsl(struct_type) + "{ "; + for (uint32_t j = 0; j < uint32_t(struct_type.member_types.size()); j++) + { + // The base interface index is stored per variable for structs. + if (var) + { + interface_index = + get_extended_member_decoration(var->self, j, SPIRVCrossDecorationInterfaceMemberIndex); + } + + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + const auto &mbr_type = get<SPIRType>(struct_type.member_types[j]); + if (is_matrix(mbr_type)) + { + expr += type_to_glsl(mbr_type) + "("; + for (uint32_t k = 0; k < mbr_type.columns; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + + if (k + 1 < mbr_type.columns) + expr += ", "; + } + expr += ")"; + } + else if (is_array(mbr_type)) + { + expr += type_to_glsl(mbr_type) + "({ "; + uint32_t array_size = to_array_size_literal(mbr_type, 0); + for (uint32_t k = 0; k < array_size; k++, interface_index++) + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal( + stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + } + else + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + + if (k + 1 < array_size) + expr += ", "; + } + expr += " })"; + } + else + { + if (is_array_of_struct) + { + const uint32_t indices[2] = { i, interface_index }; + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, + &meta); + } + else + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + } + + if (j + 1 < struct_type.member_types.size()) + expr += ", "; + } + expr += " }"; + if (i + 1 < num_control_points) + expr += ", "; + } + if (is_array_of_struct) + expr += " })"; + } + else if (is_matrix(result_type)) + { + bool is_array_of_matrix = is_array(result_type); + if (is_array_of_matrix && !ptr_is_io_variable) + SPIRV_CROSS_THROW("Loading array of matrix from IO variable must come directly from IO variable."); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + if (is_array_of_matrix) + { + // Loading a matrix from each control point. + uint32_t base_interface_index = interface_index; + uint32_t num_control_points = to_array_size_literal(result_type, 0); + expr += type_to_glsl(result_type) + "({ "; + + auto &matrix_type = get_variable_element_type(get<SPIRVariable>(ptr)); + + for (uint32_t i = 0; i < num_control_points; i++) + { + interface_index = base_interface_index; + expr += type_to_glsl(matrix_type) + "("; + for (uint32_t j = 0; j < result_type.columns; j++, interface_index++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += + access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + if (j + 1 < result_type.columns) + expr += ", "; + } + expr += ")"; + if (i + 1 < num_control_points) + expr += ", "; + } + + expr += " })"; + } + else + { + expr += type_to_glsl(result_type) + "("; + for (uint32_t i = 0; i < result_type.columns; i++, interface_index++) + { + expr += to_expression(ptr) + "." + to_member_name(iface_type, interface_index); + if (i + 1 < result_type.columns) + expr += ", "; + } + expr += ")"; + } + } + else if (ptr_is_io_variable) + { + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + // We're loading an array directly from a global variable. + // This means we're loading one member from each control point. + expr += type_to_glsl(result_type) + "({ "; + uint32_t num_control_points = to_array_size_literal(result_type, 0); + + for (uint32_t i = 0; i < num_control_points; i++) + { + const uint32_t indices[2] = { i, interface_index }; + + AccessChainMeta meta; + expr += access_chain_internal(stage_in_ptr_var_id, indices, 2, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_PTR_CHAIN_BIT, &meta); + + if (i + 1 < num_control_points) + expr += ", "; + } + expr += " })"; + } + else + { + // We're loading an array from a concrete control point. + assert(is_array(result_type)); + assert(result_type.array.size() == 1); + if (interface_index == uint32_t(-1)) + SPIRV_CROSS_THROW("Interface index is unknown. Cannot continue."); + + expr += type_to_glsl(result_type) + "({ "; + uint32_t array_size = to_array_size_literal(result_type, 0); + for (uint32_t i = 0; i < array_size; i++, interface_index++) + { + expr += to_expression(ptr) + "." 
+ to_member_name(iface_type, interface_index); + if (i + 1 < array_size) + expr += ", "; + } + expr += " })"; + } + + emit_op(result_type_id, id, expr, false); + register_read(id, ptr, false); + return true; +} + bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t length) { // If this is a per-vertex output, remap it to the I/O array buffer. - auto *var = maybe_get<SPIRVariable>(ops[2]); + + // Any object which did not go through IO flattening shenanigans will go there instead. + // We will unflatten on-demand instead as needed, but not all possible cases can be supported, especially with arrays. + + auto *var = maybe_get_backing_variable(ops[2]); + bool patch = false; + bool flat_data = false; + bool ptr_is_chain = false; + + if (var) + { + patch = has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var)); + + // Should match strip_array in add_interface_block. + flat_data = var->storage == StorageClassInput || + (var->storage == StorageClassOutput && get_execution_model() == ExecutionModelTessellationControl); + + // We might have a chained access chain, where + // we first take the access chain to the control point, and then we chain into a member or something similar. + // In this case, we need to skip gl_in/gl_out remapping. + ptr_is_chain = var->self != ID(ops[2]); + } + BuiltIn bi_type = BuiltIn(get_decoration(ops[2], DecorationBuiltIn)); - if (var && - (var->storage == StorageClassInput || - (get_execution_model() == ExecutionModelTessellationControl && var->storage == StorageClassOutput)) && - !(has_decoration(ops[2], DecorationPatch) || is_patch_block(get_variable_data_type(*var))) && + if (var && flat_data && !patch && (!is_builtin_variable(*var) || bi_type == BuiltInPosition || bi_type == BuiltInPointSize || bi_type == BuiltInClipDistance || bi_type == BuiltInCullDistance || get_variable_data_type(*var).basetype == SPIRType::Struct)) @@ -4740,26 +5329,35 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l new_uint_type.width = 32; set<SPIRType>(type_id, new_uint_type); - indices.push_back(ops[3]); + uint32_t first_non_array_index = ptr_is_chain ? 3 : 4; + VariableID stage_var_id = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + VariableID ptr = ptr_is_chain ? VariableID(ops[2]) : stage_var_id; + if (!ptr_is_chain) + { + // Index into gl_in/gl_out with first array index. + indices.push_back(ops[3]); + } + + auto &result_ptr_type = get<SPIRType>(ops[0]); uint32_t const_mbr_id = next_id++; - uint32_t index = get_extended_decoration(ops[2], SPIRVCrossDecorationInterfaceMemberIndex); - uint32_t ptr = var->storage == StorageClassInput ? stage_in_ptr_var_id : stage_out_ptr_var_id; + uint32_t index = get_extended_decoration(var->self, SPIRVCrossDecorationInterfaceMemberIndex); if (var->storage == StorageClassInput || has_decoration(get_variable_element_type(*var).self, DecorationBlock)) { - uint32_t i = 4; + uint32_t i = first_non_array_index; auto *type = &get_variable_element_type(*var); - if (index == uint32_t(-1) && length >= 5) + if (index == uint32_t(-1) && length >= (first_non_array_index + 1)) { // Maybe this is a struct type in the input class, in which case // we put it as a decoration on the corresponding member. 
- index = get_extended_member_decoration(ops[2], get_constant(ops[4]).scalar(), + index = get_extended_member_decoration(var->self, get_constant(ops[first_non_array_index]).scalar(), SPIRVCrossDecorationInterfaceMemberIndex); assert(index != uint32_t(-1)); i++; - type = &get<SPIRType>(type->member_types[get_constant(ops[4]).scalar()]); + type = &get<SPIRType>(type->member_types[get_constant(ops[first_non_array_index]).scalar()]); } - // In this case, we flattened structures and arrays, so now we have to + + // In this case, we're poking into flattened structures and arrays, so now we have to // combine the following indices. If we encounter a non-constant index, // we're hosed. for (; i < length; ++i) @@ -4767,92 +5365,35 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l if (!is_array(*type) && !is_matrix(*type) && type->basetype != SPIRType::Struct) break; - auto &c = get_constant(ops[i]); - index += c.scalar(); + auto *c = maybe_get<SPIRConstant>(ops[i]); + if (!c || c->specialization) + SPIRV_CROSS_THROW("Trying to dynamically index into an array interface variable in tessellation. " + "This is currently unsupported."); + + // We're in flattened space, so just increment the member index into IO block. + // We can only do this once in the current implementation, so either: + // Struct, Matrix or 1-dimensional array for a control point. + index += c->scalar(); + if (type->parent_type) type = &get<SPIRType>(type->parent_type); else if (type->basetype == SPIRType::Struct) - type = &get<SPIRType>(type->member_types[c.scalar()]); + type = &get<SPIRType>(type->member_types[c->scalar()]); } - // If the access chain terminates at a composite type, the composite - // itself might be copied. In that case, we must unflatten it. - if (is_matrix(*type) || is_array(*type) || type->basetype == SPIRType::Struct) - { - std::string temp_name = join(to_name(var->self), "_", ops[1]); - statement(variable_decl(*type, temp_name, var->self), ";"); - // Set up the initializer for this temporary variable. 
- indices.push_back(const_mbr_id); - if (type->basetype == SPIRType::Struct) - { - for (uint32_t j = 0; j < type->member_types.size(); j++) - { - index = get_extended_member_decoration(ops[2], j, SPIRVCrossDecorationInterfaceMemberIndex); - const auto &mbr_type = get<SPIRType>(type->member_types[j]); - if (is_matrix(mbr_type)) - { - for (uint32_t k = 0; k < mbr_type.columns; k++, index++) - { - set<SPIRConstant>(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, - true); - statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); - } - } - else if (is_array(mbr_type)) - { - for (uint32_t k = 0; k < to_array_size_literal(mbr_type, 0); k++, index++) - { - set<SPIRConstant>(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, - true); - statement(temp_name, ".", to_member_name(*type, j), "[", k, "] = ", e, ";"); - } - } - else - { - set<SPIRConstant>(const_mbr_id, type_id, index, false); - auto e = - access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, true); - statement(temp_name, ".", to_member_name(*type, j), " = ", e, ";"); - } - } - } - else if (is_matrix(*type)) - { - for (uint32_t j = 0; j < type->columns; j++, index++) - { - set<SPIRConstant>(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); - statement(temp_name, "[", j, "] = ", e, ";"); - } - } - else // Must be an array - { - assert(is_array(*type)); - for (uint32_t j = 0; j < to_array_size_literal(*type, 0); j++, index++) - { - set<SPIRConstant>(const_mbr_id, type_id, index, false); - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); - statement(temp_name, "[", j, "] = ", e, ";"); - } - } - // This needs to be a variable instead of an expression so we don't - // try to dereference this as a variable pointer. - set<SPIRVariable>(ops[1], ops[0], var->storage); - ir.meta[ops[1]] = ir.meta[ops[2]]; - set_name(ops[1], temp_name); - if (has_decoration(var->self, DecorationInvariant)) - set_decoration(ops[1], DecorationInvariant); - for (uint32_t j = 2; j < length; j++) - inherit_expression_dependencies(ops[1], ops[j]); - return true; + if (is_matrix(result_ptr_type) || is_array(result_ptr_type) || result_ptr_type.basetype == SPIRType::Struct) + { + // We're not going to emit the actual member name, we let any further OpLoad take care of that. + // Tag the access chain with the member index we're referencing. + set_extended_decoration(ops[1], SPIRVCrossDecorationInterfaceMemberIndex, index); } else { + // Access the appropriate member of gl_in/gl_out. set<SPIRConstant>(const_mbr_id, type_id, index, false); indices.push_back(const_mbr_id); + // Append any straggling access chain indices. if (i < length) indices.insert(indices.end(), ops + i, ops + length); } @@ -4868,7 +5409,39 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // We use the pointer to the base of the input/output array here, // so this is always a pointer chain. - auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), get<SPIRType>(ops[0]), &meta, true); + string e; + + if (!ptr_is_chain) + { + // This is the start of an access chain, use ptr_chain to index into control point array. 
+ e = access_chain(ptr, indices.data(), uint32_t(indices.size()), result_ptr_type, &meta, true); + } + else + { + // If we're accessing a struct, we need to use member indices which are based on the IO block, + // not actual struct type, so we have to use a split access chain here where + // first path resolves the control point index, i.e. gl_in[index], and second half deals with + // looking up flattened member name. + + // However, it is possible that we partially accessed a struct, + // by taking pointer to member inside the control-point array. + // For this case, we fall back to a natural access chain since we have already dealt with remapping struct members. + // One way to check this here is if we have 2 implied read expressions. + // First one is the gl_in/gl_out struct itself, then an index into that array. + // If we have traversed further, we use a normal access chain formulation. + auto *ptr_expr = maybe_get<SPIRExpression>(ptr); + if (ptr_expr && ptr_expr->implied_read_expressions.size() == 2) + { + e = join(to_expression(ptr), + access_chain_internal(stage_var_id, indices.data(), uint32_t(indices.size()), + ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta)); + } + else + { + e = access_chain_internal(ptr, indices.data(), uint32_t(indices.size()), 0, &meta); + } + } + auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2])); expr.loaded_from = var->self; expr.need_transpose = meta.need_transpose; @@ -4882,12 +5455,24 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_depdendencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); + for (uint32_t i = 2; i < length; i++) { inherit_expression_dependencies(ops[1], ops[i]); add_implied_read_expression(expr, ops[i]); } + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. 
+ if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + return true; } @@ -4966,6 +5551,24 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) switch (opcode) { + case OpLoad: + { + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + if (is_tessellation_shader()) + { + if (!emit_tessellation_io_load(ops[0], id, ptr)) + CompilerGLSL::emit_instruction(instruction); + } + else + { + // Sample mask input for Metal is not an array + if (BuiltIn(get_decoration(ptr, DecorationBuiltIn)) == BuiltInSampleMask) + set_decoration(id, DecorationBuiltIn, BuiltInSampleMask); + CompilerGLSL::emit_instruction(instruction); + } + break; + } // Comparisons case OpIEqual: @@ -5117,6 +5720,20 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) MSL_BFOP(fmod); break; + case OpFMul: + if (msl_options.invariant_float_math) + MSL_BFOP(spvFMul); + else + MSL_BOP(*); + break; + + case OpFAdd: + if (msl_options.invariant_float_math) + MSL_BFOP(spvFAdd); + else + MSL_BOP(+); + break; + // Atomics case OpAtomicExchange: { @@ -5242,6 +5859,41 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + if (var && atomic_image_vars.count(var->self)) + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + std::string coord = to_expression(ops[3]); + auto &type = expression_type(ops[2]); + if (type.image.dim == Dim2D) + { + coord = join("spvImage2DAtomicCoord(", coord, ", ", to_expression(ops[2]), ")"); + } + + auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), "_atomic[", coord, "]"), result_type, true); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } + else + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = + set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + } + break; + } + case OpImageWrite: { uint32_t img_id = ops[0]; @@ -5337,7 +5989,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) expr += ", " + img_exp + ".get_depth(" + lod + ")"; if (img_is_array) + { expr += ", " + img_exp + ".get_array_size()"; + if (img_dim == DimCube && msl_options.emulate_cube_array) + expr += " / 6"; + } expr += ")"; @@ -5419,9 +6075,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } - case OpImageTexelPointer: - SPIRV_CROSS_THROW("MSL does not support atomic operations on images or texel buffers."); - // Casting case OpQuantizeToF16: { @@ -5514,6 +6167,85 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + if (!msl_options.invariant_float_math) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 
2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; + + if (opcode == OpMatrixTimesVector) + { + expr = join("spvFMulVectorMatrix(", to_enclosed_unpacked_expression(ops[3]), ", ", + to_unpacked_row_major_matrix_expression(ops[2]), ")"); + } + else + { + expr = join("spvFMulMatrixVector(", to_unpacked_row_major_matrix_expression(ops[3]), ", ", + to_enclosed_unpacked_expression(ops[2]), ")"); + } + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + { + if (opcode == OpMatrixTimesVector) + MSL_BFOP(spvFMulMatrixVector); + else + MSL_BFOP(spvFMulVectorMatrix); + } + break; + } + + case OpMatrixTimesMatrix: + { + if (!msl_options.invariant_float_math) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + auto *a = maybe_get<SPIRExpression>(ops[2]); + auto *b = maybe_get<SPIRExpression>(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. + if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + + auto expr = + join("spvFMulMatrixMatrix(", enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), ", ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), ")"); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + MSL_BFOP(spvFMulMatrixMatrix); + + break; + } + case OpIAddCarry: case OpISubBorrow: { @@ -5654,6 +6386,34 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) previous_instruction_opcode = opcode; } +void CompilerMSL::emit_texture_op(const Instruction &i) +{ + if (msl_options.is_ios() && msl_options.ios_use_framebuffer_fetch_subpasses) + { + auto *ops = stream(i); + + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + + auto &type = expression_type(img); + auto &imgtype = get<SPIRType>(type.self); + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (imgtype.image.dim == DimSubpassData) + { + // Subpass inputs cannot be invalidated, + // so just forward the expression directly. 
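Concretely, the subpass read collapses into the color-attachment parameter itself; a minimal sketch of the fragment function this path yields (names illustrative):

    fragment float4 main0(float4 inColor [[color(0)]])
    {
        return inColor; // the subpass load forwards straight to the [[color(0)]] input
    }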
+ string expr = to_expression(img); + emit_op(result_type_id, id, expr, true); + return; + } + } + + // Fallback to default implementation + CompilerGLSL::emit_texture_op(i); +} + void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem) { if (get_execution_model() != ExecutionModelGLCompute && get_execution_model() != ExecutionModelTessellationControl) @@ -5682,8 +6442,11 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin if (get_execution_model() == ExecutionModelTessellationControl || (mem_sem & (MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask))) mem_flags += "mem_flags::mem_device"; - if (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsAtomicCounterMemoryMask)) + + // Fix tessellation patch function processing + if (get_execution_model() == ExecutionModelTessellationControl || + (mem_sem & (MemorySemanticsSubgroupMemoryMask | MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsAtomicCounterMemoryMask))) { if (!mem_flags.empty()) mem_flags += " | "; @@ -5753,59 +6516,76 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass lhs_storage, StorageClass rhs_storage) { - // Assignment from an array initializer is fine. - auto &type = expression_type(rhs_id); - auto *var = maybe_get_backing_variable(rhs_id); + // Allow Metal to use the array<T> template to make arrays a value type. + // This, however, cannot be used for threadgroup address specifiers, so consider the custom array copy as fallback. + bool lhs_thread = (lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction || + lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate); + bool rhs_thread = (rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction || + rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate); - // Unfortunately, we cannot template on address space in MSL, - // so explicit address space redirection it is ... - bool is_constant = false; - if (ir.ids[rhs_id].get_type() == TypeConstant) + // If threadgroup storage qualifiers are *not* used: + // Avoid spvCopy* wrapper functions; Otherwise, spvUnsafeArray<> template cannot be used with that storage qualifier. + if (lhs_thread && rhs_thread && !use_builtin_array) { - is_constant = true; + statement(lhs, " = ", to_expression(rhs_id), ";"); } - else if (var && var->remapped_variable && var->statically_assigned && - ir.ids[var->static_expression].get_type() == TypeConstant) + else { - is_constant = true; - } + // Assignment from an array initializer is fine. + auto &type = expression_type(rhs_id); + auto *var = maybe_get_backing_variable(rhs_id); - // For the case where we have OpLoad triggering an array copy, - // we cannot easily detect this case ahead of time since it's - // context dependent. We might have to force a recompile here - // if this is the only use of array copies in our shader. 
- if (type.array.size() > 1) - { - if (type.array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size()); - add_spv_func_and_recompile(func); - } - else - add_spv_func_and_recompile(SPVFuncImplArrayCopy); - - bool lhs_thread = lhs_storage == StorageClassOutput || lhs_storage == StorageClassFunction || - lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate; - bool rhs_thread = rhs_storage == StorageClassInput || rhs_storage == StorageClassFunction || - rhs_storage == StorageClassGeneric || rhs_storage == StorageClassPrivate; - - const char *tag = nullptr; - if (lhs_thread && is_constant) - tag = "FromConstantToStack"; - else if (lhs_storage == StorageClassWorkgroup && is_constant) - tag = "FromConstantToThreadGroup"; - else if (lhs_thread && rhs_thread) - tag = "FromStackToStack"; - else if (lhs_storage == StorageClassWorkgroup && rhs_thread) - tag = "FromStackToThreadGroup"; - else if (lhs_thread && rhs_storage == StorageClassWorkgroup) - tag = "FromThreadGroupToStack"; - else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) - tag = "FromThreadGroupToThreadGroup"; - else - SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); + // Unfortunately, we cannot template on address space in MSL, + // so explicit address space redirection it is ... + bool is_constant = false; + if (ir.ids[rhs_id].get_type() == TypeConstant) + { + is_constant = true; + } + else if (var && var->remapped_variable && var->statically_assigned && + ir.ids[var->static_expression].get_type() == TypeConstant) + { + is_constant = true; + } + + // For the case where we have OpLoad triggering an array copy, + // we cannot easily detect this case ahead of time since it's + // context dependent. We might have to force a recompile here + // if this is the only use of array copies in our shader. 
+ if (type.array.size() > 1) + { + if (type.array.size() > SPVFuncImplArrayCopyMultidimMax) + SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); + auto func = static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type.array.size()); + add_spv_func_and_recompile(func); + } + else + add_spv_func_and_recompile(SPVFuncImplArrayCopy); + + const char *tag = nullptr; + if (lhs_thread && is_constant) + tag = "FromConstantToStack"; + else if (lhs_storage == StorageClassWorkgroup && is_constant) + tag = "FromConstantToThreadGroup"; + else if (lhs_thread && rhs_thread) + tag = "FromStackToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_thread) + tag = "FromStackToThreadGroup"; + else if (lhs_thread && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToStack"; + else if (lhs_storage == StorageClassWorkgroup && rhs_storage == StorageClassWorkgroup) + tag = "FromThreadGroupToThreadGroup"; + else + SPIRV_CROSS_THROW("Unknown storage class used for copying arrays."); - statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); + // Pass internal array of spvUnsafeArray<> into wrapper functions + if (lhs_thread) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ".elements, ", to_expression(rhs_id), ");"); + else if (rhs_thread) + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ".elements);"); + else + statement("spvArrayCopy", tag, type.array.size(), "(", lhs, ", ", to_expression(rhs_id), ");"); + } } // Since MSL does not allow arrays to be copied via simple variable assignment, @@ -5860,7 +6640,18 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, auto *var = maybe_get_backing_variable(obj); if (!var) SPIRV_CROSS_THROW("No backing variable for atomic operation."); - exp += get_argument_address_space(*var); + + // Emulate texture2D atomic operations + const auto &res_type = get<SPIRType>(var->basetype); + if (res_type.storage == StorageClassUniformConstant && res_type.basetype == SPIRType::Image) + { + exp += "device"; + } + else + { + exp += get_argument_address_space(*var); + } + exp += " atomic_"; exp += type_to_glsl(type); exp += "*)"; @@ -6230,38 +7021,21 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) add_function_overload(func); local_variable_names = resource_names; + string decl; processing_entry_point = func.self == ir.default_entry_point; - string decl = processing_entry_point ? "" : "inline "; + // Metal helper functions must be static force-inline otherwise they will cause problems when linked together in a single Metallib. + if (!processing_entry_point) + statement(force_inline); auto &type = get<SPIRType>(func.return_type); - if (type.array.empty()) - { - decl += func_type_decl(type); - } - else - { - // We cannot return arrays in MSL, so "return" through an out variable. - decl += "void"; - } - + decl += func_type_decl(type); decl += " "; decl += to_name(func.self); decl += "("; - if (!type.array.empty()) - { - // Fake arrays returns by writing to an out array instead. 
- decl += "thread "; - decl += type_to_glsl(type); - decl += " (&SPIRV_Cross_return_value)"; - decl += type_to_array_glsl(type); - if (!func.arguments.empty()) - decl += ", "; - } - if (processing_entry_point) { if (msl_options.argument_buffers) @@ -6596,6 +7370,14 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo else if (sampling_type_needs_f32_conversion(coord_type)) tex_coords = convert_to_f32(tex_coords, 1); + if (msl_options.texture_1D_as_2D) + { + if (is_fetch) + tex_coords = "uint2(" + tex_coords + ", 0)"; + else + tex_coords = "float2(" + tex_coords + ", 0.5)"; + } + alt_coord_component = 1; break; @@ -6610,14 +7392,27 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo else { // Metal texel buffer textures are 2D, so convert 1D coord to 2D. + // Support for Metal 2.1's new texture_buffer type. if (is_fetch) - tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + { + if (msl_options.texel_buffer_texture_width > 0) + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ")"; + } + else + { + tex_coords = "spvTexelBufferCoord(" + round_fp_tex_coords(tex_coords, coord_is_fp) + ", " + + to_expression(img) + ")"; + } + } } alt_coord_component = 1; break; case DimSubpassData: + // If we're using Metal's native frame-buffer fetch API for subpass inputs, + // this path will not be hit. if (imgtype.image.ms) tex_coords = "uint2(gl_FragCoord.xy)"; else @@ -6703,28 +7498,46 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo if (!farg_str.empty()) farg_str += ", "; - farg_str += tex_coords; - // If fetch from cube, add face explicitly - if (is_cube_fetch) + if (imgtype.image.dim == DimCube && imgtype.image.arrayed && msl_options.emulate_cube_array) { - // Special case for cube arrays, face and layer are packed in one dimension. - if (imgtype.image.arrayed) - farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") % 6u"; - else - farg_str += ", uint(" + round_fp_tex_coords(to_extract_component_expression(coord, 2), coord_is_fp) + ")"; - } + farg_str += "spvCubemapTo2DArrayFace(" + tex_coords + ").xy"; - // If array, use alt coord - if (imgtype.image.arrayed) - { - // Special case for cube arrays, face and layer are packed in one dimension. - if (imgtype.image.dim == DimCube && is_fetch) - farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") / 6u"; + if (is_cube_fetch) + farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ")"; else - farg_str += ", uint(" + + farg_str += ", uint(spvCubemapTo2DArrayFace(" + tex_coords + ").z) + (uint(" + round_fp_tex_coords(to_extract_component_expression(coord, alt_coord_component), coord_is_fp) + - ")"; + ") * 6u)"; + + add_spv_func_and_recompile(SPVFuncImplCubemapTo2DArrayFace); + } + else + { + farg_str += tex_coords; + + // If fetch from cube, add face explicitly + if (is_cube_fetch) + { + // Special case for cube arrays, face and layer are packed in one dimension. + if (imgtype.image.arrayed) + farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") % 6u"; + else + farg_str += + ", uint(" + round_fp_tex_coords(to_extract_component_expression(coord, 2), coord_is_fp) + ")"; + } + + // If array, use alt coord + if (imgtype.image.arrayed) + { + // Special case for cube arrays, face and layer are packed in one dimension. 
+ if (imgtype.image.dim == DimCube && is_fetch) + farg_str += ", uint(" + to_extract_component_expression(coord, 2) + ") / 6u"; + else + farg_str += + ", uint(" + + round_fp_tex_coords(to_extract_component_expression(coord, alt_coord_component), coord_is_fp) + ")"; + } } // Depth compare reference value @@ -6787,14 +7600,14 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo // LOD Options // Metal does not support LOD for 1D textures. - if (bias && imgtype.image.dim != Dim1D) + if (bias && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(bias); farg_str += ", bias(" + to_expression(bias) + ")"; } // Metal does not support LOD for 1D textures. - if (lod && imgtype.image.dim != Dim1D) + if (lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(lod); if (is_fetch) @@ -6806,8 +7619,8 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo farg_str += ", level(" + to_expression(lod) + ")"; } } - else if (is_fetch && !lod && imgtype.image.dim != Dim1D && imgtype.image.dim != DimBuffer && !imgtype.image.ms && - imgtype.image.sampled != 2) + else if (is_fetch && !lod && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D) && + imgtype.image.dim != DimBuffer && !imgtype.image.ms && imgtype.image.sampled != 2) { // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. // Check for sampled type as well, because is_fetch is also used for OpImageRead in MSL. @@ -6815,7 +7628,7 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo } // Metal does not support LOD for 1D textures. - if ((grad_x || grad_y) && imgtype.image.dim != Dim1D) + if ((grad_x || grad_y) && (imgtype.image.dim != Dim1D || msl_options.texture_1D_as_2D)) { forward = forward && should_forward(grad_x); forward = forward && should_forward(grad_y); @@ -6829,7 +7642,10 @@ string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bo grad_opt = "3d"; break; case DimCube: - grad_opt = "cube"; + if (imgtype.image.arrayed && msl_options.emulate_cube_array) + grad_opt = "2d"; + else + grad_opt = "cube"; break; default: grad_opt = "unsupported_gradient_dimension"; @@ -7164,38 +7980,16 @@ string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_ if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler) arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">("); - auto *c = maybe_get<SPIRConstant>(id); - if (c && !get<SPIRType>(c->constant_type).array.empty()) - { - // If we are passing a constant array directly to a function for some reason, - // the callee will expect an argument in thread const address space - // (since we can only bind to arrays with references in MSL). - // To resolve this, we must emit a copy in this address space. - // This kind of code gen should be rare enough that performance is not a real concern. - // Inline the SPIR-V to avoid this kind of suboptimal codegen. - // - // We risk calling this inside a continue block (invalid code), - // so just create a thread local copy in the current function. 
- arg_str = join("_", id, "_array_copy"); - auto &constants = current_function->constant_arrays_needed_on_stack; - auto itr = find(begin(constants), end(constants), ID(id)); - if (itr == end(constants)) - { - force_recompile(); - constants.push_back(id); - } - } - else - arg_str += CompilerGLSL::to_func_call_arg(arg, id); + arg_str += CompilerGLSL::to_func_call_arg(arg, id); + + // Need to check the base variable in case we need to apply a qualified alias. + uint32_t var_id = 0; + auto *var = maybe_get<SPIRVariable>(id); + if (var) + var_id = var->basevariable; if (!arg_is_dynamic_img_sampler) { - // Need to check the base variable in case we need to apply a qualified alias. - uint32_t var_id = 0; - auto *var = maybe_get<SPIRVariable>(id); - if (var) - var_id = var->basevariable; - auto *constexpr_sampler = find_constexpr_sampler(var_id ? var_id : id); if (type.basetype == SPIRType::SampledImage) { @@ -7287,6 +8081,13 @@ string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_ arg_str += ")"; } + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(var_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + arg_str += ", " + to_expression(var_id) + "_atomic"; + } + return arg_str; } @@ -7435,6 +8236,7 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ // If this member is packed, mark it as so. string pack_pfx; + // Allow Metal to use the array<T> template to make arrays a value type uint32_t orig_id = 0; if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)) orig_id = get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID); @@ -7446,6 +8248,17 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ SPIRType row_major_physical_type; const SPIRType *declared_type = &physical_type; + // If a struct is being declared with physical layout, + // do not use array<T> wrappers. + // This avoids a lot of complicated cases with packed vectors and matrices, + // and generally we cannot copy full arrays in and out of buffers into Function + // address space. + // Array of resources should also be declared as builtin arrays. + if (has_member_decoration(type.self, index, DecorationOffset)) + use_builtin_array = true; + else if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) + use_builtin_array = true; + if (member_is_packed_physical_type(type, index)) { // If we're packing a matrix, output an appropriate typedef @@ -7498,11 +8311,17 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_ if (physical_type.basetype != SPIRType::Image && physical_type.basetype != SPIRType::Sampler && physical_type.basetype != SPIRType::SampledImage) { + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) + use_builtin_array = true; array_type = type_to_array_glsl(physical_type); } - return join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index), - member_attribute_qualifier(type, index), array_type, ";"); + auto result = join(pack_pfx, type_to_glsl(*declared_type, orig_id), " ", qualifier, to_member_name(type, index), + member_attribute_qualifier(type, index), array_type, ";"); + + use_builtin_array = false; + return result; } // Emit a structure member, padding and packing to maintain the correct memeber alignments. 
@@ -7516,7 +8335,10 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_ statement("char _m", index, "_pad", "[", pad_len, "];"); } + // Handle HLSL-style 0-based vertex/instance index. + builtin_declaration = true; statement(to_struct_member(type, member_type_id, index, qualifier)); + builtin_declaration = false; } void CompilerMSL::emit_struct_padding_target(const SPIRType &type) @@ -8071,6 +8893,8 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) if (!ep_args.empty()) ep_args += ", "; + // Handle HLSL-style 0-based vertex/instance index. + builtin_declaration = true; ep_args += builtin_type_decl(bi_type, var_id) + " " + to_expression(var_id); ep_args += " [[" + builtin_qualifier(bi_type); if (bi_type == BuiltInSampleMask && get_entry_point().flags.get(ExecutionModePostDepthCoverage)) @@ -8082,6 +8906,7 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) ep_args += ", post_depth_coverage"; } ep_args += "]]"; + builtin_declaration = false; } } @@ -8105,12 +8930,12 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args) for (auto &var : active_builtins) var.first->basetype = ensure_correct_builtin_type(var.first->basetype, var.second); - // Vertex and instance index built-ins - if (needs_vertex_idx_arg) - ep_args += built_in_func_arg(BuiltInVertexIndex, !ep_args.empty()); + // Handle HLSL-style 0-based vertex/instance index. + if (needs_base_vertex_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseVertex, !ep_args.empty()); - if (needs_instance_idx_arg) - ep_args += built_in_func_arg(BuiltInInstanceIndex, !ep_args.empty()); + if (needs_base_instance_arg == TriState::Yes) + ep_args += built_in_func_arg(BuiltInBaseInstance, !ep_args.empty()); if (capture_output_to_buffer) { @@ -8255,6 +9080,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) SPIRType::BaseType basetype; uint32_t index; uint32_t plane; + uint32_t secondary_index; }; SmallVector<Resource> resources; @@ -8289,6 +9115,13 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) } } + // Emulate texture2D atomic operations + uint32_t secondary_index = 0; + if (atomic_image_vars.count(var.self)) + { + secondary_index = get_metal_resource_index(var, SPIRType::AtomicCounter, 0); + } + if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); @@ -8299,20 +9132,20 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) for (uint32_t i = 0; i < plane_count; i++) resources.push_back({ &var, to_name(var_id), SPIRType::Image, - get_metal_resource_index(var, SPIRType::Image, i), i }); + get_metal_resource_index(var, SPIRType::Image, i), i, secondary_index }); if (type.image.dim != DimBuffer && !constexpr_sampler) { resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, - get_metal_resource_index(var, SPIRType::Sampler), 0 }); + get_metal_resource_index(var, SPIRType::Sampler), 0, 0 }); } } else if (!constexpr_sampler) { // constexpr samplers are not declared as resources. 
add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 }); + resources.push_back({ &var, to_name(var_id), type.basetype, + get_metal_resource_index(var, type.basetype), 0, secondary_index }); } } }); @@ -8348,6 +9181,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (array_size == 0) SPIRV_CROSS_THROW("Unsized arrays of buffers are not supported in MSL."); + // Allow Metal to use the array<T> template to make arrays a value type + use_builtin_array = true; buffer_arrays.push_back(var_id); for (uint32_t i = 0; i < array_size; ++i) { @@ -8360,6 +9195,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += ", raster_order_group(0)"; ep_args += "]]"; } + use_builtin_array = false; } else { @@ -8381,16 +9217,38 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += " [[sampler(" + convert_to_string(r.index) + ")]]"; break; case SPIRType::Image: + { if (!ep_args.empty()) ep_args += ", "; - ep_args += image_type_glsl(type, var_id) + " " + r.name; - if (r.plane > 0) - ep_args += join(plane_name_suffix, r.plane); - ep_args += " [[texture(" + convert_to_string(r.index) + ")"; - if (interlocked_resources.count(var_id)) - ep_args += ", raster_order_group(0)"; - ep_args += "]]"; + + // Use Metal's native frame-buffer fetch API for subpass inputs. + const auto &basetype = get<SPIRType>(var.basetype); + if (basetype.image.dim != DimSubpassData || !msl_options.is_ios() || + !msl_options.ios_use_framebuffer_fetch_subpasses) + { + ep_args += image_type_glsl(type, var_id) + " " + r.name; + if (r.plane > 0) + ep_args += join(plane_name_suffix, r.plane); + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; + } + else + { + ep_args += image_type_glsl(type, var_id) + "4 " + r.name; + ep_args += " [[color(" + convert_to_string(r.index) + ")]]"; + } + + // Emulate texture2D atomic operations + if (atomic_image_vars.count(var.self)) + { + ep_args += ", device atomic_" + type_to_glsl(get<SPIRType>(basetype.image.type), 0); + ep_args += "* " + r.name + "_atomic"; + ep_args += " [[buffer(" + convert_to_string(r.secondary_index) + ")]]"; + } break; + } default: if (!ep_args.empty()) ep_args += ", "; @@ -8799,6 +9657,19 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base if (has_extended_decoration(var.self, resource_decoration)) return get_extended_decoration(var.self, resource_decoration); + // Allow user to enable decoration binding + if (msl_options.enable_decoration_binding) + { + // If there is no explicit mapping of bindings to MSL, use the declared binding. + if (has_decoration(var.self, DecorationBinding)) + { + var_binding = get_decoration(var.self, DecorationBinding); + // Avoid emitting sentinel bindings. + if (var_binding < 0x80000000u) + return var_binding; + } + } + // If we did not explicitly remap, allocate bindings on demand. // We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different. 
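Putting the secondary_index bookkeeping and the argument emission above together: an image used with atomic operations is passed both as a texture and as an aliasing device buffer, since Metal has no texture atomics. A hedged sketch of the resulting entry-point shape (names and binding numbers are invented; the coordinate-to-index mapping shown is an assumption, the real one comes from the SPVFuncImplImage2DAtomicCoords helper):

#include <metal_stdlib>
using namespace metal;

fragment void main0(texture2d<uint, access::read_write> img [[texture(0)]],
                    device atomic_uint *img_atomic [[buffer(1)]])
{
    uint2 coord = uint2(3u, 7u);
    // Assumed row-major linearization over the texture width; the emitted
    // helper encapsulates the actual 2D-coordinate-to-buffer-index mapping.
    uint idx = coord.y * img.get_width() + coord.x;
    // An imageAtomicAdd on the SPIR-V side lowers to a buffer atomic here.
    atomic_fetch_add_explicit(&img_atomic[idx], 1u, memory_order_relaxed);
}

Function calls forward the same pair, which is why to_func_call_arg above appends the "_atomic" twin argument.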
@@ -8887,13 +9758,22 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) type.image.dim == Dim2D && type_is_floating_point(get<SPIRType>(type.image.type)) && spv_function_implementations.count(SPVFuncImplDynamicImageSampler); + // Allow Metal to use the array<T> template to make arrays a value type + string address_space = get_argument_address_space(var); bool builtin = is_builtin_variable(var); + use_builtin_array = builtin; + if (address_space == "threadgroup") + use_builtin_array = true; + if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl += type_to_glsl(type, arg.id); else if (builtin) decl += builtin_type_decl(static_cast<BuiltIn>(get_decoration(arg.id, DecorationBuiltIn)), arg.id); else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) + { + use_builtin_array = true; decl += join(type_to_glsl(type, arg.id), "*"); + } else if (is_dynamic_img_sampler) { decl += join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">"); @@ -8905,40 +9785,14 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) bool opaque_handle = storage == StorageClassUniformConstant; - string address_space = get_argument_address_space(var); - if (!builtin && !opaque_handle && !is_pointer && (storage == StorageClassFunction || storage == StorageClassGeneric)) { // If the argument is a pure value and not an opaque type, we will pass by value. - if (is_array(type)) - { - // We are receiving an array by value. This is problematic. - // We cannot be sure of the target address space since we are supposed to receive a copy, - // but this is not possible with MSL without some extra work. - // We will have to assume we're getting a reference in thread address space. - // If we happen to get a reference in constant address space, the caller must emit a copy and pass that. - // Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from - // non-constant arrays, but we can create thread const from constant. 
- decl = string("thread const ") + decl; - decl += " (&"; - const char *restrict_kw = to_restrict(name_id); - if (*restrict_kw) - { - decl += " "; - decl += restrict_kw; - } - decl += to_expression(name_id); - decl += ")"; - decl += type_to_array_glsl(type); - } - else - { - if (!address_space.empty()) - decl = join(address_space, " ", decl); - decl += " "; - decl += to_expression(name_id); - } + if (!address_space.empty()) + decl = join(address_space, " ", decl); + decl += " "; + decl += to_expression(name_id); } else if (is_array(type) && !type_is_image) { @@ -9006,6 +9860,16 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) decl += to_expression(name_id); } + // Emulate texture2D atomic operations + auto *backing_var = maybe_get_backing_variable(name_id); + if (backing_var && atomic_image_vars.count(backing_var->self)) + { + decl += ", device atomic_" + type_to_glsl(get<SPIRType>(var_type.image.type), 0); + decl += "* " + to_expression(name_id) + "_atomic"; + } + + use_builtin_array = false; + return decl; } @@ -9376,6 +10240,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) { const char *restrict_kw; type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id)); + switch (type.basetype) { case SPIRType::Image: @@ -9401,7 +10266,9 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) { case SPIRType::Struct: // Need OpName lookup here to get a "sensible" name for a struct. - return to_name(type.self); + // Allow Metal to use the array<T> template to make arrays a value type + type_name = to_name(type.self); + break; case SPIRType::Image: case SPIRType::SampledImage: @@ -9474,7 +10341,69 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id) if (type.vecsize > 1) type_name += to_string(type.vecsize); - return type_name; + if (type.array.empty() || use_builtin_array) + { + return type_name; + } + else + { + // Allow Metal to use the array<T> template to make arrays a value type + add_spv_func_and_recompile(SPVFuncImplUnsafeArray); + string res; + string sizes; + + for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) + { + res += "spvUnsafeArray<"; + sizes += ", "; + sizes += to_array_size(type, i); + sizes += ">"; + } + + res += type_name + sizes; + return res; + } +} + +string CompilerMSL::type_to_array_glsl(const SPIRType &type) +{ + // Allow Metal to use the array<T> template to make arrays a value type + switch (type.basetype) + { + case SPIRType::AtomicCounter: + case SPIRType::ControlPointArray: + { + return CompilerGLSL::type_to_array_glsl(type); + } + default: + { + if (use_builtin_array) + return CompilerGLSL::type_to_array_glsl(type); + else + return ""; + } + } +} + +// Threadgroup arrays can't have a wrapper type +std::string CompilerMSL::variable_decl(const SPIRVariable &variable) +{ + if (variable.storage == StorageClassWorkgroup) + { + use_builtin_array = true; + } + std::string expr = CompilerGLSL::variable_decl(variable); + if (variable.storage == StorageClassWorkgroup) + { + use_builtin_array = false; + } + return expr; +} + +// GCC workaround of lambdas calling protected funcs +std::string CompilerMSL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) +{ + return CompilerGLSL::variable_decl(type, name, id); } std::string CompilerMSL::sampler_type(const SPIRType &type) @@ -9547,9 +10476,14 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) switch (img_type.dim) { case Dim1D: - img_type_name += 
"depth1d_unsupported_by_metal"; - break; case Dim2D: + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += "depth1d_unsupported_by_metal"; + break; + } + if (img_type.ms && img_type.arrayed) { if (!msl_options.supports_msl_version(2, 1)) @@ -9567,7 +10501,10 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) img_type_name += "depth3d_unsupported_by_metal"; break; case DimCube: - img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "depthcube_array" : "depthcube"); + else + img_type_name += (img_type.arrayed ? "depth2d_array" : "depthcube"); break; default: img_type_name += "unknown_depth_texture_type"; @@ -9578,9 +10515,6 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) { switch (img_type.dim) { - case Dim1D: - img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); - break; case DimBuffer: if (img_type.ms || img_type.arrayed) SPIRV_CROSS_THROW("Cannot use texel buffers with multisampling or array layers."); @@ -9594,8 +10528,22 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) else img_type_name += "texture2d"; break; + case Dim1D: case Dim2D: case DimSubpassData: + if (img_type.dim == Dim1D && !msl_options.texture_1D_as_2D) + { + // Use a native Metal 1D texture + img_type_name += (img_type.arrayed ? "texture1d_array" : "texture1d"); + break; + } + + // Use Metal's native frame-buffer fetch API for subpass inputs. + if (img_type.dim == DimSubpassData && msl_options.is_ios() && + msl_options.ios_use_framebuffer_fetch_subpasses) + { + return type_to_glsl(get<SPIRType>(img_type.type)); + } if (img_type.ms && img_type.arrayed) { if (!msl_options.supports_msl_version(2, 1)) @@ -9613,7 +10561,10 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id) img_type_name += "texture3d"; break; case DimCube: - img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + if (!msl_options.emulate_cube_array) + img_type_name += (img_type.arrayed ? "texturecube_array" : "texturecube"); + else + img_type_name += (img_type.arrayed ? "texture2d_array" : "texturecube"); break; default: img_type_name += "unknown_texture_type"; @@ -9926,19 +10877,114 @@ string CompilerMSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) switch (builtin) { + // Handle HLSL-style 0-based vertex/instance index. 
// Override GLSL compiler strictness case BuiltInVertexId: - return "gl_VertexID"; + ensure_builtin(StorageClassInput, BuiltInVertexId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexID - gl_BaseVertex)"; + } + } + else + { + return "gl_VertexID"; + } case BuiltInInstanceId: - return "gl_InstanceID"; + ensure_builtin(StorageClassInput, BuiltInInstanceId); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceID"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceID - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceID"; + } case BuiltInVertexIndex: - return "gl_VertexIndex"; + ensure_builtin(StorageClassInput, BuiltInVertexIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_vertex_arg != TriState::No) + needs_base_vertex_arg = TriState::Yes; + return "gl_VertexIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseVertex); + return "(gl_VertexIndex - gl_BaseVertex)"; + } + } + else + { + return "gl_VertexIndex"; + } case BuiltInInstanceIndex: - return "gl_InstanceIndex"; + ensure_builtin(StorageClassInput, BuiltInInstanceIndex); + if (msl_options.enable_base_index_zero && msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + if (builtin_declaration) + { + if (needs_base_instance_arg != TriState::No) + needs_base_instance_arg = TriState::Yes; + return "gl_InstanceIndex"; + } + else + { + ensure_builtin(StorageClassInput, BuiltInBaseInstance); + return "(gl_InstanceIndex - gl_BaseInstance)"; + } + } + else + { + return "gl_InstanceIndex"; + } case BuiltInBaseVertex: - return "gl_BaseVertex"; + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_vertex_arg = TriState::No; + return "gl_BaseVertex"; + } + else + { + SPIRV_CROSS_THROW("BaseVertex requires Metal 1.1 and Mac or Apple A9+ hardware."); + } case BuiltInBaseInstance: - return "gl_BaseInstance"; + if (msl_options.supports_msl_version(1, 1) && + (msl_options.ios_support_base_vertex_instance || msl_options.is_macos())) + { + needs_base_instance_arg = TriState::No; + return "gl_BaseInstance"; + } + else + { + SPIRV_CROSS_THROW("BaseInstance requires Metal 1.1 and Mac or Apple A9+ hardware."); + } case BuiltInDrawIndex: SPIRV_CROSS_THROW("DrawIndex is not supported in MSL."); @@ -10321,9 +11367,12 @@ string CompilerMSL::built_in_func_arg(BuiltIn builtin, bool prefix_comma) if (prefix_comma) bi_arg += ", "; + // Handle HLSL-style 0-based vertex/instance index. 
+ builtin_declaration = true; bi_arg += builtin_type_decl(builtin); bi_arg += " " + builtin_to_glsl(builtin, StorageClassInput); bi_arg += " [[" + builtin_qualifier(builtin) + "]]"; + builtin_declaration = false; return bi_arg; } @@ -10617,6 +11666,14 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui suppress_missing_prototypes = true; break; + // Emulate texture2D atomic operations + case OpImageTexelPointer: + { + auto *var = compiler.maybe_get_backing_variable(args[2]); + image_pointers[args[1]] = var ? var->self : ID(0); + break; + } + case OpImageWrite: uses_resource_write = true; break; @@ -10625,6 +11682,7 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui check_resource_write(args[0]); break; + // Emulate texture2D atomic operations case OpAtomicExchange: case OpAtomicCompareExchange: case OpAtomicCompareExchangeWeak: @@ -10639,13 +11697,39 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui case OpAtomicAnd: case OpAtomicOr: case OpAtomicXor: + { uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } check_resource_write(args[2]); break; + } + + case OpAtomicStore: + { + uses_atomics = true; + auto it = image_pointers.find(args[0]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } + check_resource_write(args[0]); + break; + } case OpAtomicLoad: + { uses_atomics = true; + auto it = image_pointers.find(args[2]); + if (it != image_pointers.end()) + { + compiler.atomic_image_vars.insert(it->second); + } break; + } case OpGroupNonUniformInverseBallot: needs_subgroup_invocation_id = true; @@ -10672,6 +11756,7 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui uint32_t result_type = args[0]; uint32_t id = args[1]; uint32_t ptr = args[2]; + compiler.set<SPIRExpression>(id, "", result_type, true); compiler.register_read(id, ptr, true); compiler.ir.ids[id].set_allow_type_rewrite(); @@ -10707,60 +11792,55 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpFMod: return SPVFuncImplMod; - case OpFunctionCall: - { - auto &return_type = compiler.get<SPIRType>(args[0]); - if (return_type.array.size() > 1) + case OpFAdd: + if (compiler.msl_options.invariant_float_math) { - if (return_type.array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + return_type.array.size()); + return SPVFuncImplFAdd; } - else if (return_type.array.size() > 0) - return SPVFuncImplArrayCopy; - break; - } - - case OpStore: - { - // Get the result type of the RHS. Since this is run as a pre-processing stage, - // we must extract the result type directly from the Instruction, rather than the ID. - uint32_t id_lhs = args[0]; - uint32_t id_rhs = args[1]; - const SPIRType *type = nullptr; - if (compiler.ir.ids[id_rhs].get_type() != TypeNone) - { - // Could be a constant, or similar. - type = &compiler.expression_type(id_rhs); - } - else + case OpFMul: + case OpOuterProduct: + case OpMatrixTimesVector: + case OpVectorTimesMatrix: + case OpMatrixTimesMatrix: + if (compiler.msl_options.invariant_float_math) { - // Or ... an expression. 
- uint32_t tid = result_types[id_rhs]; - if (tid) - type = &compiler.get<SPIRType>(tid); + return SPVFuncImplFMul; } + break; - auto *var = compiler.maybe_get<SPIRVariable>(id_lhs); + case OpTypeArray: + { + // Allow Metal to use the array<T> template to make arrays a value type + return SPVFuncImplUnsafeArray; + } - // Are we simply assigning to a statically assigned variable which takes a constant? - // Don't bother emitting this function. - bool static_expression_lhs = - var && var->storage == StorageClassFunction && var->statically_assigned && var->remapped_variable; - if (type && compiler.is_array(*type) && !static_expression_lhs) + // Emulate texture2D atomic operations + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicCompareExchangeWeak: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicLoad: + case OpAtomicStore: + { + auto it = image_pointers.find(args[opcode == OpAtomicStore ? 0 : 2]); + if (it != image_pointers.end()) { - if (type->array.size() > 1) - { - if (type->array.size() > SPVFuncImplArrayCopyMultidimMax) - SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); - return static_cast<SPVFuncImpl>(SPVFuncImplArrayCopyMultidimBase + type->array.size()); - } - else - return SPVFuncImplArrayCopy; + uint32_t tid = compiler.get<SPIRVariable>(it->second).basetype; + if (tid && compiler.get<SPIRType>(tid).image.dim == Dim2D) + return SPVFuncImplImage2DAtomicCoords; } - break; } diff --git a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp index d16b593..31fcc2c 100644 --- a/src/3rdparty/SPIRV-Cross/spirv_msl.hpp +++ b/src/3rdparty/SPIRV-Cross/spirv_msl.hpp @@ -274,15 +274,34 @@ public: bool multiview = false; bool view_index_from_device_index = false; bool dispatch_base = false; + bool texture_1D_as_2D = false; // Enable use of MSL 2.0 indirect argument buffers. // MSL 2.0 must also be enabled. bool argument_buffers = false; + // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. + bool enable_base_index_zero = false; + // Fragment output in MSL must have at least as many components as the render pass. // Add support to explicit pad out components. bool pad_fragment_output_components = false; + // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. + bool ios_support_base_vertex_instance = false; + + // Use Metal's native frame-buffer fetch API for subpass inputs. + bool ios_use_framebuffer_fetch_subpasses = false; + + // Enables use of "fma" intrinsic for invariant float math + bool invariant_float_math = false; + + // Emulate texturecube_array with texture2d_array for iOS where this type is not available + bool emulate_cube_array = false; + + // Allow user to enable decoration binding + bool enable_decoration_binding = false; + // Requires MSL 2.1, use the native support for texel buffers. 
bool texture_buffer_native = false; @@ -487,6 +506,11 @@ protected: SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, SPVFuncImplTexelBufferCoords, + SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations + SPVFuncImplFMul, + SPVFuncImplFAdd, + SPVFuncImplCubemapTo2DArrayFace, + SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type SPVFuncImplInverse4x4, SPVFuncImplInverse3x3, SPVFuncImplInverse2x2, @@ -531,6 +555,9 @@ protected: SPVFuncImplArrayCopyMultidimMax = 6 }; + // If the underlying resource has been used for comparison then duplicate loads of that resource must be too + // Use Metal's native frame-buffer fetch API for subpass inputs. + void emit_texture_op(const Instruction &i) override; void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); void emit_instruction(const Instruction &instr) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, @@ -550,6 +577,16 @@ protected: const std::string &qualifier = "", uint32_t base_offset = 0) override; void emit_struct_padding_target(const SPIRType &type) override; std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + + // Allow Metal to use the array<T> template to make arrays a value type + std::string type_to_array_glsl(const SPIRType &type) override; + + // Threadgroup arrays can't have a wrapper type + std::string variable_decl(const SPIRVariable &variable) override; + + // GCC workaround of lambdas calling protected functions (for older GCC versions) + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; + std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; std::string sampler_type(const SPIRType &type); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; @@ -563,9 +600,13 @@ protected: uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override; std::string to_initializer_expression(const SPIRVariable &var) override; + std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, bool is_packed, bool row_major) override; + // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. + bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; bool skip_argument(uint32_t id) const override; std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; @@ -573,6 +614,10 @@ protected: void replace_illegal_names() override; void declare_undefined_values() override; void declare_constant_arrays(); + + // Constant arrays of non-primitive types (i.e. 
matrices) won't link properly into Metal libraries
+ void declare_complex_constant_arrays();
+
 bool is_patch_block(const SPIRType &type);
 bool is_non_native_row_major_matrix(uint32_t id) override;
 bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override;
@@ -613,6 +658,7 @@ protected:
 uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin);
 uint32_t ensure_correct_attribute_type(uint32_t type_id, uint32_t location);
+ void emit_custom_templates();
 void emit_custom_functions();
 void emit_resources();
 void emit_specialization_constants_and_structs();
@@ -710,8 +756,11 @@ protected:
 void analyze_sampled_image_usage();
 bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
+ bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
 bool is_out_of_bounds_tessellation_level(uint32_t id_lhs);
+ void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin);
+
 void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id);
 std::string convert_to_f32(const std::string &expr, uint32_t components);
@@ -762,9 +811,20 @@ protected:
 VariableID patch_stage_out_var_id = 0;
 VariableID stage_in_ptr_var_id = 0;
 VariableID stage_out_ptr_var_id = 0;
+
+ // Handle HLSL-style 0-based vertex/instance index.
+ enum class TriState
+ {
+ Neutral,
+ No,
+ Yes
+ };
+ TriState needs_base_vertex_arg = TriState::Neutral;
+ TriState needs_base_instance_arg = TriState::Neutral;
+
 bool has_sampled_images = false;
- bool needs_vertex_idx_arg = false;
- bool needs_instance_idx_arg = false;
+ bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index.
+ bool use_builtin_array = false; // Force the use of C style array declaration.
 bool is_rasterization_disabled = false;
 bool capture_output_to_buffer = false;
 bool needs_swizzle_buffer_def = false;
@@ -793,6 +853,7 @@ protected:
 std::unordered_set<uint32_t> buffers_requiring_array_length;
 SmallVector<uint32_t> buffer_arrays;
+ std::unordered_set<uint32_t> atomic_image_vars; // Emulate texture2D atomic operations
 // Must be ordered since array is in a specific order.
 std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;
@@ -825,6 +886,7 @@ protected:
 CompilerMSL &compiler;
 std::unordered_map<uint32_t, uint32_t> result_types;
+ std::unordered_map<uint32_t, uint32_t> image_pointers; // Emulate texture2D atomic operations
 bool suppress_missing_prototypes = false;
 bool uses_atomics = false;
 bool uses_resource_write = false;
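Finally, to illustrate what the TriState/builtin_declaration machinery and the [[base_vertex]]/[[base_instance]] arguments declared above produce: with enable_base_index_zero set on a target that supports it (MSL 1.1 on macOS or A9+ iOS), the generated vertex function ends up roughly as below. gl_VertexIndex and gl_BaseVertex are the names the backend emits; the body is an invented example.

#include <metal_stdlib>
using namespace metal;

vertex float4 main0(uint gl_VertexIndex [[vertex_id]],
                    uint gl_BaseVertex [[base_vertex]])
{
    // Every read of VertexIndex is rewritten to subtract the base, so
    // indexed draws see 0-based indices, matching HLSL's SV_VertexID.
    uint zero_based = gl_VertexIndex - gl_BaseVertex;
    return float4(float(zero_based), 0.0, 0.0, 1.0);
}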