diff options
Diffstat (limited to 'src/3rdparty/angle/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp')
-rw-r--r-- | src/3rdparty/angle/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp | 693 |
1 files changed, 456 insertions, 237 deletions
diff --git a/src/3rdparty/angle/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp b/src/3rdparty/angle/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp index 242c09d6ce..3a6d797ea6 100644 --- a/src/3rdparty/angle/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp +++ b/src/3rdparty/angle/src/libANGLE/renderer/d3d/d3d11/InputLayoutCache.cpp @@ -8,43 +8,141 @@ // D3D11 input layouts. #include "libANGLE/renderer/d3d/d3d11/InputLayoutCache.h" -#include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h" + +#include "common/utilities.h" +#include "libANGLE/Program.h" +#include "libANGLE/VertexAttribute.h" +#include "libANGLE/renderer/d3d/IndexDataManager.h" +#include "libANGLE/renderer/d3d/ProgramD3D.h" +#include "libANGLE/renderer/d3d/VertexDataManager.h" #include "libANGLE/renderer/d3d/d3d11/Buffer11.h" #include "libANGLE/renderer/d3d/d3d11/ShaderExecutable11.h" +#include "libANGLE/renderer/d3d/d3d11/VertexBuffer11.h" #include "libANGLE/renderer/d3d/d3d11/formatutils11.h" -#include "libANGLE/renderer/d3d/ProgramD3D.h" -#include "libANGLE/renderer/d3d/VertexDataManager.h" -#include "libANGLE/Program.h" -#include "libANGLE/VertexAttribute.h" - #include "third_party/murmurhash/MurmurHash3.h" namespace rx { -static void GetInputLayout(const TranslatedAttribute translatedAttributes[gl::MAX_VERTEX_ATTRIBS], - gl::VertexFormat inputLayout[gl::MAX_VERTEX_ATTRIBS]) +namespace +{ + +size_t GetReservedBufferCount(bool usesPointSpriteEmulation) { - for (unsigned int attributeIndex = 0; attributeIndex < gl::MAX_VERTEX_ATTRIBS; attributeIndex++) + return usesPointSpriteEmulation ? 1 : 0; +} + +gl::InputLayout GetInputLayout(const SortedAttribArray &translatedAttributes, size_t attributeCount) +{ + gl::InputLayout inputLayout(attributeCount, gl::VERTEX_FORMAT_INVALID); + + for (size_t attributeIndex = 0; attributeIndex < attributeCount; ++attributeIndex) { - const TranslatedAttribute &translatedAttribute = translatedAttributes[attributeIndex]; + const TranslatedAttribute *translatedAttribute = translatedAttributes[attributeIndex]; + + if (translatedAttribute->active) + { + inputLayout[attributeIndex] = gl::GetVertexFormatType( + *translatedAttribute->attribute, translatedAttribute->currentValueType); + } + } + return inputLayout; +} + +GLenum GetGLSLAttributeType(const std::vector<sh::Attribute> &shaderAttributes, int index) +{ + // Count matrices differently + for (const sh::Attribute &attrib : shaderAttributes) + { + if (attrib.location == -1) + { + continue; + } + + GLenum transposedType = gl::TransposeMatrixType(attrib.type); + int rows = gl::VariableRowCount(transposedType); + + if (index >= attrib.location && index < attrib.location + rows) + { + return transposedType; + } + } + + UNREACHABLE(); + return GL_NONE; +} + +const unsigned int kDefaultCacheSize = 1024; + +struct PackedAttribute +{ + uint8_t attribType; + uint8_t semanticIndex; + uint8_t vertexFormatType; + uint8_t divisor; +}; - if (translatedAttributes[attributeIndex].active) +Optional<size_t> FindFirstNonInstanced(const SortedAttribArray &sortedAttributes, size_t maxIndex) +{ + for (size_t index = 0; index < maxIndex; ++index) + { + if (sortedAttributes[index]->divisor == 0) { - inputLayout[attributeIndex] = gl::VertexFormat(*translatedAttribute.attribute, - translatedAttribute.currentValueType); + return Optional<size_t>(index); } } + + return Optional<size_t>::Invalid(); } -const unsigned int InputLayoutCache::kMaxInputLayouts = 1024; +} // anonymous namespace + +void InputLayoutCache::PackedAttributeLayout::addAttributeData( + GLenum glType, + UINT semanticIndex, + gl::VertexFormatType vertexFormatType, + unsigned int divisor) +{ + gl::AttributeType attribType = gl::GetAttributeType(glType); + + PackedAttribute packedAttrib; + packedAttrib.attribType = static_cast<uint8_t>(attribType); + packedAttrib.semanticIndex = static_cast<uint8_t>(semanticIndex); + packedAttrib.vertexFormatType = static_cast<uint8_t>(vertexFormatType); + packedAttrib.divisor = static_cast<uint8_t>(divisor); + + ASSERT(static_cast<gl::AttributeType>(packedAttrib.attribType) == attribType); + ASSERT(static_cast<UINT>(packedAttrib.semanticIndex) == semanticIndex); + ASSERT(static_cast<gl::VertexFormatType>(packedAttrib.vertexFormatType) == vertexFormatType); + ASSERT(static_cast<unsigned int>(packedAttrib.divisor) == divisor); + + static_assert(sizeof(uint32_t) == sizeof(PackedAttribute), "PackedAttributes must be 32-bits exactly."); + + attributeData[numAttributes++] = gl::bitCast<uint32_t>(packedAttrib); +} + +bool InputLayoutCache::PackedAttributeLayout::operator<(const PackedAttributeLayout &other) const +{ + if (numAttributes != other.numAttributes) + { + return numAttributes < other.numAttributes; + } + + if (flags != other.flags) + { + return flags < other.flags; + } + + return memcmp(attributeData, other.attributeData, sizeof(uint32_t) * numAttributes) < 0; +} -InputLayoutCache::InputLayoutCache() : mInputLayoutMap(kMaxInputLayouts, hashInputLayout, compareInputLayouts) +InputLayoutCache::InputLayoutCache() : mUnsortedAttributesCount(0), mCacheSize(kDefaultCacheSize) { mCounter = 0; mDevice = NULL; mDeviceContext = NULL; mCurrentIL = NULL; + for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) { mCurrentBuffers[i] = NULL; @@ -70,11 +168,11 @@ void InputLayoutCache::initialize(ID3D11Device *device, ID3D11DeviceContext *con void InputLayoutCache::clear() { - for (InputLayoutMap::iterator i = mInputLayoutMap.begin(); i != mInputLayoutMap.end(); i++) + for (auto &layout : mLayoutMap) { - SafeRelease(i->second.inputLayout); + SafeRelease(layout.second); } - mInputLayoutMap.clear(); + mLayoutMap.clear(); SafeRelease(mPointSpriteVertexBuffer); SafeRelease(mPointSpriteIndexBuffer); markDirty(); @@ -89,236 +187,135 @@ void InputLayoutCache::markDirty() mCurrentVertexStrides[i] = static_cast<UINT>(-1); mCurrentVertexOffsets[i] = static_cast<UINT>(-1); } + mUnsortedAttributesCount = 0; } -gl::Error InputLayoutCache::applyVertexBuffers(TranslatedAttribute attributes[gl::MAX_VERTEX_ATTRIBS], - GLenum mode, gl::Program *program) +gl::Error InputLayoutCache::applyVertexBuffers( + const std::vector<TranslatedAttribute> &unsortedAttributes, + GLenum mode, + gl::Program *program, + TranslatedIndexData *indexInfo, + GLsizei numIndicesPerInstance) { + ASSERT(mDevice && mDeviceContext); + ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program); - int sortedSemanticIndices[gl::MAX_VERTEX_ATTRIBS]; - programD3D->sortAttributesByLayout(attributes, sortedSemanticIndices); bool programUsesInstancedPointSprites = programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation(); bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS); - if (!mDevice || !mDeviceContext) - { - return gl::Error(GL_OUT_OF_MEMORY, "Internal input layout cache is not initialized."); - } - - InputLayoutKey ilKey = { 0 }; - - static const char* semanticName = "TEXCOORD"; + SortedIndexArray sortedSemanticIndices; + mSortedAttributes.fill(nullptr); + mUnsortedAttributesCount = unsortedAttributes.size(); - unsigned int firstIndexedElement = gl::MAX_VERTEX_ATTRIBS; - unsigned int firstInstancedElement = gl::MAX_VERTEX_ATTRIBS; - unsigned int nextAvailableInputSlot = 0; + programD3D->sortAttributesByLayout(unsortedAttributes, sortedSemanticIndices.data(), + mSortedAttributes.data()); - for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) + // If we are using FL 9_3, make sure the first attribute is not instanced + if (mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && !unsortedAttributes.empty()) { - if (attributes[i].active) + if (mSortedAttributes[0]->divisor > 0) { - D3D11_INPUT_CLASSIFICATION inputClass = attributes[i].divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA; - // If rendering points and instanced pointsprite emulation is being used, the inputClass is required to be configured as per instance data - inputClass = instancedPointSpritesActive ? D3D11_INPUT_PER_INSTANCE_DATA : inputClass; - - gl::VertexFormat vertexFormat(*attributes[i].attribute, attributes[i].currentValueType); - const d3d11::VertexFormat &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormat, mFeatureLevel); - - // Record the type of the associated vertex shader vector in our key - // This will prevent mismatched vertex shaders from using the same input layout - GLint attributeSize; - program->getActiveAttribute(ilKey.elementCount, 0, NULL, &attributeSize, &ilKey.elements[ilKey.elementCount].glslElementType, NULL); - - ilKey.elements[ilKey.elementCount].desc.SemanticName = semanticName; - ilKey.elements[ilKey.elementCount].desc.SemanticIndex = sortedSemanticIndices[i]; - ilKey.elements[ilKey.elementCount].desc.Format = vertexFormatInfo.nativeFormat; - ilKey.elements[ilKey.elementCount].desc.InputSlot = i; - ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0; - ilKey.elements[ilKey.elementCount].desc.InputSlotClass = inputClass; - ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = instancedPointSpritesActive ? 1 : attributes[i].divisor; - - if (inputClass == D3D11_INPUT_PER_VERTEX_DATA && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS) + Optional<size_t> firstNonInstancedIndex = + FindFirstNonInstanced(mSortedAttributes, unsortedAttributes.size()); + if (firstNonInstancedIndex.valid()) { - firstIndexedElement = ilKey.elementCount; + size_t index = firstNonInstancedIndex.value(); + std::swap(mSortedAttributes[0], mSortedAttributes[index]); + std::swap(sortedSemanticIndices[0], sortedSemanticIndices[index]); } - else if (inputClass == D3D11_INPUT_PER_INSTANCE_DATA && firstInstancedElement == gl::MAX_VERTEX_ATTRIBS) - { - firstInstancedElement = ilKey.elementCount; - } - - ilKey.elementCount++; - nextAvailableInputSlot = i + 1; } } - // Instanced PointSprite emulation requires additional entries in the - // inputlayout to support the vertices that make up the pointsprite quad. - // We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the input layout must match the shader - if (programUsesInstancedPointSprites) + gl::Error error = updateInputLayout(program, mode, mSortedAttributes, sortedSemanticIndices, + unsortedAttributes.size(), numIndicesPerInstance); + if (error.isError()) { - ilKey.elements[ilKey.elementCount].desc.SemanticName = "SPRITEPOSITION"; - ilKey.elements[ilKey.elementCount].desc.SemanticIndex = 0; - ilKey.elements[ilKey.elementCount].desc.Format = DXGI_FORMAT_R32G32B32_FLOAT; - ilKey.elements[ilKey.elementCount].desc.InputSlot = nextAvailableInputSlot; - ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = 0; - ilKey.elements[ilKey.elementCount].desc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; - ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = 0; - - // The new elements are D3D11_INPUT_PER_VERTEX_DATA data so the indexed element - // tracking must be applied. This ensures that the instancing specific - // buffer swapping logic continues to work. - if (firstIndexedElement == gl::MAX_VERTEX_ATTRIBS) - { - firstIndexedElement = ilKey.elementCount; - } - - ilKey.elementCount++; - - ilKey.elements[ilKey.elementCount].desc.SemanticName = "SPRITETEXCOORD"; - ilKey.elements[ilKey.elementCount].desc.SemanticIndex = 0; - ilKey.elements[ilKey.elementCount].desc.Format = DXGI_FORMAT_R32G32_FLOAT; - ilKey.elements[ilKey.elementCount].desc.InputSlot = nextAvailableInputSlot; - ilKey.elements[ilKey.elementCount].desc.AlignedByteOffset = sizeof(float) * 3; - ilKey.elements[ilKey.elementCount].desc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; - ilKey.elements[ilKey.elementCount].desc.InstanceDataStepRate = 0; - - ilKey.elementCount++; + return error; } - // On 9_3, we must ensure that slot 0 contains non-instanced data. - // If slot 0 currently contains instanced data then we swap it with a non-instanced element. - // Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3 doesn't support OpenGL ES 3.0. - // As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced simultaneously, so a non-instanced element must exist. - ASSERT(!(mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && firstIndexedElement == gl::MAX_VERTEX_ATTRIBS)); - bool moveFirstIndexedIntoSlotZero = mFeatureLevel <= D3D_FEATURE_LEVEL_9_3 && firstInstancedElement == 0 && firstIndexedElement != gl::MAX_VERTEX_ATTRIBS; - - if (moveFirstIndexedIntoSlotZero) - { - ilKey.elements[firstInstancedElement].desc.InputSlot = ilKey.elements[firstIndexedElement].desc.InputSlot; - ilKey.elements[firstIndexedElement].desc.InputSlot = 0; - - // Instanced PointSprite emulation uses multiple layout entries across a single vertex buffer. - // If an index swap is performed, we need to ensure that all elements get the proper InputSlot. - if (programUsesInstancedPointSprites) - { - ilKey.elements[firstIndexedElement + 1].desc.InputSlot = 0; - } - } + bool dirtyBuffers = false; + size_t minDiff = gl::MAX_VERTEX_ATTRIBS; + size_t maxDiff = 0; - ID3D11InputLayout *inputLayout = NULL; + // Note that if we use instance emulation, we reserve the first buffer slot. + size_t reservedBuffers = GetReservedBufferCount(programUsesInstancedPointSprites); - InputLayoutMap::iterator keyIter = mInputLayoutMap.find(ilKey); - if (keyIter != mInputLayoutMap.end()) - { - inputLayout = keyIter->second.inputLayout; - keyIter->second.lastUsedTime = mCounter++; - } - else + for (size_t attribIndex = 0; attribIndex < (gl::MAX_VERTEX_ATTRIBS - reservedBuffers); + ++attribIndex) { - gl::VertexFormat shaderInputLayout[gl::MAX_VERTEX_ATTRIBS]; - GetInputLayout(attributes, shaderInputLayout); - - ShaderExecutableD3D *shader = NULL; - gl::Error error = programD3D->getVertexExecutableForInputLayout(shaderInputLayout, &shader, nullptr); - if (error.isError()) - { - return error; - } - - ShaderExecutableD3D *shader11 = ShaderExecutable11::makeShaderExecutable11(shader); + ID3D11Buffer *buffer = NULL; + UINT vertexStride = 0; + UINT vertexOffset = 0; - D3D11_INPUT_ELEMENT_DESC descs[gl::MAX_VERTEX_ATTRIBS]; - for (unsigned int j = 0; j < ilKey.elementCount; ++j) - { - descs[j] = ilKey.elements[j].desc; - } + const auto &attrib = *mSortedAttributes[attribIndex]; - HRESULT result = mDevice->CreateInputLayout(descs, ilKey.elementCount, shader11->getFunction(), shader11->getLength(), &inputLayout); - if (FAILED(result)) + if (attribIndex < unsortedAttributes.size() && attrib.active) { - return gl::Error(GL_OUT_OF_MEMORY, "Failed to create internal input layout, HRESULT: 0x%08x", result); - } - - if (mInputLayoutMap.size() >= kMaxInputLayouts) - { - TRACE("Overflowed the limit of %u input layouts, removing the least recently used " - "to make room.", kMaxInputLayouts); - - InputLayoutMap::iterator leastRecentlyUsed = mInputLayoutMap.begin(); - for (InputLayoutMap::iterator i = mInputLayoutMap.begin(); i != mInputLayoutMap.end(); i++) + VertexBuffer11 *vertexBuffer = GetAs<VertexBuffer11>(attrib.vertexBuffer); + Buffer11 *bufferStorage = attrib.storage ? GetAs<Buffer11>(attrib.storage) : nullptr; + + // If indexed pointsprite emulation is active, then we need to take a less efficent code path. + // Emulated indexed pointsprite rendering requires that the vertex buffers match exactly to + // the indices passed by the caller. This could expand or shrink the vertex buffer depending + // on the number of points indicated by the index list or how many duplicates are found on the index list. + if (bufferStorage == nullptr) { - if (i->second.lastUsedTime < leastRecentlyUsed->second.lastUsedTime) + buffer = vertexBuffer->getBuffer(); + } + else if (instancedPointSpritesActive && (indexInfo != nullptr)) + { + if (indexInfo->srcIndexData.srcBuffer != nullptr) { - leastRecentlyUsed = i; + const uint8_t *bufferData = nullptr; + error = indexInfo->srcIndexData.srcBuffer->getData(&bufferData); + if (error.isError()) + { + return error; + } + ASSERT(bufferData != nullptr); + + ptrdiff_t offset = + reinterpret_cast<ptrdiff_t>(indexInfo->srcIndexData.srcIndices); + indexInfo->srcIndexData.srcBuffer = nullptr; + indexInfo->srcIndexData.srcIndices = bufferData + offset; } - } - SafeRelease(leastRecentlyUsed->second.inputLayout); - mInputLayoutMap.erase(leastRecentlyUsed); - } - - InputLayoutCounterPair inputCounterPair; - inputCounterPair.inputLayout = inputLayout; - inputCounterPair.lastUsedTime = mCounter++; - - mInputLayoutMap.insert(std::make_pair(ilKey, inputCounterPair)); - } - - if (inputLayout != mCurrentIL) - { - mDeviceContext->IASetInputLayout(inputLayout); - mCurrentIL = inputLayout; - } - bool dirtyBuffers = false; - size_t minDiff = gl::MAX_VERTEX_ATTRIBS; - size_t maxDiff = 0; - unsigned int nextAvailableIndex = 0; - - for (unsigned int i = 0; i < gl::MAX_VERTEX_ATTRIBS; i++) - { - ID3D11Buffer *buffer = NULL; - - if (attributes[i].active) - { - VertexBuffer11 *vertexBuffer = VertexBuffer11::makeVertexBuffer11(attributes[i].vertexBuffer); - Buffer11 *bufferStorage = attributes[i].storage ? Buffer11::makeBuffer11(attributes[i].storage) : NULL; + buffer = bufferStorage->getEmulatedIndexedBuffer(&indexInfo->srcIndexData, &attrib); + } + else + { + buffer = bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK); + } - buffer = bufferStorage ? bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK) - : vertexBuffer->getBuffer(); + vertexStride = attrib.stride; + vertexOffset = attrib.offset; } - UINT vertexStride = attributes[i].stride; - UINT vertexOffset = attributes[i].offset; + size_t bufferIndex = reservedBuffers + attribIndex; - if (buffer != mCurrentBuffers[i] || vertexStride != mCurrentVertexStrides[i] || - vertexOffset != mCurrentVertexOffsets[i]) + if (buffer != mCurrentBuffers[bufferIndex] || + vertexStride != mCurrentVertexStrides[bufferIndex] || + vertexOffset != mCurrentVertexOffsets[bufferIndex]) { dirtyBuffers = true; - minDiff = std::min(minDiff, static_cast<size_t>(i)); - maxDiff = std::max(maxDiff, static_cast<size_t>(i)); - - mCurrentBuffers[i] = buffer; - mCurrentVertexStrides[i] = vertexStride; - mCurrentVertexOffsets[i] = vertexOffset; + minDiff = std::min(minDiff, bufferIndex); + maxDiff = std::max(maxDiff, bufferIndex); - // If a non null ID3D11Buffer is being assigned to mCurrentBuffers, - // then the next available index needs to be tracked to ensure - // that any instanced pointsprite emulation buffers will be properly packed. - if (buffer) - { - nextAvailableIndex = i + 1; - } + mCurrentBuffers[bufferIndex] = buffer; + mCurrentVertexStrides[bufferIndex] = vertexStride; + mCurrentVertexOffsets[bufferIndex] = vertexOffset; } } - // Instanced PointSprite emulation requires two additional ID3D11Buffers. - // A vertex buffer needs to be created and added to the list of current buffers, - // strides and offsets collections. This buffer contains the vertices for a single - // PointSprite quad. - // An index buffer also needs to be created and applied because rendering instanced - // data on D3D11 FL9_3 requires DrawIndexedInstanced() to be used. - if (instancedPointSpritesActive) + // Instanced PointSprite emulation requires two additional ID3D11Buffers. A vertex buffer needs + // to be created and added to the list of current buffers, strides and offsets collections. + // This buffer contains the vertices for a single PointSprite quad. + // An index buffer also needs to be created and applied because rendering instanced data on + // D3D11 FL9_3 requires DrawIndexedInstanced() to be used. Shaders that contain gl_PointSize and + // used without the GL_POINTS rendering mode require a vertex buffer because some drivers cannot + // handle missing vertex data and will TDR the system. + if (programUsesInstancedPointSprites) { HRESULT result = S_OK; const UINT pointSpriteVertexStride = sizeof(float) * 5; @@ -352,9 +349,16 @@ gl::Error InputLayoutCache::applyVertexBuffers(TranslatedAttribute attributes[gl } } - mCurrentBuffers[nextAvailableIndex] = mPointSpriteVertexBuffer; - mCurrentVertexStrides[nextAvailableIndex] = pointSpriteVertexStride; - mCurrentVertexOffsets[nextAvailableIndex] = 0; + mCurrentBuffers[0] = mPointSpriteVertexBuffer; + // Set the stride to 0 if GL_POINTS mode is not being used to instruct the driver to avoid + // indexing into the vertex buffer. + mCurrentVertexStrides[0] = instancedPointSpritesActive ? pointSpriteVertexStride : 0; + mCurrentVertexOffsets[0] = 0; + + // Update maxDiff to include the additional point sprite vertex buffer + // to ensure that IASetVertexBuffers uses the correct buffer count. + minDiff = 0; + maxDiff = std::max(maxDiff, static_cast<size_t>(0)); if (!mPointSpriteIndexBuffer) { @@ -381,50 +385,265 @@ gl::Error InputLayoutCache::applyVertexBuffers(TranslatedAttribute attributes[gl } } - // The index buffer is applied here because Instanced PointSprite emulation uses - // the a non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer() - // on the renderer will not be called and setting this buffer here ensures that the rendering - // path will contain the correct index buffers. - mDeviceContext->IASetIndexBuffer(mPointSpriteIndexBuffer, DXGI_FORMAT_R16_UINT, 0); - } - - if (moveFirstIndexedIntoSlotZero) - { - // In this case, we swapped the slots of the first instanced element and the first indexed element, to ensure - // that the first slot contains non-instanced data (required by Feature Level 9_3). - // We must also swap the corresponding buffers sent to IASetVertexBuffers so that the correct data is sent to each slot. - std::swap(mCurrentBuffers[firstIndexedElement], mCurrentBuffers[firstInstancedElement]); - std::swap(mCurrentVertexStrides[firstIndexedElement], mCurrentVertexStrides[firstInstancedElement]); - std::swap(mCurrentVertexOffsets[firstIndexedElement], mCurrentVertexOffsets[firstInstancedElement]); + if (instancedPointSpritesActive) + { + // The index buffer is applied here because Instanced PointSprite emulation uses the a + // non-indexed rendering path in ANGLE (DrawArrays). This means that applyIndexBuffer() + // on the renderer will not be called and setting this buffer here ensures that the + // rendering path will contain the correct index buffers. + mDeviceContext->IASetIndexBuffer(mPointSpriteIndexBuffer, DXGI_FORMAT_R16_UINT, 0); + } } if (dirtyBuffers) { ASSERT(minDiff <= maxDiff && maxDiff < gl::MAX_VERTEX_ATTRIBS); - mDeviceContext->IASetVertexBuffers(minDiff, maxDiff - minDiff + 1, mCurrentBuffers + minDiff, - mCurrentVertexStrides + minDiff, mCurrentVertexOffsets + minDiff); + mDeviceContext->IASetVertexBuffers( + static_cast<UINT>(minDiff), static_cast<UINT>(maxDiff - minDiff + 1), + mCurrentBuffers + minDiff, mCurrentVertexStrides + minDiff, + mCurrentVertexOffsets + minDiff); } return gl::Error(GL_NO_ERROR); } -std::size_t InputLayoutCache::hashInputLayout(const InputLayoutKey &inputLayout) +gl::Error InputLayoutCache::updateVertexOffsetsForPointSpritesEmulation(GLsizei emulatedInstanceId) { - static const unsigned int seed = 0xDEADBEEF; + size_t reservedBuffers = GetReservedBufferCount(true); + for (size_t attribIndex = 0; attribIndex < mUnsortedAttributesCount; ++attribIndex) + { + const auto &attrib = *mSortedAttributes[attribIndex]; + size_t bufferIndex = reservedBuffers + attribIndex; + + if (attrib.active && attrib.divisor > 0) + { + mCurrentVertexOffsets[bufferIndex] = + attrib.offset + (attrib.stride * (emulatedInstanceId / attrib.divisor)); + } + } + + mDeviceContext->IASetVertexBuffers(0, gl::MAX_VERTEX_ATTRIBS, mCurrentBuffers, + mCurrentVertexStrides, mCurrentVertexOffsets); - std::size_t hash = 0; - MurmurHash3_x86_32(inputLayout.begin(), inputLayout.end() - inputLayout.begin(), seed, &hash); - return hash; + return gl::Error(GL_NO_ERROR); } -bool InputLayoutCache::compareInputLayouts(const InputLayoutKey &a, const InputLayoutKey &b) +gl::Error InputLayoutCache::updateInputLayout(gl::Program *program, + GLenum mode, + const SortedAttribArray &sortedAttributes, + const SortedIndexArray &sortedSemanticIndices, + size_t attribCount, + GLsizei numIndicesPerInstance) { - if (a.elementCount != b.elementCount) + const std::vector<sh::Attribute> &shaderAttributes = program->getAttributes(); + PackedAttributeLayout layout; + + ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program); + bool programUsesInstancedPointSprites = + programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation(); + bool instancedPointSpritesActive = programUsesInstancedPointSprites && (mode == GL_POINTS); + + if (programUsesInstancedPointSprites) + { + layout.flags |= PackedAttributeLayout::FLAG_USES_INSTANCED_SPRITES; + } + + if (instancedPointSpritesActive) { - return false; + layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_SPRITES_ACTIVE; } - return std::equal(a.begin(), a.end(), b.begin()); + if (numIndicesPerInstance > 0) + { + layout.flags |= PackedAttributeLayout::FLAG_INSTANCED_RENDERING_ACTIVE; + } + + const auto &semanticToLocation = programD3D->getAttributesByLayout(); + + for (size_t attribIndex = 0; attribIndex < attribCount; ++attribIndex) + { + const auto &attrib = *sortedAttributes[attribIndex]; + int sortedIndex = sortedSemanticIndices[attribIndex]; + + if (!attrib.active) + continue; + + gl::VertexFormatType vertexFormatType = + gl::GetVertexFormatType(*attrib.attribute, attrib.currentValueType); + + // Record the type of the associated vertex shader vector in our key + // This will prevent mismatched vertex shaders from using the same input layout + GLenum glslElementType = + GetGLSLAttributeType(shaderAttributes, semanticToLocation[sortedIndex]); + + layout.addAttributeData(glslElementType, sortedIndex, vertexFormatType, attrib.divisor); + } + + ID3D11InputLayout *inputLayout = nullptr; + if (layout.numAttributes > 0 || layout.flags != 0) + { + auto layoutMapIt = mLayoutMap.find(layout); + if (layoutMapIt != mLayoutMap.end()) + { + inputLayout = layoutMapIt->second; + } + else + { + gl::Error error = + createInputLayout(sortedAttributes, sortedSemanticIndices, attribCount, mode, + program, numIndicesPerInstance, &inputLayout); + if (error.isError()) + { + return error; + } + if (mLayoutMap.size() >= mCacheSize) + { + TRACE("Overflowed the limit of %u input layouts, purging half the cache.", + mCacheSize); + + // Randomly release every second element + auto it = mLayoutMap.begin(); + while (it != mLayoutMap.end()) + { + it++; + if (it != mLayoutMap.end()) + { + // c++11 erase allows us to easily delete the current iterator. + SafeRelease(it->second); + it = mLayoutMap.erase(it); + } + } + } + + mLayoutMap[layout] = inputLayout; + } + } + + if (inputLayout != mCurrentIL) + { + mDeviceContext->IASetInputLayout(inputLayout); + mCurrentIL = inputLayout; + } + + return gl::Error(GL_NO_ERROR); } +gl::Error InputLayoutCache::createInputLayout(const SortedAttribArray &sortedAttributes, + const SortedIndexArray &sortedSemanticIndices, + size_t attribCount, + GLenum mode, + gl::Program *program, + GLsizei numIndicesPerInstance, + ID3D11InputLayout **inputLayoutOut) +{ + ProgramD3D *programD3D = GetImplAs<ProgramD3D>(program); + + bool programUsesInstancedPointSprites = + programD3D->usesPointSize() && programD3D->usesInstancedPointSpriteEmulation(); + + unsigned int inputElementCount = 0; + std::array<D3D11_INPUT_ELEMENT_DESC, gl::MAX_VERTEX_ATTRIBS> inputElements; + + for (size_t attribIndex = 0; attribIndex < attribCount; ++attribIndex) + { + const auto &attrib = *sortedAttributes[attribIndex]; + const int sortedIndex = sortedSemanticIndices[attribIndex]; + + if (!attrib.active) + continue; + + D3D11_INPUT_CLASSIFICATION inputClass = + attrib.divisor > 0 ? D3D11_INPUT_PER_INSTANCE_DATA : D3D11_INPUT_PER_VERTEX_DATA; + + const auto &vertexFormatType = + gl::GetVertexFormatType(*attrib.attribute, attrib.currentValueType); + const auto &vertexFormatInfo = d3d11::GetVertexFormatInfo(vertexFormatType, mFeatureLevel); + + auto *inputElement = &inputElements[inputElementCount]; + + inputElement->SemanticName = "TEXCOORD"; + inputElement->SemanticIndex = sortedIndex; + inputElement->Format = vertexFormatInfo.nativeFormat; + inputElement->InputSlot = static_cast<UINT>(attribIndex); + inputElement->AlignedByteOffset = 0; + inputElement->InputSlotClass = inputClass; + inputElement->InstanceDataStepRate = attrib.divisor; + + inputElementCount++; + } + + // Instanced PointSprite emulation requires additional entries in the + // inputlayout to support the vertices that make up the pointsprite quad. + // We do this even if mode != GL_POINTS, since the shader signature has these inputs, and the + // input layout must match the shader + if (programUsesInstancedPointSprites) + { + // On 9_3, we must ensure that slot 0 contains non-instanced data. + // If slot 0 currently contains instanced data then we swap it with a non-instanced element. + // Note that instancing is only available on 9_3 via ANGLE_instanced_arrays, since 9_3 + // doesn't support OpenGL ES 3.0. + // As per the spec for ANGLE_instanced_arrays, not all attributes can be instanced + // simultaneously, so a non-instanced element must exist. + for (size_t elementIndex = 0; elementIndex < inputElementCount; ++elementIndex) + { + if (sortedAttributes[elementIndex]->active) + { + // If rendering points and instanced pointsprite emulation is being used, the + // inputClass is required to be configured as per instance data + if (mode == GL_POINTS) + { + inputElements[elementIndex].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA; + inputElements[elementIndex].InstanceDataStepRate = 1; + if (numIndicesPerInstance > 0 && sortedAttributes[elementIndex]->divisor > 0) + { + inputElements[elementIndex].InstanceDataStepRate = numIndicesPerInstance; + } + } + inputElements[elementIndex].InputSlot++; + } + } + + inputElements[inputElementCount].SemanticName = "SPRITEPOSITION"; + inputElements[inputElementCount].SemanticIndex = 0; + inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32B32_FLOAT; + inputElements[inputElementCount].InputSlot = 0; + inputElements[inputElementCount].AlignedByteOffset = 0; + inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + inputElements[inputElementCount].InstanceDataStepRate = 0; + inputElementCount++; + + inputElements[inputElementCount].SemanticName = "SPRITETEXCOORD"; + inputElements[inputElementCount].SemanticIndex = 0; + inputElements[inputElementCount].Format = DXGI_FORMAT_R32G32_FLOAT; + inputElements[inputElementCount].InputSlot = 0; + inputElements[inputElementCount].AlignedByteOffset = sizeof(float) * 3; + inputElements[inputElementCount].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + inputElements[inputElementCount].InstanceDataStepRate = 0; + inputElementCount++; + } + + const gl::InputLayout &shaderInputLayout = GetInputLayout(sortedAttributes, attribCount); + + ShaderExecutableD3D *shader = nullptr; + gl::Error error = + programD3D->getVertexExecutableForInputLayout(shaderInputLayout, &shader, nullptr); + if (error.isError()) + { + return error; + } + + ShaderExecutableD3D *shader11 = GetAs<ShaderExecutable11>(shader); + + HRESULT result = + mDevice->CreateInputLayout(inputElements.data(), inputElementCount, shader11->getFunction(), + shader11->getLength(), inputLayoutOut); + if (FAILED(result)) + { + return gl::Error(GL_OUT_OF_MEMORY, + "Failed to create internal input layout, HRESULT: 0x%08x", result); + } + + return gl::Error(GL_NO_ERROR); } + +} // namespace rx |