Support instance-rate vertex attributes.

Summary of the hunks below:
  * sw::StreamResource::stride is split into vertexStride and instanceStride.
    GraphicsPipeline stores a binding's stride in one of the two according to
    its inputRate and sets the other to 0; the UNIMPLEMENTED() for
    non-VERTEX input rates is removed.
  * ExecutionState::bindVertexInputs() takes firstInstance and offsets each
    attribute by instanceStride * firstInstance in addition to
    vertexStride * firstVertex.
  * Renderer::advanceInstanceAttributes() adds instanceStride to the buffer
    pointer of every input whose count and instanceStride are both non-zero.
    Draw and DrawIndexed call it after each per-instance draw.
  * The per-instance loops now end on
    "instance != firstInstance + instanceCount", so the body does not run
    when instanceCount is 0.
  * The vertex shader builtin check uses spv::BuiltInInstanceIndex instead of
    spv::BuiltInInstanceId.

diff --git a/src/Device/Renderer.cpp b/src/Device/Renderer.cpp
index de4075d652a9c..765e59485bf98 100644
--- a/src/Device/Renderer.cpp
+++ b/src/Device/Renderer.cpp
@@ -314,7 +314,7 @@ namespace sw
 		for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
 		{
 			data->input[i] = context->input[i].buffer;
-			data->stride[i] = context->input[i].stride;
+			data->stride[i] = context->input[i].vertexStride;
 		}
 
 		if(context->indexBuffer)
@@ -322,7 +322,7 @@ namespace sw
 			data->indices = context->indexBuffer;
 		}
 
-		if(context->vertexShader->hasBuiltinInput(spv::BuiltInInstanceId))
+		if(context->vertexShader->hasBuiltinInput(spv::BuiltInInstanceIndex))
 		{
 			data->instanceID = context->instanceID;
 		}
@@ -1550,6 +1550,19 @@ namespace sw
 		queries.remove(query);
 	}
 
+	void Renderer::advanceInstanceAttributes()
+	{
+		for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
+		{
+			auto &attrib = context->input[i];
+			if (attrib.count && attrib.instanceStride)
+			{
+				// Under the casts: attrib.buffer += attrib.instanceStride
+				attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
+			}
+		}
+	}
+
 	#if PERF_HUD
 	int Renderer::getThreadCount()
 	{
diff --git a/src/Device/Renderer.hpp b/src/Device/Renderer.hpp
index e51b78897cf9e..b65cea4f65014 100644
--- a/src/Device/Renderer.hpp
+++ b/src/Device/Renderer.hpp
@@ -250,6 +250,8 @@ namespace sw
 		void addQuery(Query *query);
 		void removeQuery(Query *query);
 
+		void advanceInstanceAttributes();
+
 		void synchronize();
 
 		#if PERF_HUD
diff --git a/src/Device/Stream.hpp b/src/Device/Stream.hpp
index 54841e3684161..b9ff604a27114 100644
--- a/src/Device/Stream.hpp
+++ b/src/Device/Stream.hpp
@@ -39,15 +39,17 @@ namespace sw
 	struct StreamResource
 	{
 		const void *buffer;
-		unsigned int stride;
+		unsigned int vertexStride;
+		unsigned int instanceStride;
 	};
 
 	struct Stream : public StreamResource
 	{
-		Stream(const void *buffer = nullptr, unsigned int stride = 0)
+		Stream(const void *buffer = nullptr, unsigned int vertexStride = 0)
 		{
 			this->buffer = buffer;
-			this->stride = stride;
+			this->vertexStride = vertexStride;
+			this->instanceStride = 0;
 		}
 
 		Stream &define(StreamType type, unsigned int count, bool normalized = false)
@@ -74,7 +76,8 @@ namespace sw
 			static const float4 null = {0, 0, 0, 1};
 
 			buffer = &null;
-			stride = 0;
+			vertexStride = 0;
+			instanceStride = 0;
 			type = STREAMTYPE_FLOAT;
 			count = 0;
 			normalized = false;
diff --git a/src/Vulkan/VkCommandBuffer.cpp b/src/Vulkan/VkCommandBuffer.cpp
index 06e1c1bffaf70..cbf928746d898 100644
--- a/src/Vulkan/VkCommandBuffer.cpp
+++ b/src/Vulkan/VkCommandBuffer.cpp
@@ -206,7 +206,7 @@ struct IndexBufferBind : public CommandBuffer::Command
 	const VkIndexType indexType;
 };
 
-void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int firstVertex)
+void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int firstVertex, int firstInstance)
 {
 	for(uint32_t i = 0; i < MAX_VERTEX_INPUT_BINDINGS; i++)
 	{
@@ -216,7 +216,7 @@ void CommandBuffer::ExecutionState::bindVertexInputs(sw::Context& context, int f
 			const auto &vertexInput = vertexInputBindings[attrib.binding];
 			Buffer *buffer = Cast(vertexInput.buffer);
 			attrib.buffer = buffer ? buffer->getOffsetPointer(
-					attrib.offset + vertexInput.offset + attrib.stride * firstVertex) : nullptr;
+					attrib.offset + vertexInput.offset + attrib.vertexStride * firstVertex + attrib.instanceStride * firstInstance) : nullptr;
 		}
 	}
 }
@@ -266,7 +266,7 @@ struct Draw : public CommandBuffer::Command
 			executionState.pipelines[VK_PIPELINE_BIND_POINT_GRAPHICS]);
 
 		sw::Context context = pipeline->getContext();
-		executionState.bindVertexInputs(context, firstVertex);
+		executionState.bindVertexInputs(context, firstVertex, firstInstance);
 
 		const auto& boundDescriptorSets = executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_GRAPHICS];
 		for(int i = 0; i < vk::MAX_BOUND_DESCRIPTOR_SETS; i++)
@@ -284,11 +284,11 @@ struct Draw : public CommandBuffer::Command
 		executionState.bindAttachments();
 
 		const uint32_t primitiveCount = pipeline->computePrimitiveCount(vertexCount);
-		const uint32_t lastInstance = firstInstance + instanceCount - 1;
-		for(uint32_t instance = firstInstance; instance <= lastInstance; instance++)
+		for(uint32_t instance = firstInstance; instance != firstInstance + instanceCount; instance++)
 		{
 			executionState.renderer->setInstanceID(instance);
 			executionState.renderer->draw(context.drawType, primitiveCount);
+			executionState.renderer->advanceInstanceAttributes();
 		}
 	}
 
@@ -311,7 +311,8 @@ struct DrawIndexed : public CommandBuffer::Command
 			executionState.pipelines[VK_PIPELINE_BIND_POINT_GRAPHICS]);
 
 		sw::Context context = pipeline->getContext();
-		executionState.bindVertexInputs(context, vertexOffset);
+
+		executionState.bindVertexInputs(context, vertexOffset, firstInstance);
 
 		const auto& boundDescriptorSets = executionState.boundDescriptorSets[VK_PIPELINE_BIND_POINT_GRAPHICS];
 		for(int i = 0; i < vk::MAX_BOUND_DESCRIPTOR_SETS; i++)
@@ -335,11 +336,11 @@ struct DrawIndexed : public CommandBuffer::Command
 				? (context.drawType | sw::DRAW_INDEXED16) : (context.drawType | sw::DRAW_INDEXED32);
 
 		const uint32_t primitiveCount = pipeline->computePrimitiveCount(indexCount);
-		const uint32_t lastInstance = firstInstance + instanceCount - 1;
-		for(uint32_t instance = firstInstance; instance <= lastInstance; instance++)
+		for(uint32_t instance = firstInstance; instance != firstInstance + instanceCount; instance++)
 		{
 			executionState.renderer->setInstanceID(instance);
 			executionState.renderer->draw(static_cast<sw::DrawType>(drawType), primitiveCount);
+			executionState.renderer->advanceInstanceAttributes();
 		}
 	}
 
diff --git a/src/Vulkan/VkCommandBuffer.hpp b/src/Vulkan/VkCommandBuffer.hpp
index 4fbbe35515c38..03a0b2007cdb2 100644
--- a/src/Vulkan/VkCommandBuffer.hpp
+++ b/src/Vulkan/VkCommandBuffer.hpp
@@ -140,7 +140,7 @@ class CommandBuffer
 		VkIndexType indexType;
 
 		void bindAttachments();
-		void bindVertexInputs(sw::Context& context, int firstVertex);
+		void bindVertexInputs(sw::Context& context, int firstVertex, int firstInstance);
 	};
 
 	void submit(CommandBuffer::ExecutionState& executionState);
diff --git a/src/Vulkan/VkPipeline.cpp b/src/Vulkan/VkPipeline.cpp
index a0b6f2cf8790f..d3ec02b2e564f 100644
--- a/src/Vulkan/VkPipeline.cpp
+++ b/src/Vulkan/VkPipeline.cpp
@@ -276,15 +276,13 @@ GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateIn
 
 	// Temporary in-binding-order representation of buffer strides, to be consumed below
 	// when considering attributes. TODO: unfuse buffers from attributes in backend, is old GL model.
-	uint32_t bufferStrides[MAX_VERTEX_INPUT_BINDINGS];
+	uint32_t vertexStrides[MAX_VERTEX_INPUT_BINDINGS];
+	uint32_t instanceStrides[MAX_VERTEX_INPUT_BINDINGS];
 	for(uint32_t i = 0; i < vertexInputState->vertexBindingDescriptionCount; i++)
 	{
 		auto const & desc = vertexInputState->pVertexBindingDescriptions[i];
-		bufferStrides[desc.binding] = desc.stride;
-		if(desc.inputRate != VK_VERTEX_INPUT_RATE_VERTEX)
-		{
-			UNIMPLEMENTED("vertexInputState->pVertexBindingDescriptions[%d]", i);
-		}
+		vertexStrides[desc.binding] = desc.inputRate == VK_VERTEX_INPUT_RATE_VERTEX ? desc.stride : 0;
+		instanceStrides[desc.binding] = desc.inputRate == VK_VERTEX_INPUT_RATE_INSTANCE ? desc.stride : 0;
 	}
 
 	for(uint32_t i = 0; i < vertexInputState->vertexAttributeDescriptionCount; i++)
@@ -296,7 +294,8 @@ GraphicsPipeline::GraphicsPipeline(const VkGraphicsPipelineCreateInfo* pCreateIn
 		input.normalized = !vk::Format(desc.format).isNonNormalizedInteger();
 		input.offset = desc.offset;
 		input.binding = desc.binding;
-		input.stride = bufferStrides[desc.binding];
+		input.vertexStride = vertexStrides[desc.binding];
+		input.instanceStride = instanceStrides[desc.binding];
 	}
 
 	const VkPipelineInputAssemblyStateCreateInfo* assemblyState = pCreateInfo->pInputAssemblyState;