diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp
index 122585e595..d774386cd5 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp
@@ -3102,6 +3102,8 @@ Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec
 			/* SHADER_STAGE_TESSELATION_CONTROL */ MESA_SHADER_TESS_CTRL,
 			/* SHADER_STAGE_TESSELATION_EVALUATION */ MESA_SHADER_TESS_EVAL,
 			/* SHADER_STAGE_COMPUTE */ MESA_SHADER_COMPUTE,
+			/* SHADER_STAGE_MESH_TASK */ MESA_SHADER_TASK,
+			/* SHADER_STAGE_MESH */ MESA_SHADER_MESH,
 		};
 
 		nir_shader *shader = spirv_to_nir(
@@ -3494,15 +3496,19 @@ Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(Vec
 	D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
 			D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
 			D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
-			D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
-			D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
-			D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
+			D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS;
 	if (!stages_processed.has_flag(SHADER_STAGE_VERTEX_BIT)) {
 		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;
 	}
 	if (!stages_processed.has_flag(SHADER_STAGE_FRAGMENT_BIT)) {
 		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS;
 	}
+	if (!stages_processed.has_flag(SHADER_STAGE_MESH_TASK_BIT)) {
+		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS;
+	}
+	if (!stages_processed.has_flag(SHADER_STAGE_MESH_BIT)) {
+		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
+	}
 	if (binary_data.vertex_input_mask) {
 		root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
 	}
@@ -5417,6 +5423,35 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe
 	cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
 }
 
+void RenderingDeviceDriverD3D12::command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
+	// DispatchMesh() is a method of ID3D12GraphicsCommandList6; query for that interface rather than casting blindly.
+	ComPtr<ID3D12GraphicsCommandList6> cmd_list_6;
+	cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(cmd_list_6.GetAddressOf()));
+	ERR_FAIL_NULL_MSG(cmd_list_6.Get(), "Mesh shader dispatch requires ID3D12GraphicsCommandList6, which is not available.");
+	cmd_list_6->DispatchMesh(p_x_groups, p_y_groups, p_z_groups);
+}
+
+void RenderingDeviceDriverD3D12::command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
+	ERR_FAIL_NULL_MSG(indirect_cmd_signatures.dispatch_mesh.Get(), "Mesh shader indirect dispatch is not supported by this device.");
+	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
+	BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
+	_resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+	_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+	cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch_mesh.Get(), p_draw_count, indirect_buf_info->resource, p_offset, nullptr, 0);
+}
+
+void RenderingDeviceDriverD3D12::command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
+	ERR_FAIL_NULL_MSG(indirect_cmd_signatures.dispatch_mesh.Get(), "Mesh shader indirect dispatch is not supported by this device.");
+	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
+	BufferInfo *indirect_buf_info = (BufferInfo *)p_indirect_buffer.id;
+	BufferInfo *count_buf_info = (BufferInfo *)p_count_buffer.id;
+	_resource_transition_batch(indirect_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+	_resource_transition_batch(count_buf_info, 0, 1, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
+	_resource_transitions_flush(cmd_buf_info->cmd_list.Get());
+	cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.dispatch_mesh.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset);
+}
+
 void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) {
 	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
 
@@ -6160,6 +6195,18 @@ uint64_t RenderingDeviceDriverD3D12::limit_get(Limit p_limit) {
 			return D3D12_CS_THREAD_GROUP_MAX_Y;
 		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
 			return D3D12_CS_THREAD_GROUP_MAX_Z;
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_X:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_Y:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
+		case LIMIT_MAX_MESH_WORKGROUP_COUNT_Z:
+			return MeshShaderCapabilities::MAX_THREAD_GROUPS;
 		case LIMIT_SUBGROUP_SIZE:
 			// Note in min/max. Shader model 6.6 supports it (see https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_6_WaveSize.html),
 			// but at this time I don't know the implications on the transpilation to DXIL, etc.
@@ -6214,6 +6261,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
 			return vrs_capabilities.ss_image_supported;
 		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
 			return true;
+		case SUPPORTS_MESH_SHADER:
+			return mesh_shader_capabilities.is_supported;
 		default:
 			return false;
 	}
@@ -6492,6 +6541,14 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
 		}
 	}
 
+	D3D12_FEATURE_DATA_D3D12_OPTIONS7 options7 = {};
+	res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS7, &options7, sizeof(options7));
+	if (SUCCEEDED(res)) {
+		if (options7.MeshShaderTier >= D3D12_MESH_SHADER_TIER_1) {
+			mesh_shader_capabilities.is_supported = true;
+		}
+	}
+
 	D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = {};
 	res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
 	if (SUCCEEDED(res)) {
@@ -6539,6 +6596,12 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
 		print_verbose("- Relaxed casting not supported");
 	}
 
+	if (mesh_shader_capabilities.is_supported) {
+		print_verbose("- D3D12 Mesh Shader supported");
+	} else {
+		print_verbose("- D3D12 Mesh Shader not supported");
+	}
+
 	print_verbose(String("- D3D12 16-bit ops supported: ") + (shader_capabilities.native_16bit_ops ? "yes" : "no"));
 
 	if (misc_features_support.depth_bounds_supported) {
@@ -6650,6 +6713,12 @@ Error RenderingDeviceDriverD3D12::_initialize_command_signatures() {
 	err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, sizeof(D3D12_DISPATCH_ARGUMENTS), &indirect_cmd_signatures.dispatch);
 	ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);
 
+	// Only create the mesh dispatch signature when mesh shaders are available, so devices lacking them can still initialize.
+	if (mesh_shader_capabilities.is_supported) {
+		err = create_command_signature(device.Get(), D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH, sizeof(D3D12_DISPATCH_MESH_ARGUMENTS), &indirect_cmd_signatures.dispatch_mesh);
+		ERR_FAIL_COND_V(err != OK, ERR_CANT_CREATE);
+	}
+
 	return OK;
 }
 
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h
index c45a40ff73..74e7fd8737 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.h
+++ b/drivers/d3d12/rendering_device_driver_d3d12.h
@@ -143,6 +143,12 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 		bool depth_bounds_supported = false;
 	};
 
+	struct MeshShaderCapabilities {
+		static const uint32_t MAX_THREAD_GROUPS = 63999; // The DirectX Mesh Shader spec states "Each of the three thread group counts must be less than 64k"; this is a conservative cap below that.
+		bool is_supported = false;
+	};
+	MeshShaderCapabilities mesh_shader_capabilities;
+
 	RenderingContextDriverD3D12 *context_driver = nullptr;
 	RenderingContextDriver::Device context_device;
 	ComPtr<IDXGIAdapter> adapter;
@@ -198,6 +204,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 		ComPtr<ID3D12CommandSignature> draw;
 		ComPtr<ID3D12CommandSignature> draw_indexed;
 		ComPtr<ID3D12CommandSignature> dispatch;
+		ComPtr<ID3D12CommandSignature> dispatch_mesh;
 	} indirect_cmd_signatures;
 
 	static void STDMETHODCALLTYPE _debug_message_func(D3D12_MESSAGE_CATEGORY p_category, D3D12_MESSAGE_SEVERITY p_severity, D3D12_MESSAGE_ID p_id, LPCSTR p_description, void *p_context);
@@ -854,6 +861,11 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
 	virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
 	virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
 
+	// Mesh Shader Drawing.
+	virtual void command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final;
+	virtual void command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final;
+	virtual void command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final;
+
 	// Buffer binding.
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 97fd156584..495d1bd43f 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -497,6 +497,7 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + _register_requested_device_extension(VK_EXT_MESH_SHADER_EXTENSION_NAME, false); if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) { _register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true); @@ -667,6 +668,7 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + VkPhysicalDeviceMeshShaderFeaturesEXT mesh_shader_features = {}; const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; if (use_1_2_features) { @@ -703,6 +705,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_features = &pipeline_cache_control_features; } + if (enabled_device_extension_names.has(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + mesh_shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT; + 
mesh_shader_features.pNext = next_features; + next_features = &mesh_shader_features; + } + VkPhysicalDeviceFeatures2 device_features_2 = {}; device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features_2.pNext = next_features; @@ -745,6 +753,14 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) { pipeline_cache_control_support = pipeline_cache_control_features.pipelineCreationCacheControl; } + + if (enabled_device_extension_names.has(VK_EXT_MESH_SHADER_EXTENSION_NAME)) { + mesh_shader_capabilities.task_shader_is_supported = mesh_shader_features.taskShader; + mesh_shader_capabilities.mesh_shader_is_supported = mesh_shader_features.meshShader; + mesh_shader_capabilities.multiview_mesh_shader_is_supported = mesh_shader_features.multiviewMeshShader; + mesh_shader_capabilities.primitive_fragment_shading_rate_mesh_shader_is_supported = mesh_shader_features.primitiveFragmentShadingRateMeshShader; + mesh_shader_capabilities.mesh_shader_queries_is_supported = mesh_shader_features.meshShaderQueries; + } } if (functions.GetPhysicalDeviceProperties2 != nullptr) { @@ -754,6 +770,7 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; VkPhysicalDeviceProperties2 physical_device_properties_2 = {}; + VkPhysicalDeviceMeshShaderPropertiesEXT mesh_shader_properties = {}; const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; if (use_1_1_properties) { @@ -781,6 +798,12 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &vrs_properties; } + if (mesh_shader_capabilities.task_shader_is_supported || mesh_shader_capabilities.mesh_shader_is_supported) { + mesh_shader_properties.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT; + mesh_shader_properties.pNext = next_properties; + next_properties = &mesh_shader_properties; + } + physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; physical_device_properties_2.pNext = next_properties; functions.GetPhysicalDeviceProperties2(physical_device, &physical_device_properties_2); @@ -839,6 +862,19 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { print_verbose("- Vulkan multiview not supported"); } + if (mesh_shader_capabilities.task_shader_is_supported || mesh_shader_capabilities.mesh_shader_is_supported) { + mesh_shader_capabilities.max_task_work_group_count[0] = mesh_shader_properties.maxTaskWorkGroupCount[0]; + mesh_shader_capabilities.max_task_work_group_count[1] = mesh_shader_properties.maxTaskWorkGroupCount[1]; + mesh_shader_capabilities.max_task_work_group_count[2] = mesh_shader_properties.maxTaskWorkGroupCount[2]; + mesh_shader_capabilities.max_mesh_work_group_count[0] = mesh_shader_properties.maxMeshWorkGroupCount[0]; + mesh_shader_capabilities.max_mesh_work_group_count[1] = mesh_shader_properties.maxMeshWorkGroupCount[1]; + mesh_shader_capabilities.max_mesh_work_group_count[2] = mesh_shader_properties.maxMeshWorkGroupCount[2]; + + print_verbose("- Vulkan Mesh Shader supported:"); + } else { + print_verbose("- Vulkan Mesh Shader not supported"); + } + print_verbose("- Vulkan subgroup:"); print_verbose(" size: " + itos(subgroup_capabilities.size)); print_verbose(" min size: " + itos(subgroup_capabilities.min_size)); @@ -913,6 +949,18 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVectorvk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } +void RenderingDeviceDriverVulkan::command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { + vkCmdDrawMeshTasksEXT((VkCommandBuffer)p_cmd_buffer.id, p_x_groups, 
p_y_groups, p_z_groups); +} + +void RenderingDeviceDriverVulkan::command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) { + const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id; + vkCmdDrawMeshTasksIndirectEXT((VkCommandBuffer)p_cmd_buffer.id, buf_info->vk_buffer, p_offset, p_draw_count, p_stride); +} + +void RenderingDeviceDriverVulkan::command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) { + const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id; + const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id; + vkCmdDrawMeshTasksIndirectCountEXT((VkCommandBuffer)p_cmd_buffer.id, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); +} + void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { VkBuffer *vk_buffers = ALLOCA_ARRAY(VkBuffer, p_binding_count); for (uint32_t i = 0; i < p_binding_count; i++) { @@ -4927,6 +4992,18 @@ uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) { return limits.maxViewportDimensions[0]; case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y: return limits.maxViewportDimensions[1]; + case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X: + return mesh_shader_capabilities.max_task_work_group_count[0]; + case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y: + return mesh_shader_capabilities.max_task_work_group_count[1]; + case LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z: + return mesh_shader_capabilities.max_task_work_group_count[2]; + case LIMIT_MAX_MESH_WORKGROUP_COUNT_X: + return mesh_shader_capabilities.max_mesh_work_group_count[0]; + case 
LIMIT_MAX_MESH_WORKGROUP_COUNT_Y: + return mesh_shader_capabilities.max_mesh_work_group_count[1]; + case LIMIT_MAX_MESH_WORKGROUP_COUNT_Z: + return mesh_shader_capabilities.max_mesh_work_group_count[2]; case LIMIT_SUBGROUP_SIZE: return subgroup_capabilities.size; case LIMIT_SUBGROUP_MIN_SIZE: @@ -4971,6 +5048,8 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) { return vrs_capabilities.attachment_vrs_supported && physical_device_features.shaderStorageImageExtendedFormats; case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS: return true; + case SUPPORTS_MESH_SHADER: + return mesh_shader_capabilities.task_shader_is_supported && mesh_shader_capabilities.mesh_shader_is_supported; default: return false; } diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 6847ae00be..815f24a433 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -104,6 +104,17 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { bool storage_input_output_16 = false; }; + struct MeshShaderCapabilities { + bool task_shader_is_supported = false; + bool mesh_shader_is_supported = false; + bool multiview_mesh_shader_is_supported = false; + bool primitive_fragment_shading_rate_mesh_shader_is_supported = false; + bool mesh_shader_queries_is_supported = false; + + uint32_t max_task_work_group_count[3] = { 0, 0, 0 }; + uint32_t max_mesh_work_group_count[3] = { 0, 0, 0 }; + }; + struct DeviceFunctions { PFN_vkCreateSwapchainKHR CreateSwapchainKHR = nullptr; PFN_vkDestroySwapchainKHR DestroySwapchainKHR = nullptr; @@ -131,6 +142,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { VRSCapabilities vrs_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; + MeshShaderCapabilities mesh_shader_capabilities; bool pipeline_cache_control_support = false; DeviceFunctions 
device_functions; @@ -541,6 +553,11 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final; virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; + // Mesh Shader Drawing. + virtual void command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) override final; + virtual void command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) override final; + virtual void command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; + // Buffer binding. 
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; diff --git a/editor/plugins/shader_file_editor_plugin.cpp b/editor/plugins/shader_file_editor_plugin.cpp index 05919fb0f6..77f853d62f 100644 --- a/editor/plugins/shader_file_editor_plugin.cpp +++ b/editor/plugins/shader_file_editor_plugin.cpp @@ -271,7 +271,9 @@ ShaderFileEditor::ShaderFileEditor() { "Fragment", "TessControl", "TessEval", - "Compute" + "Compute", + "MeshTask", + "Mesh" }; stage_hb = memnew(HBoxContainer); diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index b5f70fb98b..302f3217c8 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -48,7 +48,9 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage EShLangFragment, EShLangTessControl, EShLangTessEvaluation, - EShLangCompute + EShLangCompute, + EShLangTask, + EShLangMesh }; int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100 diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 7af0201a75..1dd666f16d 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -2791,6 +2791,12 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader case SHADER_STAGE_COMPUTE: shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); break; + case SHADER_STAGE_MESH_TASK: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_MESH_TASK_SHADER_BIT); + break; + case SHADER_STAGE_MESH: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_MESH_SHADER_BIT); + break; default: DEV_ASSERT(false && "Unknown shader stage."); break; @@ -4240,6 +4246,164 @@ void 
RenderingDevice::draw_list_draw(DrawListID p_list, bool p_use_indices, uint
 	dl->state.draw_count++;
 }
 
+void RenderingDevice::draw_list_dispatch_mesh(DrawListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!has_feature(SUPPORTS_MESH_SHADER),
+			"The GPU doesn't support Mesh Shaders, its your responsibility to check it does before calling this.");
+#endif
+	DrawList *dl = _get_draw_list_ptr(p_list);
+	ERR_FAIL_NULL(dl);
+
+#ifdef DEBUG_ENABLED
+	// Validate the draw list state first, so a missing pipeline reports a clear error instead of a null shader failure.
+	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
+	ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
+			"No render pipeline was set before attempting to draw.");
+
+	ERR_FAIL_COND_MSG(p_x_groups == 0, "Dispatch amount of X mesh/task groups (" + itos(p_x_groups) + ") is zero.");
+	ERR_FAIL_COND_MSG(p_y_groups == 0, "Dispatch amount of Y mesh/task groups (" + itos(p_y_groups) + ") is zero.");
+	ERR_FAIL_COND_MSG(p_z_groups == 0, "Dispatch amount of Z mesh/task groups (" + itos(p_z_groups) + ") is zero.");
+
+	const Shader *shader = shader_owner.get_or_null(dl->state.pipeline_shader);
+	ERR_FAIL_NULL(shader);
+	if (shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_MESH_TASK_SHADER_BIT)) {
+		ERR_FAIL_COND_MSG(p_x_groups > driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X),
+				"Dispatch amount of X task groups (" + itos(p_x_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X)) + ")");
+		ERR_FAIL_COND_MSG(p_y_groups > driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y),
+				"Dispatch amount of Y task groups (" + itos(p_y_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y)) + ")");
+		ERR_FAIL_COND_MSG(p_z_groups > driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z),
+				"Dispatch amount of Z task groups (" + itos(p_z_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z)) + ")");
+	} else if (shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_MESH_SHADER_BIT)) {
+		ERR_FAIL_COND_MSG(p_x_groups > driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_X),
+				"Dispatch amount of X mesh groups (" + itos(p_x_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_X)) + ")");
+		ERR_FAIL_COND_MSG(p_y_groups > driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Y),
+				"Dispatch amount of Y mesh groups (" + itos(p_y_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Y)) + ")");
+		ERR_FAIL_COND_MSG(p_z_groups > driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Z),
+				"Dispatch amount of Z mesh groups (" + itos(p_z_groups) + ") is larger than device limit (" + itos(driver->limit_get(LIMIT_MAX_MESH_WORKGROUP_COUNT_Z)) + ")");
+	} else {
+		ERR_FAIL_MSG("Unexpected pipeline stage.");
+	}
+#endif
+
+#ifdef DEBUG_ENABLED
+	if (dl->validation.pipeline_push_constant_size > 0) {
+		// Using push constants, check that they were supplied.
+		ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+
+#endif
+
+	// Bind descriptor sets.
+
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+#ifdef DEBUG_ENABLED
+		if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
+			if (dl->state.sets[i].uniform_set_format == 0) {
+				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
+			} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
+				UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			} else {
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			}
+		}
+#endif
+		draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+	}
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+		if (!dl->state.sets[i].bound) {
+			// All good, see if this requires re-binding.
+			draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+
+			UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+			draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
+
+			dl->state.sets[i].bound = true;
+		}
+	}
+
+	draw_graph.add_draw_list_dispatch_mesh(p_x_groups, p_y_groups, p_z_groups);
+}
+
+void RenderingDevice::draw_list_dispatch_mesh_indirect(DrawListID p_list, RID p_buffer, uint32_t p_offset) {
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!has_feature(SUPPORTS_MESH_SHADER),
+			"The GPU doesn't support Mesh Shaders, its your responsibility to check it does before calling this.");
+#endif
+	DrawList *dl = _get_draw_list_ptr(p_list);
+	ERR_FAIL_NULL(dl);
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!dl->validation.active, "Submitted Draw Lists can no longer be modified.");
+#endif
+
+	Buffer *buffer = storage_buffer_owner.get_or_null(p_buffer);
+	ERR_FAIL_NULL(buffer);
+
+	ERR_FAIL_COND_MSG(!buffer->usage.has_flag(RDD::BUFFER_USAGE_INDIRECT_BIT), "Buffer provided was not created to do indirect dispatch.");
+
+	ERR_FAIL_COND_MSG((uint64_t)p_offset + 12 > buffer->size, "Offset provided (+12) is past the end of buffer."); // Sum in 64 bits: a 32-bit p_offset + 12 could wrap around and pass the check.
+
+#ifdef DEBUG_ENABLED
+	ERR_FAIL_COND_MSG(!dl->validation.pipeline_active,
+			"No render pipeline was set before attempting to draw.");
+
+	if (dl->validation.pipeline_push_constant_size > 0) {
+		// Using push constants, check that they were supplied.
+		ERR_FAIL_COND_MSG(!dl->validation.pipeline_push_constant_supplied,
+				"The shader in this pipeline requires a push constant to be set before drawing, but it's not present.");
+	}
+
+#endif
+
+	// Bind descriptor sets.
+
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+#ifdef DEBUG_ENABLED
+		if (dl->state.sets[i].pipeline_expected_format != dl->state.sets[i].uniform_set_format) {
+			if (dl->state.sets[i].uniform_set_format == 0) {
+				ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline");
+			} else if (uniform_set_owner.owns(dl->state.sets[i].uniform_set)) {
+				UniformSet *us = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			} else {
+				ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(dl->state.pipeline_shader));
+			}
+		}
+#endif
+		draw_graph.add_draw_list_uniform_set_prepare_for_use(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+	}
+	for (uint32_t i = 0; i < dl->state.set_count; i++) {
+		if (dl->state.sets[i].pipeline_expected_format == 0) {
+			continue; // Nothing expected by this pipeline.
+		}
+		if (!dl->state.sets[i].bound) {
+			// All good, see if this requires re-binding.
+			draw_graph.add_draw_list_bind_uniform_set(dl->state.pipeline_shader_driver_id, dl->state.sets[i].uniform_set_driver_id, i);
+
+			UniformSet *uniform_set = uniform_set_owner.get_or_null(dl->state.sets[i].uniform_set);
+			draw_graph.add_draw_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage);
+
+			dl->state.sets[i].bound = true;
+		}
+	}
+
+	draw_graph.add_draw_list_dispatch_mesh_indirect(buffer->driver_id, p_offset);
+
+	if (buffer->draw_tracker != nullptr) {
+		draw_graph.add_draw_list_usage(buffer->draw_tracker, RDG::RESOURCE_USAGE_INDIRECT_BUFFER_READ);
+	}
+}
+
 void RenderingDevice::draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect) {
 	DrawList *dl = _get_draw_list_ptr(p_list);
 
@@ -6020,6 +6184,8 @@ void RenderingDevice::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("draw_list_set_push_constant", "draw_list", "buffer", "size_bytes"), &RenderingDevice::_draw_list_set_push_constant);
 
 	ClassDB::bind_method(D_METHOD("draw_list_draw", "draw_list", "use_indices", "instances", "procedural_vertex_count"), &RenderingDevice::draw_list_draw, DEFVAL(0));
+	ClassDB::bind_method(D_METHOD("draw_list_dispatch_mesh", "draw_list", "x_groups", "y_groups", "z_groups"), &RenderingDevice::draw_list_dispatch_mesh);
+	ClassDB::bind_method(D_METHOD("draw_list_dispatch_mesh_indirect", "draw_list", "buffer", "offset"), &RenderingDevice::draw_list_dispatch_mesh_indirect);
 	ClassDB::bind_method(D_METHOD("draw_list_enable_scissor", "draw_list", "rect"), &RenderingDevice::draw_list_enable_scissor, DEFVAL(Rect2()));
 	ClassDB::bind_method(D_METHOD("draw_list_disable_scissor", "draw_list"), &RenderingDevice::draw_list_disable_scissor);
 
@@ -6049,6 +6215,7 @@ void RenderingDevice::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_captured_timestamp_cpu_time", "index"), &RenderingDevice::get_captured_timestamp_cpu_time);
 	ClassDB::bind_method(D_METHOD("get_captured_timestamp_name", "index"), &RenderingDevice::get_captured_timestamp_name);
 
+
ClassDB::bind_method(D_METHOD("has_feature", "feature"), &RenderingDevice::has_feature); ClassDB::bind_method(D_METHOD("limit_get", "limit"), &RenderingDevice::limit_get); ClassDB::bind_method(D_METHOD("get_frame_delay"), &RenderingDevice::get_frame_delay); ClassDB::bind_method(D_METHOD("submit"), &RenderingDevice::submit); @@ -6541,12 +6708,16 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH_TASK); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH); BIND_ENUM_CONSTANT(SHADER_STAGE_MAX); BIND_ENUM_CONSTANT(SHADER_STAGE_VERTEX_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_FRAGMENT_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH_TASK_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MESH_BIT); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_GLSL); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_HLSL); @@ -6555,6 +6726,8 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT); BIND_ENUM_CONSTANT(PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT); + BIND_ENUM_CONSTANT(SUPPORTS_MESH_SHADER); + BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS); BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS); BIND_ENUM_CONSTANT(LIMIT_MAX_TEXTURES_PER_UNIFORM_SET); @@ -6590,6 +6763,12 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X); BIND_ENUM_CONSTANT(LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y); BIND_ENUM_CONSTANT(LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_WORKGROUP_COUNT_X); + 
BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_WORKGROUP_COUNT_Y); + BIND_ENUM_CONSTANT(LIMIT_MAX_MESH_WORKGROUP_COUNT_Z); BIND_ENUM_CONSTANT(LIMIT_MAX_VIEWPORT_DIMENSIONS_X); BIND_ENUM_CONSTANT(LIMIT_MAX_VIEWPORT_DIMENSIONS_Y); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 5846f85ee0..1e2b23757a 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -1164,6 +1164,9 @@ class RenderingDevice : public RenderingDeviceCommons { void draw_list_draw(DrawListID p_list, bool p_use_indices, uint32_t p_instances = 1, uint32_t p_procedural_vertices = 0); + void draw_list_dispatch_mesh(DrawListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + void draw_list_dispatch_mesh_indirect(DrawListID p_list, RID p_buffer, uint32_t p_offset); + void draw_list_enable_scissor(DrawListID p_list, const Rect2 &p_rect); void draw_list_disable_scissor(DrawListID p_list); diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp index 1985573b22..302e70b14d 100644 --- a/servers/rendering/rendering_device_binds.cpp +++ b/servers/rendering/rendering_device_binds.cpp @@ -36,7 +36,7 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String Vector lines = p_text.split("\n"); bool reading_versions = false; - bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false }; + bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false, false, false }; RD::ShaderStage stage = RD::SHADER_STAGE_MAX; static const char *stage_str[RD::SHADER_STAGE_MAX] = { "vertex", @@ -44,6 +44,8 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String "tesselation_control", "tesselation_evaluation", "compute", + "mesh_task", + "mesh", }; String stage_code[RD::SHADER_STAGE_MAX]; int stages_found = 0; diff --git a/servers/rendering/rendering_device_binds.h 
b/servers/rendering/rendering_device_binds.h index 4d9b565080..dd2243eb58 100644 --- a/servers/rendering/rendering_device_binds.h +++ b/servers/rendering/rendering_device_binds.h @@ -262,6 +262,8 @@ class RDShaderSource : public RefCounted { ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_control"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_evaluation"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_compute"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_mesh_task"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MESH_TASK); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_mesh"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MESH); ADD_GROUP("Syntax", "source_"); ADD_PROPERTY(PropertyInfo(Variant::INT, "language", PROPERTY_HINT_RANGE, "GLSL,HLSL"), "set_language", "get_language"); } @@ -321,12 +323,16 @@ class RDShaderSPIRV : public Resource { ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_control"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_evaluation"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_compute"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_mesh_task"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MESH_TASK); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_mesh"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MESH); ADD_GROUP("Compile Error", 
"compile_error_"); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_vertex"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_VERTEX); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_fragment"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_FRAGMENT); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_control"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_evaluation"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_compute"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_mesh_task"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MESH_TASK); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_mesh"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MESH); } }; @@ -389,7 +395,9 @@ class RDShaderFile : public Resource { "fragment", "tesselation_control", "tesselation_evaluation", - "compute" + "compute", + "mesh_task", + "mesh" }; ERR_PRINT("Error parsing shader '" + p_file + "', version '" + String(E.key) + "', stage '" + stage_str[i] + "':\n\n" + error); diff --git a/servers/rendering/rendering_device_commons.cpp b/servers/rendering/rendering_device_commons.cpp index 4dbd0e3964..5c49adc7d8 100644 --- a/servers/rendering/rendering_device_commons.cpp +++ b/servers/rendering/rendering_device_commons.cpp @@ -910,4 +910,6 @@ const char *RenderingDeviceCommons::SHADER_STAGE_NAMES[SHADER_STAGE_MAX] = { "TesselationControl", "TesselationEvaluation", "Compute", + "MeshTask", + "Mesh", }; diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 
918bf9b834..a6adb2469e 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -466,12 +466,16 @@ class RenderingDeviceCommons : public Object { SHADER_STAGE_TESSELATION_CONTROL, SHADER_STAGE_TESSELATION_EVALUATION, SHADER_STAGE_COMPUTE, + SHADER_STAGE_MESH_TASK, + SHADER_STAGE_MESH, SHADER_STAGE_MAX, SHADER_STAGE_VERTEX_BIT = (1 << SHADER_STAGE_VERTEX), SHADER_STAGE_FRAGMENT_BIT = (1 << SHADER_STAGE_FRAGMENT), SHADER_STAGE_TESSELATION_CONTROL_BIT = (1 << SHADER_STAGE_TESSELATION_CONTROL), SHADER_STAGE_TESSELATION_EVALUATION_BIT = (1 << SHADER_STAGE_TESSELATION_EVALUATION), SHADER_STAGE_COMPUTE_BIT = (1 << SHADER_STAGE_COMPUTE), + SHADER_STAGE_MESH_TASK_BIT = (1 << SHADER_STAGE_MESH_TASK), + SHADER_STAGE_MESH_BIT = (1 << SHADER_STAGE_MESH), }; struct ShaderStageSPIRVData { @@ -801,6 +805,12 @@ class RenderingDeviceCommons : public Object { LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X, LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y, LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z, + LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_X, + LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Y, + LIMIT_MAX_MESH_TASK_WORKGROUP_COUNT_Z, + LIMIT_MAX_MESH_WORKGROUP_COUNT_X, + LIMIT_MAX_MESH_WORKGROUP_COUNT_Y, + LIMIT_MAX_MESH_WORKGROUP_COUNT_Z, LIMIT_MAX_VIEWPORT_DIMENSIONS_X, LIMIT_MAX_VIEWPORT_DIMENSIONS_Y, LIMIT_SUBGROUP_SIZE, @@ -820,6 +830,7 @@ class RenderingDeviceCommons : public Object { SUPPORTS_ATTACHMENT_VRS, // If not supported, a fragment shader with only side effets (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver. 
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, + SUPPORTS_MESH_SHADER, }; enum SubgroupOperations { diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 0b5fc51a1d..2960f43d11 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -325,7 +325,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_RESOLVE_BIT = (1 << 14), PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), - PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_MESH_TASK_SHADER_BIT = (1 << 17), + PIPELINE_STAGE_MESH_SHADER_BIT = (1 << 18), }; enum BarrierAccessBits { @@ -644,6 +645,11 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual void command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0; virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; + // Mesh Shader Drawing. + virtual void command_render_dispatch_mesh(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0; + virtual void command_render_dispatch_mesh_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) = 0; + virtual void command_render_dispatch_mesh_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; + // Buffer binding. 
virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) = 0; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) = 0; diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 221ec72e4a..a71dad330f 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -699,6 +699,16 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command driver->command_render_draw_indexed(p_command_buffer, draw_indexed_instruction->index_count, draw_indexed_instruction->instance_count, draw_indexed_instruction->first_index, 0, 0); instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction); } break; + case DrawListInstruction::TYPE_DISPATCH_MESH: { + const DrawListDispatchMeshInstruction *dispatch_mesh_instruction = reinterpret_cast<const DrawListDispatchMeshInstruction *>(instruction); + driver->command_render_dispatch_mesh(p_command_buffer, dispatch_mesh_instruction->x_groups, dispatch_mesh_instruction->y_groups, dispatch_mesh_instruction->z_groups); + instruction_data_cursor += sizeof(DrawListDispatchMeshInstruction); + } break; + case DrawListInstruction::TYPE_DISPATCH_MESH_INDIRECT: { + const DrawListDispatchMeshIndirectInstruction *dispatch_mesh_indirect_instruction = reinterpret_cast<const DrawListDispatchMeshIndirectInstruction *>(instruction); + driver->command_render_dispatch_mesh_indirect(p_command_buffer, dispatch_mesh_indirect_instruction->buffer, dispatch_mesh_indirect_instruction->offset, 1, 0); + instruction_data_cursor += sizeof(DrawListDispatchMeshIndirectInstruction); + } break; case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { const DrawListExecuteCommandsInstruction *execute_commands_instruction = reinterpret_cast<const DrawListExecuteCommandsInstruction *>(instruction); driver->command_buffer_execute_secondary(p_command_buffer, execute_commands_instruction->command_buffer);
@@ -1175,6 +1185,16 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u print_line("\tDRAW INDICES", draw_indexed_instruction->index_count, "INSTANCES", draw_indexed_instruction->instance_count, "FIRST INDEX", draw_indexed_instruction->first_index); instruction_data_cursor += sizeof(DrawListDrawIndexedInstruction); } break; + case DrawListInstruction::TYPE_DISPATCH_MESH: { + const DrawListDispatchMeshInstruction *dispatch_mesh_instruction = reinterpret_cast<const DrawListDispatchMeshInstruction *>(instruction); + print_line("\tDISPATCH MESH", dispatch_mesh_instruction->x_groups, dispatch_mesh_instruction->y_groups, dispatch_mesh_instruction->z_groups); + instruction_data_cursor += sizeof(DrawListDispatchMeshInstruction); + } break; + case DrawListInstruction::TYPE_DISPATCH_MESH_INDIRECT: { + const DrawListDispatchMeshIndirectInstruction *dispatch_mesh_indirect_instruction = reinterpret_cast<const DrawListDispatchMeshIndirectInstruction *>(instruction); + print_line("\tDISPATCH MESH INDIRECT BUFFER ID", itos(dispatch_mesh_indirect_instruction->buffer.id), "OFFSET", dispatch_mesh_indirect_instruction->offset); + instruction_data_cursor += sizeof(DrawListDispatchMeshIndirectInstruction); + } break; case DrawListInstruction::TYPE_EXECUTE_COMMANDS: { print_line("\tEXECUTE COMMANDS"); instruction_data_cursor += sizeof(DrawListExecuteCommandsInstruction); @@ -1591,6 +1611,21 @@ void RenderingDeviceGraph::add_draw_list_draw_indexed(uint32_t p_index_count, ui instruction->first_index = p_first_index; } +void RenderingDeviceGraph::add_draw_list_dispatch_mesh(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { + DrawListDispatchMeshInstruction *instruction = reinterpret_cast<DrawListDispatchMeshInstruction *>(_allocate_draw_list_instruction(sizeof(DrawListDispatchMeshInstruction))); + instruction->type = DrawListInstruction::TYPE_DISPATCH_MESH; + instruction->x_groups = p_x_groups; + instruction->y_groups = p_y_groups; + instruction->z_groups = p_z_groups; +} + +void RenderingDeviceGraph::add_draw_list_dispatch_mesh_indirect(RDD::BufferID p_buffer,
uint32_t p_offset) { + DrawListDispatchMeshIndirectInstruction *instruction = reinterpret_cast<DrawListDispatchMeshIndirectInstruction *>(_allocate_draw_list_instruction(sizeof(DrawListDispatchMeshIndirectInstruction))); + instruction->type = DrawListInstruction::TYPE_DISPATCH_MESH_INDIRECT; + instruction->buffer = p_buffer; + instruction->offset = p_offset; +} + void RenderingDeviceGraph::add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer) { DrawListExecuteCommandsInstruction *instruction = reinterpret_cast<DrawListExecuteCommandsInstruction *>(_allocate_draw_list_instruction(sizeof(DrawListExecuteCommandsInstruction))); instruction->type = DrawListInstruction::TYPE_EXECUTE_COMMANDS; diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index baa15f63f6..b4ccc1caa2 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -69,6 +69,8 @@ class RenderingDeviceGraph { TYPE_CLEAR_ATTACHMENTS, TYPE_DRAW, TYPE_DRAW_INDEXED, + TYPE_DISPATCH_MESH, + TYPE_DISPATCH_MESH_INDIRECT, TYPE_EXECUTE_COMMANDS, TYPE_NEXT_SUBPASS, TYPE_SET_BLEND_CONSTANTS, @@ -460,6 +462,17 @@ class RenderingDeviceGraph { uint32_t first_index = 0; }; + struct DrawListDispatchMeshInstruction : DrawListInstruction { + uint32_t x_groups = 0; + uint32_t y_groups = 0; + uint32_t z_groups = 0; + }; + + struct DrawListDispatchMeshIndirectInstruction : DrawListInstruction { + RDD::BufferID buffer; + uint32_t offset = 0; + }; + struct DrawListEndRenderPassInstruction : DrawListInstruction { // No contents.
}; @@ -672,6 +685,8 @@ class RenderingDeviceGraph { void add_draw_list_clear_attachments(VectorView p_attachments_clear, VectorView p_attachments_clear_rect); void add_draw_list_draw(uint32_t p_vertex_count, uint32_t p_instance_count); void add_draw_list_draw_indexed(uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index); + void add_draw_list_dispatch_mesh(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups); + void add_draw_list_dispatch_mesh_indirect(RDD::BufferID p_buffer, uint32_t p_offset); void add_draw_list_execute_commands(RDD::CommandBufferID p_command_buffer); void add_draw_list_next_subpass(RDD::CommandBufferType p_command_buffer_type); void add_draw_list_set_blend_constants(const Color &p_color);