diff --git a/Cargo.lock b/Cargo.lock index e254fed8..e362176e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1183,13 +1183,14 @@ dependencies = [ [[package]] name = "hassle-rs" -version = "0.4.0" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08839e15d5d12dbe4eeded38d6986175baf24b1bd8baeef32c52e19ce2a05283" +checksum = "cd67699699f525438e8de6635d65739157d4dbe69e763242e4b07b2b3673a829" dependencies = [ "bitflags", "com-rs", - "libloading 0.6.7", + "libc", + "libloading 0.7.1", "thiserror", "widestring", "winapi 0.3.9", @@ -2921,9 +2922,9 @@ dependencies = [ [[package]] name = "widestring" -version = "0.4.3" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c168940144dd21fd8046987c16a46a33d5fc84eec29ef9dcddc2ac9e31526b7c" +checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" [[package]] name = "winapi" diff --git a/README.md b/README.md index 53514f38..e17d8856 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ Operating systems: * `uuid-dev` * In case the bundled `libdxcompiler.so` doesn't work: https://github.com/microsoft/DirectXShaderCompiler#downloads +### (Some) macOS dependencies + +* `ossp-uuid` (`brew install ossp-uuid`) + ## Building and running To build `kajiya` and its tools, [you need Rust](https://www.rust-lang.org/tools/install). 
diff --git a/assets/fonts/LICENSE.txt b/assets/fonts/LICENSE.txt old mode 100644 new mode 100755 diff --git a/crates/lib/kajiya-backend/Cargo.toml b/crates/lib/kajiya-backend/Cargo.toml index af209804..997bd4ce 100644 --- a/crates/lib/kajiya-backend/Cargo.toml +++ b/crates/lib/kajiya-backend/Cargo.toml @@ -17,7 +17,7 @@ derive_builder = { version = "0.9", default-features = false } futures = "0.3" glam = "0.18" gpu-allocator = { git = "https://github.com/Traverse-Research/gpu-allocator.git", rev = "e66d062cbd73a6c98834fc3e3acef98318097156" } -hassle-rs = "0.4" +hassle-rs = "0.5" hotwatch = "0.4" lazy_static = "1.4" log = "0.4" @@ -36,7 +36,5 @@ turbosloth = { git = "https://github.com/h3r2tic/turbosloth.git", rev = "92030af vk-sync = { git = "https://github.com/h3r2tic/vk-sync-rs", rev = "cb5bbf2" } [features] -default = ["ray-tracing"] #default = [] -ray-tracing = [] dlss = [] diff --git a/crates/lib/kajiya-backend/src/lib.rs b/crates/lib/kajiya-backend/src/lib.rs index 8d0786c5..a276c28c 100644 --- a/crates/lib/kajiya-backend/src/lib.rs +++ b/crates/lib/kajiya-backend/src/lib.rs @@ -16,9 +16,4 @@ pub use file::{canonical_path_from_vfs, normalized_path_from_vfs, set_vfs_mount_ pub use gpu_allocator; pub use rspirv_reflect; pub use vk_sync; -pub use vulkan::{ - device::Device, - image::*, - shader::{MAX_BINDLESS_DESCRIPTOR_COUNT, MAX_DESCRIPTOR_SETS}, - RenderBackend, -}; +pub use vulkan::{device::Device, image::*, shader::MAX_DESCRIPTOR_SETS, RenderBackend}; diff --git a/crates/lib/kajiya-backend/src/vulkan/device.rs b/crates/lib/kajiya-backend/src/vulkan/device.rs index e7ce222a..a443510f 100644 --- a/crates/lib/kajiya-backend/src/vulkan/device.rs +++ b/crates/lib/kajiya-backend/src/vulkan/device.rs @@ -21,6 +21,11 @@ use std::{ sync::Arc, }; +/// Descriptor count to subtract from the max bindless descriptor count, +/// so that we don't overflow the max when using bindless _and_ non-bindless descriptors +/// in the same shader stage. 
+pub const RESERVED_DESCRIPTOR_COUNT: u32 = 32; + pub struct Queue { pub raw: vk::Queue, pub family: QueueFamily, @@ -145,6 +150,8 @@ pub struct Device { pub ray_tracing_pipeline_properties: vk::PhysicalDeviceRayTracingPipelinePropertiesKHR, frames: [Mutex>; 2], + + ray_tracing_enabled: bool, } // Allowing `Send` on `frames` is technically unsound. There are some checks @@ -156,8 +163,26 @@ unsafe impl Send for Device {} unsafe impl Sync for Device {} impl Device { - fn extension_names(pdevice: &Arc) -> Vec<*const i8> { - let mut device_extension_names_raw = vec![ + pub fn create(pdevice: &Arc) -> Result> { + let supported_extensions: HashSet = unsafe { + let extension_properties = pdevice + .instance + .raw + .enumerate_device_extension_properties(pdevice.raw)?; + debug!("Extension properties:\n{:#?}", &extension_properties); + + extension_properties + .iter() + .map(|ext| { + std::ffi::CStr::from_ptr(ext.extension_name.as_ptr() as *const c_char) + .to_string_lossy() + .as_ref() + .to_owned() + }) + .collect() + }; + + let mut device_extension_names = vec![ vk::ExtDescriptorIndexingFn::name().as_ptr(), vk::ExtScalarBlockLayoutFn::name().as_ptr(), vk::KhrMaintenance1Fn::name().as_ptr(), @@ -168,10 +193,8 @@ impl Device { vk::KhrImagelessFramebufferFn::name().as_ptr(), vk::KhrImageFormatListFn::name().as_ptr(), vk::KhrDescriptorUpdateTemplateFn::name().as_ptr(), - vk::KhrDrawIndirectCountFn::name().as_ptr(), // Rust-GPU vk::KhrShaderFloat16Int8Fn::name().as_ptr(), - vk::KhrVulkanMemoryModelFn::name().as_ptr(), // DLSS #[cfg(feature = "dlss")] { @@ -185,55 +208,47 @@ impl Device { vk::NvxImageViewHandleFn::name().as_ptr(), ]; - #[cfg(feature = "ray-tracing")] - { - device_extension_names_raw.extend( - [ - vk::KhrPipelineLibraryFn::name().as_ptr(), // rt dep - vk::KhrDeferredHostOperationsFn::name().as_ptr(), // rt dep - vk::KhrBufferDeviceAddressFn::name().as_ptr(), // rt dep - vk::KhrAccelerationStructureFn::name().as_ptr(), - 
vk::KhrRayTracingPipelineFn::name().as_ptr(), - //vk::KhrRayQueryFn::name().as_ptr(), - ] - .iter(), - ); - } + let ray_tracing_extensions = [ + vk::KhrVulkanMemoryModelFn::name().as_ptr(), // used in ray tracing shaders + vk::KhrPipelineLibraryFn::name().as_ptr(), // rt dep + vk::KhrDeferredHostOperationsFn::name().as_ptr(), // rt dep + vk::KhrBufferDeviceAddressFn::name().as_ptr(), // rt dep + vk::KhrAccelerationStructureFn::name().as_ptr(), + vk::KhrRayTracingPipelineFn::name().as_ptr(), + ]; - if pdevice.presentation_requested { - device_extension_names_raw.push(khr::Swapchain::name().as_ptr()); - } + let ray_tracing_enabled = unsafe { + ray_tracing_extensions.iter().all(|ext| { + let ext = std::ffi::CStr::from_ptr(*ext).to_string_lossy(); - device_extension_names_raw - } + let supported = supported_extensions.contains(ext.as_ref()); - pub fn create(pdevice: &Arc) -> Result> { - let device_extension_names = Self::extension_names(pdevice); + if !supported { + log::info!("Ray tracing extension not supported: {}", ext); + } - unsafe { - let extension_properties = pdevice - .instance - .raw - .enumerate_device_extension_properties(pdevice.raw)?; - debug!("Extension properties:\n{:#?}", &extension_properties); + supported + }) + }; - let supported_extensions: HashSet = extension_properties - .iter() - .map(|ext| { - std::ffi::CStr::from_ptr(ext.extension_name.as_ptr() as *const c_char) - .to_string_lossy() - .as_ref() - .to_owned() - }) - .collect(); + if ray_tracing_enabled { + log::info!("All ray tracing extensions are supported"); + + device_extension_names.extend(ray_tracing_extensions.iter()); + } + if pdevice.presentation_requested { + device_extension_names.push(khr::Swapchain::name().as_ptr()); + } + + unsafe { for &ext in &device_extension_names { let ext = std::ffi::CStr::from_ptr(ext).to_string_lossy(); if !supported_extensions.contains(ext.as_ref()) { panic!("Device extension not supported: {}", ext); } } - }; + } let priorities = [1.0]; @@ -264,11 
+279,9 @@ impl Device { let mut get_buffer_device_address_features = ash::vk::PhysicalDeviceBufferDeviceAddressFeatures::default(); - #[cfg(feature = "ray-tracing")] let mut acceleration_structure_features = ash::vk::PhysicalDeviceAccelerationStructureFeaturesKHR::default(); - #[cfg(feature = "ray-tracing")] let mut ray_tracing_pipeline_features = ash::vk::PhysicalDeviceRayTracingPipelineFeaturesKHR::default(); @@ -283,8 +296,7 @@ impl Device { .push_next(&mut vulkan_memory_model) .push_next(&mut get_buffer_device_address_features); - #[cfg(feature = "ray-tracing")] - { + if ray_tracing_enabled { features2 = features2 .push_next(&mut acceleration_structure_features) .push_next(&mut ray_tracing_pipeline_features); @@ -311,9 +323,7 @@ impl Device { assert!(descriptor_indexing.shader_uniform_texel_buffer_array_dynamic_indexing != 0); assert!(descriptor_indexing.shader_storage_texel_buffer_array_dynamic_indexing != 0); - assert!(descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing != 0); assert!(descriptor_indexing.shader_sampled_image_array_non_uniform_indexing != 0); - assert!(descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing != 0); assert!(descriptor_indexing.shader_storage_image_array_non_uniform_indexing != 0); assert!(descriptor_indexing.shader_uniform_texel_buffer_array_non_uniform_indexing != 0); assert!(descriptor_indexing.shader_storage_texel_buffer_array_non_uniform_indexing != 0); @@ -327,18 +337,20 @@ impl Device { assert!(shader_float16_int8.shader_int8 != 0); - assert!(vulkan_memory_model.vulkan_memory_model != 0); + if ray_tracing_enabled { + assert!(descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing != 0); + assert!(descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing != 0); + + assert!(vulkan_memory_model.vulkan_memory_model != 0); - #[cfg(feature = "ray-tracing")] - { assert!(acceleration_structure_features.acceleration_structure != 0); 
assert!(acceleration_structure_features.descriptor_binding_acceleration_structure_update_after_bind != 0); assert!(ray_tracing_pipeline_features.ray_tracing_pipeline != 0); assert!(ray_tracing_pipeline_features.ray_tracing_pipeline_trace_rays_indirect != 0); - } - assert!(get_buffer_device_address_features.buffer_device_address != 0); + assert!(get_buffer_device_address_features.buffer_device_address != 0); + } } let device_create_info = vk::DeviceCreateInfo::builder() @@ -412,6 +424,7 @@ impl Device { Mutex::new(Arc::new(frame1)), //Mutex::new(Arc::new(frame2)), ], + ray_tracing_enabled, })) } } @@ -601,6 +614,20 @@ impl Device { pub fn debug_utils(&self) -> Option<&DebugUtils> { self.instance.debug_utils.as_ref() } + + pub fn max_bindless_descriptor_count(&self) -> u32 { + (512 * 1024).min( + self.pdevice + .properties + .limits + .max_per_stage_descriptor_sampled_images + .saturating_sub(RESERVED_DESCRIPTOR_COUNT), // saturating: a limit below the reserve must clamp to 0, not wrap + ) + } + + pub fn ray_tracing_enabled(&self) -> bool { + self.ray_tracing_enabled + } } impl Drop for Device { diff --git a/crates/lib/kajiya-backend/src/vulkan/shader.rs b/crates/lib/kajiya-backend/src/vulkan/shader.rs index 43e0eb25..d5963494 100644 --- a/crates/lib/kajiya-backend/src/vulkan/shader.rs +++ b/crates/lib/kajiya-backend/src/vulkan/shader.rs @@ -19,7 +19,6 @@ use std::{ }; pub const MAX_DESCRIPTOR_SETS: usize = 4; -pub const MAX_BINDLESS_DESCRIPTOR_COUNT: usize = 512 * 1024; type DescriptorSetLayout = HashMap; type StageDescriptorSetLayouts = HashMap; @@ -196,7 +195,7 @@ pub fn create_descriptor_set_layouts( rspirv_reflect::DescriptorDimensionality::Single => 1, rspirv_reflect::DescriptorDimensionality::Array(size) => size, rspirv_reflect::DescriptorDimensionality::RuntimeArray => { - MAX_BINDLESS_DESCRIPTOR_COUNT as u32 + device.max_bindless_descriptor_count() } }; diff --git a/crates/lib/kajiya-rg/Cargo.toml b/crates/lib/kajiya-rg/Cargo.toml index fe96c5b6..e10b36b6 100644 --- a/crates/lib/kajiya-rg/Cargo.toml +++ b/crates/lib/kajiya-rg/Cargo.toml 
@@ -15,8 +15,3 @@ log = "0.4" parking_lot = "0.11" puffin = "0.11.0" turbosloth = { git = "https://github.com/h3r2tic/turbosloth.git", rev = "92030af" } - -[features] -default = ["ray-tracing"] -#default = [] -ray-tracing = [] diff --git a/crates/lib/kajiya-rg/src/temporal.rs b/crates/lib/kajiya-rg/src/temporal.rs index 5dfbd80d..0e5484f5 100644 --- a/crates/lib/kajiya-rg/src/temporal.rs +++ b/crates/lib/kajiya-rg/src/temporal.rs @@ -131,6 +131,10 @@ impl TemporalRenderGraph { temporal_state: state, } } + + pub fn device(&self) -> &Device { + self.device.as_ref() + } } pub trait GetOrCreateTemporal { diff --git a/crates/lib/kajiya/src/bindless_descriptor_set.rs b/crates/lib/kajiya/src/bindless_descriptor_set.rs index e23fc3d8..78864cde 100644 --- a/crates/lib/kajiya/src/bindless_descriptor_set.rs +++ b/crates/lib/kajiya/src/bindless_descriptor_set.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use kajiya_backend::{ash::vk, rspirv_reflect, vulkan::device, MAX_BINDLESS_DESCRIPTOR_COUNT}; +use kajiya_backend::{ash::vk, rspirv_reflect, vulkan::device}; lazy_static::lazy_static! 
{ pub static ref BINDLESS_DESCRIPTOR_SET_LAYOUT: HashMap = [ @@ -60,7 +60,7 @@ pub fn create_bindless_descriptor_set(device: &device::Device) -> vk::Descriptor .build(), vk::DescriptorSetLayoutBinding::builder() .binding(2) - .descriptor_count(MAX_BINDLESS_DESCRIPTOR_COUNT as _) + .descriptor_count(device.max_bindless_descriptor_count() as _) .descriptor_type(vk::DescriptorType::SAMPLED_IMAGE) .stage_flags(vk::ShaderStageFlags::ALL) .build(), @@ -80,7 +80,7 @@ pub fn create_bindless_descriptor_set(device: &device::Device) -> vk::Descriptor }, vk::DescriptorPoolSize { ty: vk::DescriptorType::SAMPLED_IMAGE, - descriptor_count: MAX_BINDLESS_DESCRIPTOR_COUNT as _, + descriptor_count: device.max_bindless_descriptor_count() as _, }, ]; @@ -95,7 +95,7 @@ pub fn create_bindless_descriptor_set(device: &device::Device) -> vk::Descriptor .unwrap() }; - let variable_descriptor_count = MAX_BINDLESS_DESCRIPTOR_COUNT as _; + let variable_descriptor_count = device.max_bindless_descriptor_count() as _; let mut variable_descriptor_count_allocate_info = vk::DescriptorSetVariableDescriptorCountAllocateInfo::builder() .descriptor_counts(std::slice::from_ref(&variable_descriptor_count)) diff --git a/crates/lib/kajiya/src/default_world_renderer.rs b/crates/lib/kajiya/src/default_world_renderer.rs index e71636ae..1a13805a 100644 --- a/crates/lib/kajiya/src/default_world_renderer.rs +++ b/crates/lib/kajiya/src/default_world_renderer.rs @@ -46,7 +46,9 @@ impl WorldRenderer { } // Build an empty TLAS to create the resources. We'll update it at runtime. 
- world_renderer.build_ray_tracing_top_level_acceleration(); + if backend.device.ray_tracing_enabled() { + world_renderer.build_ray_tracing_top_level_acceleration(); + } Ok(world_renderer) } diff --git a/crates/lib/kajiya/src/renderers/csgi.rs b/crates/lib/kajiya/src/renderers/csgi.rs index 5867e78f..1691764b 100644 --- a/crates/lib/kajiya/src/renderers/csgi.rs +++ b/crates/lib/kajiya/src/renderers/csgi.rs @@ -184,66 +184,12 @@ impl CsgiRenderer { bindless_descriptor_set: vk::DescriptorSet, tlas: &rg::Handle, ) -> CsgiVolume { - let mut direct_cascades: [rg::Handle; CASCADE_COUNT] = array_init::array_init(|i| { - rg.get_or_create_temporal( - format!("csgi.direct_cascade{}", i), - ImageDesc::new_3d( - //vk::Format::B10G11R11_UFLOAT_PACK32, - vk::Format::R16G16B16A16_SFLOAT, - [ - VOLUME_DIMS * CARDINAL_DIRECTION_COUNT as u32, - VOLUME_DIMS, - VOLUME_DIMS, - ], - ) - .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), - ) - .unwrap() - }); - - let mut indirect_cascades: [rg::Handle; CASCADE_COUNT] = - array_init::array_init(|i| { - rg.get_or_create_temporal( - format!("csgi.indirect_cascade{}", i), - ImageDesc::new_3d( - vk::Format::B10G11R11_UFLOAT_PACK32, - //vk::Format::R16G16B16A16_SFLOAT, - [ - VOLUME_DIMS * TOTAL_SUBRAY_COUNT as u32, - VOLUME_DIMS, - VOLUME_DIMS, - ], - ) - .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), - ) - .unwrap() - }); - - let mut indirect_combined_cascades: [rg::Handle; CASCADE_COUNT] = - array_init::array_init(|i| { - rg.get_or_create_temporal( - format!("csgi.indirect_cascade_combined{}", i), - ImageDesc::new_3d( - vk::Format::B10G11R11_UFLOAT_PACK32, - //vk::Format::R16G16B16A16_SFLOAT, - [ - VOLUME_DIMS * TOTAL_DIRECTION_COUNT as u32, - VOLUME_DIMS, - VOLUME_DIMS, - ], - ) - .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), - ) - .unwrap() - }); - - let mut opacity_cascades: [rg::Handle; CASCADE_COUNT] = - array_init::array_init(|_| { - rg.create(ImageDesc::new_3d( - 
vk::Format::R8_UNORM, - [VOLUME_DIMS, VOLUME_DIMS, VOLUME_DIMS], - )) - }); + let CsgiVolume { + direct: mut direct_cascades, + indirect: mut indirect_combined_cascades, + subray_indirect: mut indirect_cascades, + opacity: mut opacity_cascades, + } = self.create_volume(rg); // Stagger cascade updates over frames //let cascade_update_mask = 1usize << (self.frame_idx as usize % CASCADE_COUNT); @@ -382,6 +328,95 @@ impl CsgiRenderer { opacity: opacity_cascades, } } + + pub fn create_volume(&self, rg: &mut rg::TemporalRenderGraph) -> CsgiVolume { + self.create_volume_with_dimensions( + rg, + [ + VOLUME_DIMS * CARDINAL_DIRECTION_COUNT as u32, + VOLUME_DIMS, + VOLUME_DIMS, + ], + [ + VOLUME_DIMS * TOTAL_SUBRAY_COUNT as u32, + VOLUME_DIMS, + VOLUME_DIMS, + ], + [ + VOLUME_DIMS * TOTAL_DIRECTION_COUNT as u32, + VOLUME_DIMS, + VOLUME_DIMS, + ], + [VOLUME_DIMS, VOLUME_DIMS, VOLUME_DIMS], + ) + } + + pub fn create_dummy_volume(&self, rg: &mut rg::TemporalRenderGraph) -> CsgiVolume { + self.create_volume_with_dimensions(rg, [1; 3], [1; 3], [1; 3], [1; 3]) + } + + pub(crate) fn create_volume_with_dimensions( + &self, + rg: &mut rg::TemporalRenderGraph, + direct_cascade_dimensions: [u32; 3], + indirect_cascade_dimensions: [u32; 3], + indirect_combined_cascade_dimensions: [u32; 3], + opacity_cascade_dimensions: [u32; 3], + ) -> CsgiVolume { + let direct_cascades: [rg::Handle; CASCADE_COUNT] = array_init::array_init(|i| { + rg.get_or_create_temporal( + format!("csgi.direct_cascade{}", i), + ImageDesc::new_3d( + //vk::Format::B10G11R11_UFLOAT_PACK32, + vk::Format::R16G16B16A16_SFLOAT, + direct_cascade_dimensions, + ) + .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), + ) + .unwrap() + }); + + let indirect_cascades: [rg::Handle; CASCADE_COUNT] = array_init::array_init(|i| { + rg.get_or_create_temporal( + format!("csgi.indirect_cascade{}", i), + ImageDesc::new_3d( + vk::Format::B10G11R11_UFLOAT_PACK32, + //vk::Format::R16G16B16A16_SFLOAT, + 
indirect_cascade_dimensions, + ) + .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), + ) + .unwrap() + }); + + let indirect_combined_cascades: [rg::Handle; CASCADE_COUNT] = + array_init::array_init(|i| { + rg.get_or_create_temporal( + format!("csgi.indirect_cascade_combined{}", i), + ImageDesc::new_3d( + vk::Format::B10G11R11_UFLOAT_PACK32, + //vk::Format::R16G16B16A16_SFLOAT, + indirect_combined_cascade_dimensions, + ) + .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), + ) + .unwrap() + }); + + let opacity_cascades: [rg::Handle; CASCADE_COUNT] = array_init::array_init(|_| { + rg.create(ImageDesc::new_3d( + vk::Format::R8_UNORM, + opacity_cascade_dimensions, + )) + }); + + CsgiVolume { + direct: direct_cascades, + indirect: indirect_combined_cascades, + subray_indirect: indirect_cascades, + opacity: opacity_cascades, + } + } } impl CsgiVolume { diff --git a/crates/lib/kajiya/src/renderers/rtr.rs b/crates/lib/kajiya/src/renderers/rtr.rs index f9d07e66..555998da 100644 --- a/crates/lib/kajiya/src/renderers/rtr.rs +++ b/crates/lib/kajiya/src/renderers/rtr.rs @@ -184,6 +184,40 @@ impl RtrRenderer { temporal2_tex: &mut self.temporal2_tex, } } + + pub fn create_dummy_output( + &mut self, + rg: &mut rg::TemporalRenderGraph, + gbuffer_depth: &GbufferDepth, + ) -> TracedRtr { + let gbuffer_desc = gbuffer_depth.gbuffer.desc(); + + let resolved_tex = rg.create( + gbuffer_depth + .gbuffer + .desc() + .usage(vk::ImageUsageFlags::empty()) + .format(vk::Format::R8G8B8A8_UNORM), + ); + + let (temporal_output_tex, history_tex) = self + .temporal_tex + .get_output_and_history(rg, Self::temporal_tex_desc(gbuffer_desc.extent_2d())); + + let (ray_len_output_tex, _ray_len_history_tex) = self.ray_len_tex.get_output_and_history( + rg, + ImageDesc::new_2d(vk::Format::R8G8B8A8_UNORM, gbuffer_desc.extent_2d()) + .usage(vk::ImageUsageFlags::SAMPLED | vk::ImageUsageFlags::STORAGE), + ); + + TracedRtr { + resolved_tex, + temporal_output_tex, + history_tex, 
+ ray_len_tex: ray_len_output_tex, + temporal2_tex: &mut self.temporal2_tex, + } + } } impl<'a> TracedRtr<'a> { diff --git a/crates/lib/kajiya/src/world_render_passes.rs b/crates/lib/kajiya/src/world_render_passes.rs index 5ae0d981..3325f9c8 100644 --- a/crates/lib/kajiya/src/world_render_passes.rs +++ b/crates/lib/kajiya/src/world_render_passes.rs @@ -15,7 +15,11 @@ impl WorldRenderer { rg: &mut rg::TemporalRenderGraph, frame_desc: &WorldFrameDesc, ) -> rg::Handle { - let tlas = self.prepare_top_level_acceleration(rg); + let tlas = if rg.device().ray_tracing_enabled() { + Some(self.prepare_top_level_acceleration(rg)) + } else { + None + }; let mut accum_img = rg .get_or_create_temporal( @@ -31,13 +35,17 @@ impl WorldRenderer { let sky_cube = crate::renderers::sky::render_sky_cube(rg); let convolved_sky_cube = crate::renderers::sky::convolve_cube(rg, &sky_cube); - let csgi_volume = self.csgi.render( - frame_desc.camera_matrices.eye_position(), - rg, - &convolved_sky_cube, - self.bindless_descriptor_set, - &tlas, - ); + let csgi_volume = if let Some(tlas) = tlas.as_ref() { + self.csgi.render( + frame_desc.camera_matrices.eye_position(), + rg, + &convolved_sky_cube, + self.bindless_descriptor_set, + tlas, + ) + } else { + self.csgi.create_dummy_volume(rg) + }; let (gbuffer_depth, velocity_img) = { let mut gbuffer_depth = { @@ -104,8 +112,11 @@ impl WorldRenderer { .render(rg, &gbuffer_depth, &reprojection_map, &accum_img); //let ssgi_tex = rg.create(ImageDesc::new_2d(vk::Format::R8_UNORM, [1, 1])); - let sun_shadow_mask = - trace_sun_shadow_mask(rg, &gbuffer_depth, &tlas, self.bindless_descriptor_set); + let sun_shadow_mask = if let Some(tlas) = tlas.as_ref() { + trace_sun_shadow_mask(rg, &gbuffer_depth, tlas, self.bindless_descriptor_set) + } else { + rg.create(gbuffer_depth.depth.desc().format(vk::Format::R8_UNORM)) + }; let denoised_shadow_mask = if self.sun_size_multiplier > 0.0f32 { self.shadow_denoise @@ -114,16 +125,21 @@ impl WorldRenderer { 
sun_shadow_mask.into() }; - let rtdgi = self.rtdgi.render( - rg, - &gbuffer_depth, - &reprojection_map, - &sky_cube, - self.bindless_descriptor_set, - &tlas, - &csgi_volume, - &ssgi_tex, - ); + let rtdgi = if let Some(tlas) = tlas.as_ref() { + self.rtdgi.render( + rg, + &gbuffer_depth, + &reprojection_map, + &sky_cube, + self.bindless_descriptor_set, + tlas, + &csgi_volume, + &ssgi_tex, + ) + } else { + rg.create(ImageDesc::new_2d(vk::Format::R8G8B8A8_UNORM, [1, 1])) + .into() + }; // TODO: don't iter over all the things let any_triangle_lights = self @@ -131,26 +147,32 @@ impl WorldRenderer { .iter() .any(|inst| !self.mesh_lights[inst.mesh.0].lights.is_empty()); - let mut rtr = self.rtr.trace( - rg, - &gbuffer_depth, - &reprojection_map, - &sky_cube, - self.bindless_descriptor_set, - &tlas, - &csgi_volume, - &rtdgi, - ); - - if any_triangle_lights { - // Render specular lighting into the RTR image so they can be jointly filtered - self.lighting.render_specular( - &mut rtr.resolved_tex, + let mut rtr = if let Some(tlas) = tlas.as_ref() { + self.rtr.trace( rg, &gbuffer_depth, + &reprojection_map, + &sky_cube, self.bindless_descriptor_set, - &tlas, - ); + tlas, + &csgi_volume, + &rtdgi, + ) + } else { + self.rtr.create_dummy_output(rg, &gbuffer_depth) + }; + + if any_triangle_lights { + if let Some(tlas) = tlas.as_ref() { + // Render specular lighting into the RTR image so they can be jointly filtered + self.lighting.render_specular( + &mut rtr.resolved_tex, + rg, + &gbuffer_depth, + self.bindless_descriptor_set, + tlas, + ); + } } let rtr = rtr.filter_temporal(rg, &gbuffer_depth, &reprojection_map); @@ -241,9 +263,11 @@ impl WorldRenderer { rg::imageops::clear_color(rg, &mut accum_img, [0.0, 0.0, 0.0, 0.0]); } - let tlas = self.prepare_top_level_acceleration(rg); + if rg.device().ray_tracing_enabled() { + let tlas = self.prepare_top_level_acceleration(rg); - reference_path_trace(rg, &mut accum_img, self.bindless_descriptor_set, &tlas); + reference_path_trace(rg, 
&mut accum_img, self.bindless_descriptor_set, &tlas); + } post_process( rg, diff --git a/crates/lib/kajiya/src/world_renderer.rs b/crates/lib/kajiya/src/world_renderer.rs index a1aba88b..acbd9655 100644 --- a/crates/lib/kajiya/src/world_renderer.rs +++ b/crates/lib/kajiya/src/world_renderer.rs @@ -367,7 +367,12 @@ impl WorldRenderer { temporal_upscale_extent, debug_mode: RenderDebugMode::None, - debug_shading_mode: 0, + debug_shading_mode: if backend.device.ray_tracing_enabled() { + 0 + } else { + // RTX OFF; HACK: reflections buffers currently smear without ray tracing. + 4 + }, ev_shift: 0.0, world_gi_scale: 1.0, sun_size_multiplier: 1.0, // Sun as seen from Earth @@ -537,36 +542,40 @@ impl WorldRenderer { std::slice::from_raw_parts_mut(mesh_buffer_dst, MAX_GPU_MESHES) }; - let base_da = vertex_buffer.device_address(&self.device); - let vertex_buffer_da = base_da + vertex_core_offset as u64; - let index_buffer_da = base_da + vertex_index_offset as u64; + if self.device.ray_tracing_enabled() { + let base_da = vertex_buffer.device_address(&self.device); + let vertex_buffer_da = base_da + vertex_core_offset as u64; + let index_buffer_da = base_da + vertex_index_offset as u64; - let blas = self - .device - .create_ray_tracing_bottom_acceleration( - &RayTracingBottomAccelerationDesc { - geometries: vec![RayTracingGeometryDesc { - geometry_type: RayTracingGeometryType::Triangle, - vertex_buffer: vertex_buffer_da, - index_buffer: index_buffer_da, - vertex_format: vk::Format::R32G32B32_SFLOAT, - vertex_stride: size_of::(), - parts: vec![RayTracingGeometryPart { - index_count: mesh.indices.len(), - index_offset: 0, - max_vertex: mesh - .indices - .as_slice() - .iter() - .copied() - .max() - .expect("mesh must not be empty"), + let blas = self + .device + .create_ray_tracing_bottom_acceleration( + &RayTracingBottomAccelerationDesc { + geometries: vec![RayTracingGeometryDesc { + geometry_type: RayTracingGeometryType::Triangle, + vertex_buffer: vertex_buffer_da, + 
index_buffer: index_buffer_da, + vertex_format: vk::Format::R32G32B32_SFLOAT, + vertex_stride: size_of::(), + parts: vec![RayTracingGeometryPart { + index_count: mesh.indices.len(), + index_offset: 0, + max_vertex: mesh + .indices + .as_slice() + .iter() + .copied() + .max() + .expect("mesh must not be empty"), + }], }], - }], - }, - &self.accel_scratch, - ) - .expect("blas"); + }, + &self.accel_scratch, + ) + .expect("blas"); + + self.mesh_blas.push(Arc::new(blas)); + } mesh_buffer_dst[mesh_idx] = GpuMesh { vertex_core_offset, @@ -583,8 +592,6 @@ impl WorldRenderer { index_count: mesh.indices.len() as _, }); - self.mesh_blas.push(Arc::new(blas)); - let mesh_lights = if opts.use_lights { let emissive_materials = mesh .materials diff --git a/libdxcompiler.dynlib b/libdxcompiler.dynlib new file mode 100755 index 00000000..de14e527 Binary files /dev/null and b/libdxcompiler.dynlib differ