Skip to content

Commit

Permalink
Add support for macOS (and other GPUs without hardware ray tracing su…
Browse files Browse the repository at this point in the history
…pport) (#46)

* It works.. I guess

* Getting a device timeout

* Adjust script

* Switch to universal libdxcompiler.dylib

* Comment out rust post combine shader

* Use a while loop in the rust post combine shader instead of a for loop

* Switch back to the rust shader for post combine

* Check updated rust shaders into this branch

* Change commented out lines into cfg statements

* Run cargo fmt

* Remove the ray-tracing feature and replace it with a runtime extension check

* Apply suggestions

* Apply suggestions, should be good to merge as an MVP afaik

* Make clippy happy

Co-authored-by: Ashley <ashley@Ashleys-MacBook-Pro.local>
Co-authored-by: Ashley Ruglys <ashley.ruglys@gmai.com>
  • Loading branch information
3 people authored Feb 27, 2022
1 parent fdf3a4e commit 79b7a74
Show file tree
Hide file tree
Showing 16 changed files with 333 additions and 208 deletions.
11 changes: 6 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ Operating systems:
* `uuid-dev`
* In case the bundled `libdxcompiler.so` doesn't work: https://github.com/microsoft/DirectXShaderCompiler#downloads

### (Some) macOS dependencies

* `ossp-uuid` (`brew install ossp-uuid`)

## Building and running

To build `kajiya` and its tools, [you need Rust](https://www.rust-lang.org/tools/install).
Expand Down
Empty file modified assets/fonts/LICENSE.txt
100644 → 100755
Empty file.
4 changes: 1 addition & 3 deletions crates/lib/kajiya-backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ derive_builder = { version = "0.9", default-features = false }
futures = "0.3"
glam = "0.18"
gpu-allocator = { git = "https://github.com/Traverse-Research/gpu-allocator.git", rev = "e66d062cbd73a6c98834fc3e3acef98318097156" }
hassle-rs = "0.4"
hassle-rs = "0.5"
hotwatch = "0.4"
lazy_static = "1.4"
log = "0.4"
Expand All @@ -36,7 +36,5 @@ turbosloth = { git = "https://github.com/h3r2tic/turbosloth.git", rev = "92030af
vk-sync = { git = "https://github.com/h3r2tic/vk-sync-rs", rev = "cb5bbf2" }

[features]
default = ["ray-tracing"]
#default = []
ray-tracing = []
dlss = []
7 changes: 1 addition & 6 deletions crates/lib/kajiya-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,4 @@ pub use file::{canonical_path_from_vfs, normalized_path_from_vfs, set_vfs_mount_
pub use gpu_allocator;
pub use rspirv_reflect;
pub use vk_sync;
pub use vulkan::{
device::Device,
image::*,
shader::{MAX_BINDLESS_DESCRIPTOR_COUNT, MAX_DESCRIPTOR_SETS},
RenderBackend,
};
pub use vulkan::{device::Device, image::*, shader::MAX_DESCRIPTOR_SETS, RenderBackend};
131 changes: 79 additions & 52 deletions crates/lib/kajiya-backend/src/vulkan/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ use std::{
sync::Arc,
};

/// Number of descriptors held back from the device's per-stage sampled-image
/// limit when sizing bindless descriptor arrays (see
/// `Device::max_bindless_descriptor_count`), so that we don't overflow that
/// limit when using bindless _and_ non-bindless descriptors in the same
/// shader stage.
pub const RESERVED_DESCRIPTOR_COUNT: u32 = 32;

pub struct Queue {
pub raw: vk::Queue,
pub family: QueueFamily,
Expand Down Expand Up @@ -145,6 +150,8 @@ pub struct Device {
pub ray_tracing_pipeline_properties: vk::PhysicalDeviceRayTracingPipelinePropertiesKHR,

frames: [Mutex<Arc<DeviceFrame>>; 2],

ray_tracing_enabled: bool,
}

// Allowing `Send` on `frames` is technically unsound. There are some checks
Expand All @@ -156,8 +163,26 @@ unsafe impl Send for Device {}
unsafe impl Sync for Device {}

impl Device {
fn extension_names(pdevice: &Arc<PhysicalDevice>) -> Vec<*const i8> {
let mut device_extension_names_raw = vec![
pub fn create(pdevice: &Arc<PhysicalDevice>) -> Result<Arc<Self>> {
let supported_extensions: HashSet<String> = unsafe {
let extension_properties = pdevice
.instance
.raw
.enumerate_device_extension_properties(pdevice.raw)?;
debug!("Extension properties:\n{:#?}", &extension_properties);

extension_properties
.iter()
.map(|ext| {
std::ffi::CStr::from_ptr(ext.extension_name.as_ptr() as *const c_char)
.to_string_lossy()
.as_ref()
.to_owned()
})
.collect()
};

let mut device_extension_names = vec![
vk::ExtDescriptorIndexingFn::name().as_ptr(),
vk::ExtScalarBlockLayoutFn::name().as_ptr(),
vk::KhrMaintenance1Fn::name().as_ptr(),
Expand All @@ -168,10 +193,8 @@ impl Device {
vk::KhrImagelessFramebufferFn::name().as_ptr(),
vk::KhrImageFormatListFn::name().as_ptr(),
vk::KhrDescriptorUpdateTemplateFn::name().as_ptr(),
vk::KhrDrawIndirectCountFn::name().as_ptr(),
// Rust-GPU
vk::KhrShaderFloat16Int8Fn::name().as_ptr(),
vk::KhrVulkanMemoryModelFn::name().as_ptr(),
// DLSS
#[cfg(feature = "dlss")]
{
Expand All @@ -185,55 +208,47 @@ impl Device {
vk::NvxImageViewHandleFn::name().as_ptr(),
];

#[cfg(feature = "ray-tracing")]
{
device_extension_names_raw.extend(
[
vk::KhrPipelineLibraryFn::name().as_ptr(), // rt dep
vk::KhrDeferredHostOperationsFn::name().as_ptr(), // rt dep
vk::KhrBufferDeviceAddressFn::name().as_ptr(), // rt dep
vk::KhrAccelerationStructureFn::name().as_ptr(),
vk::KhrRayTracingPipelineFn::name().as_ptr(),
//vk::KhrRayQueryFn::name().as_ptr(),
]
.iter(),
);
}
let ray_tracing_extensions = [
vk::KhrVulkanMemoryModelFn::name().as_ptr(), // used in ray tracing shaders
vk::KhrPipelineLibraryFn::name().as_ptr(), // rt dep
vk::KhrDeferredHostOperationsFn::name().as_ptr(), // rt dep
vk::KhrBufferDeviceAddressFn::name().as_ptr(), // rt dep
vk::KhrAccelerationStructureFn::name().as_ptr(),
vk::KhrRayTracingPipelineFn::name().as_ptr(),
];

if pdevice.presentation_requested {
device_extension_names_raw.push(khr::Swapchain::name().as_ptr());
}
let ray_tracing_enabled = unsafe {
ray_tracing_extensions.iter().all(|ext| {
let ext = std::ffi::CStr::from_ptr(*ext).to_string_lossy();

device_extension_names_raw
}
let supported = supported_extensions.contains(ext.as_ref());

pub fn create(pdevice: &Arc<PhysicalDevice>) -> Result<Arc<Self>> {
let device_extension_names = Self::extension_names(pdevice);
if !supported {
log::info!("Ray tracing extension not supported: {}", ext);
}

unsafe {
let extension_properties = pdevice
.instance
.raw
.enumerate_device_extension_properties(pdevice.raw)?;
debug!("Extension properties:\n{:#?}", &extension_properties);
supported
})
};

let supported_extensions: HashSet<String> = extension_properties
.iter()
.map(|ext| {
std::ffi::CStr::from_ptr(ext.extension_name.as_ptr() as *const c_char)
.to_string_lossy()
.as_ref()
.to_owned()
})
.collect();
if ray_tracing_enabled {
log::info!("All ray tracing extensions are supported");

device_extension_names.extend(ray_tracing_extensions.iter());
}

if pdevice.presentation_requested {
device_extension_names.push(khr::Swapchain::name().as_ptr());
}

unsafe {
for &ext in &device_extension_names {
let ext = std::ffi::CStr::from_ptr(ext).to_string_lossy();
if !supported_extensions.contains(ext.as_ref()) {
panic!("Device extension not supported: {}", ext);
}
}
};
}

let priorities = [1.0];

Expand Down Expand Up @@ -264,11 +279,9 @@ impl Device {
let mut get_buffer_device_address_features =
ash::vk::PhysicalDeviceBufferDeviceAddressFeatures::default();

#[cfg(feature = "ray-tracing")]
let mut acceleration_structure_features =
ash::vk::PhysicalDeviceAccelerationStructureFeaturesKHR::default();

#[cfg(feature = "ray-tracing")]
let mut ray_tracing_pipeline_features =
ash::vk::PhysicalDeviceRayTracingPipelineFeaturesKHR::default();

Expand All @@ -283,8 +296,7 @@ impl Device {
.push_next(&mut vulkan_memory_model)
.push_next(&mut get_buffer_device_address_features);

#[cfg(feature = "ray-tracing")]
{
if ray_tracing_enabled {
features2 = features2
.push_next(&mut acceleration_structure_features)
.push_next(&mut ray_tracing_pipeline_features);
Expand All @@ -311,9 +323,7 @@ impl Device {

assert!(descriptor_indexing.shader_uniform_texel_buffer_array_dynamic_indexing != 0);
assert!(descriptor_indexing.shader_storage_texel_buffer_array_dynamic_indexing != 0);
assert!(descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing != 0);
assert!(descriptor_indexing.shader_sampled_image_array_non_uniform_indexing != 0);
assert!(descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing != 0);
assert!(descriptor_indexing.shader_storage_image_array_non_uniform_indexing != 0);
assert!(descriptor_indexing.shader_uniform_texel_buffer_array_non_uniform_indexing != 0);
assert!(descriptor_indexing.shader_storage_texel_buffer_array_non_uniform_indexing != 0);
Expand All @@ -327,18 +337,20 @@ impl Device {

assert!(shader_float16_int8.shader_int8 != 0);

assert!(vulkan_memory_model.vulkan_memory_model != 0);
if ray_tracing_enabled {
assert!(descriptor_indexing.shader_uniform_buffer_array_non_uniform_indexing != 0);
assert!(descriptor_indexing.shader_storage_buffer_array_non_uniform_indexing != 0);

assert!(vulkan_memory_model.vulkan_memory_model != 0);

#[cfg(feature = "ray-tracing")]
{
assert!(acceleration_structure_features.acceleration_structure != 0);
assert!(acceleration_structure_features.descriptor_binding_acceleration_structure_update_after_bind != 0);

assert!(ray_tracing_pipeline_features.ray_tracing_pipeline != 0);
assert!(ray_tracing_pipeline_features.ray_tracing_pipeline_trace_rays_indirect != 0);
}

assert!(get_buffer_device_address_features.buffer_device_address != 0);
assert!(get_buffer_device_address_features.buffer_device_address != 0);
}
}

let device_create_info = vk::DeviceCreateInfo::builder()
Expand Down Expand Up @@ -412,6 +424,7 @@ impl Device {
Mutex::new(Arc::new(frame1)),
//Mutex::new(Arc::new(frame2)),
],
ray_tracing_enabled,
}))
}
}
Expand Down Expand Up @@ -601,6 +614,20 @@ impl Device {
/// Borrows the debug utilities stored on the instance, or `None` when the
/// instance has none.
pub fn debug_utils(&self) -> Option<&DebugUtils> {
    Option::as_ref(&self.instance.debug_utils)
}

/// Returns the descriptor count to use for bindless (runtime-sized) descriptor
/// arrays on this device.
///
/// The value is the physical device's `max_per_stage_descriptor_sampled_images`
/// limit minus [`RESERVED_DESCRIPTOR_COUNT`], capped at `512 * 1024`.
///
/// The subtraction saturates at zero. A device may report a limit smaller than
/// the reserve, and a plain `u32` subtraction would then panic in debug builds
/// or wrap around in release builds (yielding the 512K cap, well above what the
/// device actually supports).
pub fn max_bindless_descriptor_count(&self) -> u32 {
    // Hard upper bound on the bindless array size, regardless of device limits.
    const MAX_BINDLESS_DESCRIPTOR_COUNT: u32 = 512 * 1024;

    self.pdevice
        .properties
        .limits
        .max_per_stage_descriptor_sampled_images
        .saturating_sub(RESERVED_DESCRIPTOR_COUNT)
        .min(MAX_BINDLESS_DESCRIPTOR_COUNT)
}

/// Returns whether ray tracing is enabled on this device, i.e. whether every
/// extension in the ray tracing extension list was reported as supported when
/// the device was created.
pub fn ray_tracing_enabled(&self) -> bool {
self.ray_tracing_enabled
}
}

impl Drop for Device {
Expand Down
3 changes: 1 addition & 2 deletions crates/lib/kajiya-backend/src/vulkan/shader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ use std::{
};

pub const MAX_DESCRIPTOR_SETS: usize = 4;
pub const MAX_BINDLESS_DESCRIPTOR_COUNT: usize = 512 * 1024;

type DescriptorSetLayout = HashMap<u32, rspirv_reflect::DescriptorInfo>;
type StageDescriptorSetLayouts = HashMap<u32, DescriptorSetLayout>;
Expand Down Expand Up @@ -196,7 +195,7 @@ pub fn create_descriptor_set_layouts(
rspirv_reflect::DescriptorDimensionality::Single => 1,
rspirv_reflect::DescriptorDimensionality::Array(size) => size,
rspirv_reflect::DescriptorDimensionality::RuntimeArray => {
MAX_BINDLESS_DESCRIPTOR_COUNT as u32
device.max_bindless_descriptor_count()
}
};

Expand Down
5 changes: 0 additions & 5 deletions crates/lib/kajiya-rg/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,3 @@ log = "0.4"
parking_lot = "0.11"
puffin = "0.11.0"
turbosloth = { git = "https://github.com/h3r2tic/turbosloth.git", rev = "92030af" }

[features]
default = ["ray-tracing"]
#default = []
ray-tracing = []
4 changes: 4 additions & 0 deletions crates/lib/kajiya-rg/src/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ impl TemporalRenderGraph {
temporal_state: state,
}
}

/// Borrows the `Device` held by this temporal render graph.
pub fn device(&self) -> &Device {
    AsRef::as_ref(&self.device)
}
}

pub trait GetOrCreateTemporal<Desc: ResourceDesc> {
Expand Down
8 changes: 4 additions & 4 deletions crates/lib/kajiya/src/bindless_descriptor_set.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::HashMap;

use kajiya_backend::{ash::vk, rspirv_reflect, vulkan::device, MAX_BINDLESS_DESCRIPTOR_COUNT};
use kajiya_backend::{ash::vk, rspirv_reflect, vulkan::device};

lazy_static::lazy_static! {
pub static ref BINDLESS_DESCRIPTOR_SET_LAYOUT: HashMap<u32, rspirv_reflect::DescriptorInfo> = [
Expand Down Expand Up @@ -60,7 +60,7 @@ pub fn create_bindless_descriptor_set(device: &device::Device) -> vk::Descriptor
.build(),
vk::DescriptorSetLayoutBinding::builder()
.binding(2)
.descriptor_count(MAX_BINDLESS_DESCRIPTOR_COUNT as _)
.descriptor_count(device.max_bindless_descriptor_count() as _)
.descriptor_type(vk::DescriptorType::SAMPLED_IMAGE)
.stage_flags(vk::ShaderStageFlags::ALL)
.build(),
Expand All @@ -80,7 +80,7 @@ pub fn create_bindless_descriptor_set(device: &device::Device) -> vk::Descriptor
},
vk::DescriptorPoolSize {
ty: vk::DescriptorType::SAMPLED_IMAGE,
descriptor_count: MAX_BINDLESS_DESCRIPTOR_COUNT as _,
descriptor_count: device.max_bindless_descriptor_count() as _,
},
];

Expand All @@ -95,7 +95,7 @@ pub fn create_bindless_descriptor_set(device: &device::Device) -> vk::Descriptor
.unwrap()
};

let variable_descriptor_count = MAX_BINDLESS_DESCRIPTOR_COUNT as _;
let variable_descriptor_count = device.max_bindless_descriptor_count() as _;
let mut variable_descriptor_count_allocate_info =
vk::DescriptorSetVariableDescriptorCountAllocateInfo::builder()
.descriptor_counts(std::slice::from_ref(&variable_descriptor_count))
Expand Down
4 changes: 3 additions & 1 deletion crates/lib/kajiya/src/default_world_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ impl WorldRenderer {
}

// Build an empty TLAS to create the resources. We'll update it at runtime.
world_renderer.build_ray_tracing_top_level_acceleration();
if backend.device.ray_tracing_enabled() {
world_renderer.build_ray_tracing_top_level_acceleration();
}

Ok(world_renderer)
}
Expand Down
Loading

0 comments on commit 79b7a74

Please sign in to comment.