From 6c452f38f49f12bffe0125414415eed2011c342c Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Sat, 14 Jan 2023 01:15:29 +0100 Subject: [PATCH 1/5] Cache programs in GLES backend by their stage info, to avoid recreating the same program untold many times --- CHANGELOG.md | 1 + wgpu-hal/src/gles/adapter.rs | 2 + wgpu-hal/src/gles/device.rs | 109 ++++++++++++++++++++++++++--------- wgpu-hal/src/gles/mod.rs | 25 +++++++- 4 files changed, 109 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90f7335877..68b94618c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -135,6 +135,7 @@ Additionally `Surface::get_default_config` now returns an Option and returns Non - Browsers that support `OVR_multiview2` now report the `MULTIVIEW` feature by @expenses in [#3121](https://github.com/gfx-rs/wgpu/pull/3121). - `Limits::max_push_constant_size` on GLES is now 256 by @Dinnerbone in [#3374](https://github.com/gfx-rs/wgpu/pull/3374). +- Creating multiple pipelines with the same shaders will now be faster, by @Dinnerbone in [#3380](https://github.com/gfx-rs/wgpu/pull/3380). #### Vulkan diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 833ae36d2d..ad7cdd475d 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -578,6 +578,8 @@ impl super::Adapter { features, shading_language_version, max_texture_size, + next_shader_id: Default::default(), + program_cache: Default::default(), }), }, info: Self::make_info(vendor, renderer), diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index a4eb09a974..8d7e141f5a 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -3,12 +3,14 @@ use crate::auxil::map_naga_stage; use glow::HasContext; use std::{ convert::TryInto, - iter, ptr, + ptr, sync::{Arc, Mutex}, }; +use arrayvec::ArrayVec; #[cfg(not(target_arch = "wasm32"))] use std::mem; +use std::sync::atomic::{AtomicU32, Ordering}; type ShaderStage<'a> = ( naga::ShaderStage, @@ -265,14 +267,60 @@ impl super::Device { unsafe { Self::compile_shader(gl, &output, naga_stage, stage.module.label.as_deref()) } } - unsafe fn create_pipeline<'a, I: Iterator>>( + unsafe fn create_pipeline<'a>( &self, gl: &glow::Context, - shaders: I, + shaders: ArrayVec, 3>, layout: &super::PipelineLayout, #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, multiview: Option, - ) -> Result { + ) -> Result, crate::PipelineError> { + let mut program_stages = ArrayVec::new(); + let mut group_to_binding_to_slot = Vec::with_capacity(layout.group_infos.len()); + for group in &*layout.group_infos { + group_to_binding_to_slot.push(group.binding_to_slot.clone()); + } + for &(naga_stage, stage) in &shaders { + program_stages.push(super::ProgramStage { + naga_stage: naga_stage.to_owned(), + shader_id: stage.module.id, + entry_point: stage.entry_point.to_owned(), + }); + } + let glsl_version = match self.shared.shading_language_version { + naga::back::glsl::Version::Embedded { version, .. } => version, + naga::back::glsl::Version::Desktop(_) => unreachable!(), + }; + let mut guard = self.shared.program_cache.lock(); + let program = guard + .entry((program_stages, group_to_binding_to_slot.into_boxed_slice())) + .or_insert_with(|| unsafe { + Self::create_program( + gl, + shaders, + layout, + label, + multiview, + glsl_version, + self.shared.private_caps, + ) + }) + .to_owned()?; + program.ref_count.fetch_add(1, Ordering::Relaxed); + drop(guard); + + Ok(program) + } + + unsafe fn create_program<'a>( + gl: &glow::Context, + shaders: ArrayVec, 3>, + layout: &super::PipelineLayout, + #[cfg_attr(target_arch = "wasm32", allow(unused))] label: Option<&str>, + multiview: Option, + glsl_version: u16, + private_caps: super::PrivateCapabilities, + ) -> Result, crate::PipelineError> { let program = unsafe { gl.create_program() }.unwrap(); #[cfg(not(target_arch = "wasm32"))] if let Some(label) = label { @@ -302,11 +350,7 @@ impl super::Device { // Create empty fragment shader if only vertex shader is present if has_stages == wgt::ShaderStages::VERTEX { - let version = match self.shared.shading_language_version { - naga::back::glsl::Version::Embedded { version, .. } => version, - naga::back::glsl::Version::Desktop(_) => unreachable!(), - }; - let shader_src = format!("#version {} es \n void main(void) {{}}", version,); + let shader_src = format!("#version {} es \n void main(void) {{}}", glsl_version,); log::info!("Only vertex shader is present. Creating an empty fragment shader",); let shader = unsafe { Self::compile_shader( @@ -339,11 +383,7 @@ impl super::Device { log::warn!("\tLink: {}", msg); } - if !self - .shared - .private_caps - .contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) - { + if !private_caps.contains(super::PrivateCapabilities::SHADER_BINDING_LAYOUT) { // This remapping is only needed if we aren't able to put the binding layout // in the shader. We can't remap storage buffers this way. unsafe { gl.use_program(Some(program)) }; @@ -403,11 +443,12 @@ impl super::Device { } } - Ok(super::PipelineInner { + Ok(Arc::new(super::PipelineInner { program, sampler_map, uniforms, - }) + ref_count: AtomicU32::new(0), + })) } } @@ -1059,6 +1100,7 @@ impl crate::Device for super::Device { crate::ShaderInput::Naga(naga) => naga, }, label: desc.label.map(|str| str.to_string()), + id: self.shared.next_shader_id.fetch_add(1, Ordering::Relaxed), }) } unsafe fn destroy_shader_module(&self, _module: super::ShaderModule) {} @@ -1068,11 +1110,11 @@ impl crate::Device for super::Device { desc: &crate::RenderPipelineDescriptor, ) -> Result { let gl = &self.shared.context.lock(); - let shaders = iter::once((naga::ShaderStage::Vertex, &desc.vertex_stage)).chain( - desc.fragment_stage - .as_ref() - .map(|fs| (naga::ShaderStage::Fragment, fs)), - ); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Vertex, &desc.vertex_stage)); + if let Some(ref fs) = desc.fragment_stage { + shaders.push((naga::ShaderStage::Fragment, fs)); + } let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, desc.multiview) }?; @@ -1133,8 +1175,15 @@ impl crate::Device for super::Device { }) } unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { - let gl = &self.shared.context.lock(); - unsafe { gl.delete_program(pipeline.inner.program) }; + let mut program_cache = self.shared.program_cache.lock(); + if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } } unsafe fn create_compute_pipeline( @@ -1142,14 +1191,22 @@ impl crate::Device for super::Device { desc: &crate::ComputePipelineDescriptor, ) -> Result { let gl = &self.shared.context.lock(); - let shaders = iter::once((naga::ShaderStage::Compute, &desc.stage)); + let mut shaders = ArrayVec::new(); + shaders.push((naga::ShaderStage::Compute, &desc.stage)); let inner = unsafe { self.create_pipeline(gl, shaders, desc.layout, desc.label, None) }?; Ok(super::ComputePipeline { inner }) } unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { - let gl = &self.shared.context.lock(); - unsafe { gl.delete_program(pipeline.inner.program) }; + let mut program_cache = self.shared.program_cache.lock(); + if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 { + program_cache.retain(|_, v| match *v { + Ok(ref p) => p.program != pipeline.inner.program, + Err(_) => false, + }); + let gl = &self.shared.context.lock(); + unsafe { gl.delete_program(pipeline.inner.program) }; + } } #[cfg_attr(target_arch = "wasm32", allow(unused))] diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 6ba7f89e12..b95c84fce7 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -84,6 +84,9 @@ use arrayvec::ArrayVec; use glow::HasContext; +use naga::FastHashMap; +use parking_lot::Mutex; +use std::sync::atomic::AtomicU32; use std::{fmt, ops::Range, sync::Arc}; #[derive(Clone)] @@ -197,6 +200,8 @@ struct AdapterShared { workarounds: Workarounds, shading_language_version: naga::back::glsl::Version, max_texture_size: u32, + next_shader_id: AtomicU32, + program_cache: Mutex, } pub struct Adapter { @@ -405,10 +410,13 @@ pub struct BindGroup { contents: Box<[RawBinding]>, } +type ShaderId = u32; + #[derive(Debug)] pub struct ShaderModule { naga: crate::NagaShader, label: Option, + id: ShaderId, } #[derive(Clone, Debug, Default)] @@ -468,6 +476,7 @@ struct PipelineInner { program: glow::Program, sampler_map: SamplerBindMap, uniforms: [UniformDesc; MAX_PUSH_CONSTANTS], + ref_count: AtomicU32, } #[derive(Clone, Debug)] @@ -495,8 +504,20 @@ struct ColorTargetDesc { blend: Option, } +#[derive(PartialEq, Eq, Hash)] +struct ProgramStage { + naga_stage: naga::ShaderStage, + shader_id: ShaderId, + entry_point: String, +} + +type ProgramCache = FastHashMap< + (ArrayVec, Box<[Box<[u8]>]>), + Result, crate::PipelineError>, +>; + pub struct RenderPipeline { - inner: PipelineInner, + inner: Arc, primitive: wgt::PrimitiveState, vertex_buffers: Box<[VertexBufferDesc]>, vertex_attributes: Box<[AttributeDesc]>, @@ -514,7 +535,7 @@ unsafe impl Send for RenderPipeline {} unsafe impl Sync for RenderPipeline {} pub struct ComputePipeline { - inner: PipelineInner, + inner: Arc, } // SAFE: WASM doesn't have threads From a177b414b3a096bdca55930e643af7ebb174ad71 Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Tue, 17 Jan 2023 15:35:29 +0100 Subject: [PATCH 2/5] Don't duplicate an arcs ref count in gles programs --- wgpu-hal/src/gles/device.rs | 10 +++++----- wgpu-hal/src/gles/mod.rs | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 8d7e141f5a..e4e13f5106 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -10,7 +10,7 @@ use std::{ use arrayvec::ArrayVec; #[cfg(not(target_arch = "wasm32"))] use std::mem; -use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::atomic::Ordering; type ShaderStage<'a> = ( naga::ShaderStage, @@ -306,7 +306,6 @@ impl super::Device { ) }) .to_owned()?; - program.ref_count.fetch_add(1, Ordering::Relaxed); drop(guard); Ok(program) @@ -447,7 +446,6 @@ impl super::Device { program, sampler_map, uniforms, - ref_count: AtomicU32::new(0), })) } } @@ -1176,7 +1174,8 @@ impl crate::Device for super::Device { } unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { let mut program_cache = self.shared.program_cache.lock(); - if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 { + // 1 means we're the last thing holding this, so we can safely clean it up + if Arc::strong_count(&pipeline.inner) == 1 { program_cache.retain(|_, v| match *v { Ok(ref p) => p.program != pipeline.inner.program, Err(_) => false, @@ -1199,7 +1198,8 @@ impl crate::Device for super::Device { } unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { let mut program_cache = self.shared.program_cache.lock(); - if pipeline.inner.ref_count.fetch_sub(1, Ordering::Relaxed) == 1 { + // 1 means we're the last thing holding this, so we can safely clean it up + if Arc::strong_count(&pipeline.inner) == 1 { program_cache.retain(|_, v| match *v { Ok(ref p) => p.program != pipeline.inner.program, Err(_) => false, diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index b95c84fce7..8cbaff2afb 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -476,7 +476,6 @@ struct PipelineInner { program: glow::Program, sampler_map: SamplerBindMap, uniforms: [UniformDesc; MAX_PUSH_CONSTANTS], - ref_count: AtomicU32, } #[derive(Clone, Debug)] From 77a69dd8f4b5148c9e469bfb11f4d09fff025842 Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Tue, 17 Jan 2023 15:40:29 +0100 Subject: [PATCH 3/5] Extract ProgramCacheKey from gles ProgramCache --- wgpu-hal/src/gles/device.rs | 5 ++++- wgpu-hal/src/gles/mod.rs | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index e4e13f5106..a877082a7f 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -293,7 +293,10 @@ impl super::Device { }; let mut guard = self.shared.program_cache.lock(); let program = guard - .entry((program_stages, group_to_binding_to_slot.into_boxed_slice())) + .entry(super::ProgramCacheKey { + stages: program_stages, + group_to_binding_to_slot: group_to_binding_to_slot.into_boxed_slice(), + }) .or_insert_with(|| unsafe { Self::create_program( gl, diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 8cbaff2afb..31b11a187d 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -510,10 +510,13 @@ struct ProgramStage { entry_point: String, } -type ProgramCache = FastHashMap< - (ArrayVec, Box<[Box<[u8]>]>), - Result, crate::PipelineError>, ->; +#[derive(PartialEq, Eq, Hash)] +struct ProgramCacheKey { + stages: ArrayVec, + group_to_binding_to_slot: Box<[Box<[u8]>]>, +} + +type ProgramCache = FastHashMap, crate::PipelineError>>; pub struct RenderPipeline { inner: Arc, From 9c5c376fc8bac2c83611701a25f2172a2af1b9aa Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Wed, 18 Jan 2023 22:01:16 +0100 Subject: [PATCH 4/5] gles: Panic if we can't acquire program_cache lock --- wgpu-hal/src/gles/device.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index a877082a7f..e6cdf91f5e 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -291,7 +291,11 @@ impl super::Device { naga::back::glsl::Version::Embedded { version, .. } => version, naga::back::glsl::Version::Desktop(_) => unreachable!(), }; - let mut guard = self.shared.program_cache.lock(); + let mut guard = self + .shared + .program_cache + .try_lock() + .expect("Couldn't acquire program_cache lock"); let program = guard .entry(super::ProgramCacheKey { stages: program_stages, From ecae0d9b16814a4c5993b562f91249efcc668b83 Mon Sep 17 00:00:00 2001 From: Nathan Adams Date: Fri, 20 Jan 2023 00:21:13 +0100 Subject: [PATCH 5/5] Clarify why the program cache is safe --- wgpu-hal/src/gles/device.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index e6cdf91f5e..f75ee59a10 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -296,6 +296,8 @@ impl super::Device { .program_cache .try_lock() .expect("Couldn't acquire program_cache lock"); + // This guard ensures that we can't accidentally destroy a program whilst we're about to reuse it + // The only place that destroys a pipeline is also locking on `program_cache` let program = guard .entry(super::ProgramCacheKey { stages: program_stages, @@ -1181,8 +1183,11 @@ impl crate::Device for super::Device { } unsafe fn destroy_render_pipeline(&self, pipeline: super::RenderPipeline) { let mut program_cache = self.shared.program_cache.lock(); - // 1 means we're the last thing holding this, so we can safely clean it up - if Arc::strong_count(&pipeline.inner) == 1 { + // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache` + // This is safe to assume as long as: + // - `RenderPipeline` can't be cloned + // - The only place that we can get a new reference is during `program_cache.lock()` + if Arc::strong_count(&pipeline.inner) == 2 { program_cache.retain(|_, v| match *v { Ok(ref p) => p.program != pipeline.inner.program, Err(_) => false, @@ -1205,8 +1210,11 @@ impl crate::Device for super::Device { } unsafe fn destroy_compute_pipeline(&self, pipeline: super::ComputePipeline) { let mut program_cache = self.shared.program_cache.lock(); - // 1 means we're the last thing holding this, so we can safely clean it up - if Arc::strong_count(&pipeline.inner) == 1 { + // If the pipeline only has 2 strong references remaining, they're `pipeline` and `program_cache`` + // This is safe to assume as long as: + // - `ComputePipeline` can't be cloned + // - The only place that we can get a new reference is during `program_cache.lock()` + if Arc::strong_count(&pipeline.inner) == 2 { program_cache.retain(|_, v| match *v { Ok(ref p) => p.program != pipeline.inner.program, Err(_) => false,