From 3bdad8bd77ccd7e5bc81649b6c8ece060d7f01c4 Mon Sep 17 00:00:00 2001 From: Zoxc Date: Tue, 24 Oct 2023 03:26:23 +0200 Subject: [PATCH] Add timestamp support to OpenGL (#4267) --- Cargo.lock | 4 +- wgpu-hal/Cargo.toml | 2 +- wgpu-hal/src/gles/adapter.rs | 54 ++++++++++------- wgpu-hal/src/gles/command.rs | 34 ++++++++++- wgpu-hal/src/gles/device.rs | 9 +++ wgpu-hal/src/gles/egl.rs | 2 +- wgpu-hal/src/gles/mod.rs | 3 + wgpu-hal/src/gles/queue.rs | 103 ++++++++++++++++++++++++++------- wgpu-hal/src/gles/wgl.rs | 2 +- wgpu-hal/src/vulkan/adapter.rs | 3 +- wgpu-types/src/lib.rs | 17 ++++++ 11 files changed, 183 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e92b252960..2da917d60c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1330,9 +1330,9 @@ checksum = "b5418c17512bdf42730f9032c74e1ae39afc408745ebb2acf72fbc4691c17945" [[package]] name = "glow" -version = "0.12.3" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca0fe580e4b60a8ab24a868bc08e2f03cbcb20d3d676601fa909386713333728" +checksum = "886c2a30b160c4c6fec8f987430c26b526b7988ca71f664e6a699ddf6f9601e4" dependencies = [ "js-sys", "slotmap", diff --git a/wgpu-hal/Cargo.toml b/wgpu-hal/Cargo.toml index 0dd4fe8ca6..65221327ac 100644 --- a/wgpu-hal/Cargo.toml +++ b/wgpu-hal/Cargo.toml @@ -67,7 +67,7 @@ rustc-hash = "1.1" log = "0.4" # backend: Gles -glow = { version = "0.12.3", optional = true } +glow = { version = "0.13", optional = true } [dependencies.wgt] package = "wgpu-types" diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 6f47cba228..e8cf3d270d 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -228,6 +228,28 @@ impl super::Adapter { return None; } + if let Some(es_ver) = es_ver { + if es_ver < (3, 0) { + log::warn!( + "Returned GLES context is {}.{}, when 3.0+ was requested", + es_ver.0, + es_ver.1 + ); + return None; + } + } + + if let Some(full_ver) = full_ver { + if full_ver < 
(3, 3) { + log::warn!( + "Returned GL context is {}.{}, when 3.3+ is needed", + full_ver.0, + full_ver.1 + ); + return None; + } + } + let shading_language_version = { let sl_version = unsafe { gl.get_parameter_string(glow::SHADING_LANGUAGE_VERSION) }; log::trace!("SL version: {}", &sl_version); @@ -251,28 +273,6 @@ impl super::Adapter { log::trace!("Supported GL Extensions: {:#?}", extensions); - if let Some(es_ver) = es_ver { - if es_ver < (3, 0) { - log::warn!( - "Returned GLES context is {}.{}, when 3.0+ was requested", - es_ver.0, - es_ver.1 - ); - return None; - } - } - - if let Some(full_ver) = full_ver { - if full_ver < (3, 3) { - log::warn!( - "Returned GL context is {}.{}, when 3.3+ is needed", - full_ver.0, - full_ver.1 - ); - return None; - } - } - let supported = |(req_es_major, req_es_minor), (req_full_major, req_full_minor)| { let es_supported = es_ver .map(|es_ver| es_ver >= (req_es_major, req_es_minor)) @@ -411,6 +411,11 @@ impl super::Adapter { wgt::DownlevelFlags::MULTISAMPLED_SHADING, supported((3, 2), (4, 0)) || extensions.contains("OES_sample_variables"), ); + let query_buffers = extensions.contains("GL_ARB_query_buffer_object") + || extensions.contains("GL_AMD_query_buffer_object"); + if query_buffers { + downlevel_flags.set(wgt::DownlevelFlags::NONBLOCKING_QUERY_RESOLVE, true); + } let mut features = wgt::Features::empty() | wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES @@ -450,6 +455,10 @@ impl super::Adapter { supported((3, 1), (4, 2)) || extensions.contains("GL_ARB_shader_image_load_store"), ); features.set(wgt::Features::SHADER_UNUSED_VERTEX_OUTPUT, true); + if extensions.contains("GL_ARB_timer_query") { + features.set(wgt::Features::TIMESTAMP_QUERY, true); + features.set(wgt::Features::TIMESTAMP_QUERY_INSIDE_PASSES, true); + } let gl_bcn_exts = [ "GL_EXT_texture_compression_s3tc", "GL_EXT_texture_compression_rgtc", @@ -574,6 +583,7 @@ impl super::Adapter { extensions.contains("OES_texture_float_linear") }, ); + 
private_caps.set(super::PrivateCapabilities::QUERY_BUFFERS, query_buffers); let max_texture_size = unsafe { gl.get_parameter_i32(glow::MAX_TEXTURE_SIZE) } as u32; let max_texture_3d_size = unsafe { gl.get_parameter_i32(glow::MAX_3D_TEXTURE_SIZE) } as u32; diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs index 89eae94320..27a9e11fdc 100644 --- a/wgpu-hal/src/gles/command.rs +++ b/wgpu-hal/src/gles/command.rs @@ -31,6 +31,7 @@ pub(super) struct State { dirty_vbuf_mask: usize, active_first_instance: u32, push_offset_to_uniform: ArrayVec, + end_of_pass_timestamp: Option, } impl super::CommandBuffer { @@ -409,8 +410,9 @@ impl crate::CommandEncoder for super::CommandEncoder { unsafe fn end_query(&mut self, set: &super::QuerySet, _index: u32) { self.cmd_buffer.commands.push(C::EndQuery(set.target)); } - unsafe fn write_timestamp(&mut self, _set: &super::QuerySet, _index: u32) { - unimplemented!() + unsafe fn write_timestamp(&mut self, set: &super::QuerySet, index: u32) { + let query = set.queries[index as usize]; + self.cmd_buffer.commands.push(C::TimestampQuery(query)); } unsafe fn reset_queries(&mut self, _set: &super::QuerySet, _range: Range) { //TODO: what do we do here? 
@@ -439,6 +441,16 @@ impl crate::CommandEncoder for super::CommandEncoder { // render unsafe fn begin_render_pass(&mut self, desc: &crate::RenderPassDescriptor) { + debug_assert!(self.state.end_of_pass_timestamp.is_none()); + if let Some(ref t) = desc.timestamp_writes { + if let Some(index) = t.beginning_of_pass_write_index { + unsafe { self.write_timestamp(t.query_set, index) } + } + self.state.end_of_pass_timestamp = t + .end_of_pass_write_index + .map(|index| t.query_set.queries[index as usize]); + } + self.state.render_size = desc.extent; self.state.resolve_attachments.clear(); self.state.invalidate_attachments.clear(); @@ -623,6 +635,10 @@ impl crate::CommandEncoder for super::CommandEncoder { } self.state.vertex_attributes.clear(); self.state.primitive = super::PrimitiveState::default(); + + if let Some(query) = self.state.end_of_pass_timestamp.take() { + self.cmd_buffer.commands.push(C::TimestampQuery(query)); + } } unsafe fn set_bind_group( @@ -1030,6 +1046,16 @@ impl crate::CommandEncoder for super::CommandEncoder { // compute unsafe fn begin_compute_pass(&mut self, desc: &crate::ComputePassDescriptor) { + debug_assert!(self.state.end_of_pass_timestamp.is_none()); + if let Some(ref t) = desc.timestamp_writes { + if let Some(index) = t.beginning_of_pass_write_index { + unsafe { self.write_timestamp(t.query_set, index) } + } + self.state.end_of_pass_timestamp = t + .end_of_pass_write_index + .map(|index| t.query_set.queries[index as usize]); + } + if let Some(label) = desc.label { let range = self.cmd_buffer.add_marker(label); self.cmd_buffer.commands.push(C::PushDebugGroup(range)); @@ -1041,6 +1067,10 @@ impl crate::CommandEncoder for super::CommandEncoder { self.cmd_buffer.commands.push(C::PopDebugGroup); self.state.has_pass_label = false; } + + if let Some(query) = self.state.end_of_pass_timestamp.take() { + self.cmd_buffer.commands.push(C::TimestampQuery(query)); + } } unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) { diff 
--git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index f77857e67f..fe664c3cfe 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1224,6 +1224,14 @@ impl crate::Device for super::Device { if gl.supports_debug() { use std::fmt::Write; + // Initialize the query so we can label it + match desc.ty { + wgt::QueryType::Timestamp => unsafe { + gl.query_counter(query, glow::TIMESTAMP) + }, + _ => (), + } + if let Some(label) = desc.label { temp_string.clear(); let _ = write!(temp_string, "{label}[{i}]"); @@ -1238,6 +1246,7 @@ impl crate::Device for super::Device { queries: queries.into_boxed_slice(), target: match desc.ty { wgt::QueryType::Occlusion => glow::ANY_SAMPLES_PASSED_CONSERVATIVE, + wgt::QueryType::Timestamp => glow::TIMESTAMP, _ => unimplemented!(), }, }) diff --git a/wgpu-hal/src/gles/egl.rs b/wgpu-hal/src/gles/egl.rs index 5332e92778..1a2250eccf 100644 --- a/wgpu-hal/src/gles/egl.rs +++ b/wgpu-hal/src/gles/egl.rs @@ -946,7 +946,7 @@ impl crate::Instance for Instance { let inner = self.inner.lock(); inner.egl.make_current(); - let gl = unsafe { + let mut gl = unsafe { glow::Context::from_loader_function(|name| { inner .egl diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 9f3a0c7a55..c46dabb96f 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -161,6 +161,8 @@ bitflags::bitflags! { const COLOR_BUFFER_FLOAT = 1 << 9; /// Supports linear flitering `f32` textures. const TEXTURE_FLOAT_LINEAR = 1 << 10; + /// Supports query buffer objects. 
+ const QUERY_BUFFERS = 1 << 11; } } @@ -775,6 +777,7 @@ enum Command { SetIndexBuffer(glow::Buffer), BeginQuery(glow::Query, BindTarget), EndQuery(BindTarget), + TimestampQuery(glow::Query), CopyQueryResults { query_range: Range, dst: Buffer, diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs index 22e1d26ce1..2c57c1f52c 100644 --- a/wgpu-hal/src/gles/queue.rs +++ b/wgpu-hal/src/gles/queue.rs @@ -1,4 +1,4 @@ -use super::{conv::is_layered_target, Command as C}; +use super::{conv::is_layered_target, Command as C, PrivateCapabilities}; use arrayvec::ArrayVec; use glow::HasContext; use std::{mem, slice, sync::Arc}; @@ -808,34 +808,97 @@ impl super::Queue { C::EndQuery(target) => { unsafe { gl.end_query(target) }; } + C::TimestampQuery(query) => { + unsafe { gl.query_counter(query, glow::TIMESTAMP) }; + } C::CopyQueryResults { ref query_range, ref dst, dst_target, dst_offset, } => { - self.temp_query_results.clear(); - for &query in queries[query_range.start as usize..query_range.end as usize].iter() { - let result = unsafe { gl.get_query_parameter_u32(query, glow::QUERY_RESULT) }; - self.temp_query_results.push(result as u64); - } - let query_data = unsafe { - slice::from_raw_parts( - self.temp_query_results.as_ptr() as *const u8, - self.temp_query_results.len() * mem::size_of::(), - ) - }; - match dst.raw { - Some(buffer) => { - unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + if self + .shared + .private_caps + .contains(PrivateCapabilities::QUERY_BUFFERS) + && dst.raw.is_some() + { + unsafe { + // We're assuming that the only relevant queries are 8 byte timestamps or + // occlusion tests. 
+ let query_size = 8; + + let query_range_size = query_size * query_range.len(); + + let buffer = gl.create_buffer().ok(); + gl.bind_buffer(glow::QUERY_BUFFER, buffer); + gl.buffer_data_size( + glow::QUERY_BUFFER, + query_range_size as _, + glow::STREAM_COPY, + ); + + for (i, &query) in queries + [query_range.start as usize..query_range.end as usize] + .iter() + .enumerate() + { + gl.get_query_parameter_u64_with_offset( + query, + glow::QUERY_RESULT, + query_size * i, + ) + } + gl.bind_buffer(dst_target, dst.raw); + gl.copy_buffer_sub_data( + glow::QUERY_BUFFER, + dst_target, + 0, + dst_offset as _, + query_range_size as _, + ); + if let Some(buffer) = buffer { + gl.delete_buffer(buffer) + } + } + } else { + self.temp_query_results.clear(); + for &query in + queries[query_range.start as usize..query_range.end as usize].iter() + { + let mut result: u64 = 0; unsafe { - gl.buffer_sub_data_u8_slice(dst_target, dst_offset as i32, query_data) + let result: *mut u64 = &mut result; + gl.get_query_parameter_u64_with_offset( + query, + glow::QUERY_RESULT, + result as usize, + ) }; + self.temp_query_results.push(result); } - None => { - let data = &mut dst.data.as_ref().unwrap().lock().unwrap(); - let len = query_data.len().min(data.len()); - data[..len].copy_from_slice(&query_data[..len]); + let query_data = unsafe { + slice::from_raw_parts( + self.temp_query_results.as_ptr() as *const u8, + self.temp_query_results.len() * mem::size_of::(), + ) + }; + match dst.raw { + Some(buffer) => { + unsafe { gl.bind_buffer(dst_target, Some(buffer)) }; + unsafe { + gl.buffer_sub_data_u8_slice( + dst_target, + dst_offset as i32, + query_data, + ) + }; + } + None => { + let data = &mut dst.data.as_ref().unwrap().lock().unwrap(); + let len = query_data.len().min(data.len()); + data[..len].copy_from_slice(&query_data[..len]); + } } } } diff --git a/wgpu-hal/src/gles/wgl.rs b/wgpu-hal/src/gles/wgl.rs index 677b7c55cc..84f100c0cb 100644 --- a/wgpu-hal/src/gles/wgl.rs +++ 
b/wgpu-hal/src/gles/wgl.rs @@ -387,7 +387,7 @@ impl crate::Instance for Instance { ) })?; - let gl = unsafe { + let mut gl = unsafe { glow::Context::from_loader_function(|name| load_gl_func(name, Some(opengl_module))) }; diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 5ad8e40023..fd62473fd7 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -326,7 +326,8 @@ impl PhysicalDeviceFeatures { | Df::UNRESTRICTED_INDEX_BUFFER | Df::INDIRECT_EXECUTION | Df::VIEW_FORMATS - | Df::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES; + | Df::UNRESTRICTED_EXTERNAL_TEXTURE_COPIES + | Df::NONBLOCKING_QUERY_RESOLVE; dl_flags.set( Df::SURFACE_VIEW_FORMATS, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index ec968f9e31..82989598ef 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1463,6 +1463,23 @@ bitflags::bitflags! { /// /// The GLES/WebGL and Vulkan on Android doesn't support this. const SURFACE_VIEW_FORMATS = 1 << 21; + + /// If this is true, calls to `CommandEncoder::resolve_query_set` will be performed on the queue timeline. + /// + /// If this is false, calls to `CommandEncoder::resolve_query_set` will be performed on the device (i.e. cpu) timeline + /// and will block that timeline until the query has data. You may work around this limitation by waiting until the submit + /// whose queries you are resolving is fully finished (through use of `queue.on_submitted_work_done`) and only + /// then submitting the resolve_query_set command. The queries will be guaranteed finished, so will not block. + /// + /// Supported by: + /// - Vulkan, + /// - DX12 + /// - Metal + /// - OpenGL 4.4+ + /// + /// Not Supported by: + /// - GL ES / WebGL + const NONBLOCKING_QUERY_RESOLVE = 1 << 22; } }