"fix" timestamp conversion on metal, comments to clarify the situatio…

…n on timestamp periods generally
gfx-rs · May 1, 2023 · 78e7896 · 78e7896
1 parent 88c7da8
commit 78e7896
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 20 deletions.
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
@@ -27,13 +27,41 @@ impl crate::Adapter<super::Api> for super::Adapter {
             .device
             .lock()
             .new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS);
+
+        // Acquiring the meaning of timestamp ticks is hard with Metal!
+        // The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`).
+        // Users are supposed to call this method twice and calculate the difference,
+        // see "Converting GPU Timestamps into CPU Time":
+        // https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time
+        // Not only does this mean we get an approximate value, this is as also *very slow*!
+        // Chromium opted to solve this using a linear regression that they stop at some point
+        // https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46
+        // Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values.
+        //
+        // We should do as Chromium does for the general case, but this requires quite some state tracking
+        // and doesn't even provide perfectly accurate values, especially at the start of the application when
+        // we didn't have the chance to sample a lot of values just yet.
+        //
+        // So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps
+        // conversions on different devices, after all Metal isn't supported on that many ;)
+        // Based on:
+        // * https://github.com/gfx-rs/wgpu/pull/2528
+        // * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326
+        let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") {
+            83.333
+        } else {
+            // Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs.
+            1.0
+        };
+
         Ok(crate::OpenDevice {
             device: super::Device {
                 shared: Arc::clone(&self.shared),
                 features,
             },
             queue: super::Queue {
                 raw: Arc::new(Mutex::new(queue)),
+                timestamp_period,
             },
         })
     }
@@ -766,10 +794,13 @@ impl super::PrivateCapabilities {
             | F::TEXTURE_FORMAT_16BIT_NORM
             | F::SHADER_F16
             | F::DEPTH32FLOAT_STENCIL8
-            | F::MULTI_DRAW_INDIRECT
-            | F::TIMESTAMP_QUERY;
+            | F::MULTI_DRAW_INDIRECT;
 
-        //TODO: if not on apple silicon, we can do timestamps within pass.
+        if self.supports_timestamp_period {
+            features.insert(F::TIMESTAMP_QUERY);
+            // TODO: if not on apple silicon, we can do timestamps within pass.
+            //features.insert(F::TIMESTAMP_QUERY_INSIDE_PASSES);
+        }
 
         features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
         features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);

diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs
@@ -282,18 +282,21 @@ pub struct Adapter {
 
 pub struct Queue {
     raw: Arc<Mutex<metal::CommandQueue>>,
+    timestamp_period: f32,
 }
 
 unsafe impl Send for Queue {}
 unsafe impl Sync for Queue {}
 
 impl Queue {
-    pub unsafe fn queue_from_raw(raw: metal::CommandQueue) -> Self {
+    pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self {
         Self {
             raw: Arc::new(Mutex::new(raw)),
+            timestamp_period,
         }
     }
 }
+
 pub struct Device {
     shared: Arc<AdapterShared>,
     features: wgt::Features,
@@ -403,21 +406,7 @@ impl crate::Queue<Api> for Queue {
     }
 
     unsafe fn get_timestamp_period(&self) -> f32 {
-        let queue = self.raw.lock();
-        let (mut cpu_timestamp0, mut gpu_timestamp0) = (0_u64, 0_u64);
-        let device = queue.device().to_owned();
-        device.sample_timestamps(&mut cpu_timestamp0, &mut gpu_timestamp0);
-        if cpu_timestamp0 == 0 || gpu_timestamp0 == 0 {
-            return 1.0;
-        }
-
-        let command_buffer = queue.new_command_buffer();
-        command_buffer.commit();
-        command_buffer.wait_until_scheduled();
-        let (mut cpu_timestamp1, mut gpu_timestamp1) = (0_u64, 0_u64);
-        device.sample_timestamps(&mut cpu_timestamp1, &mut gpu_timestamp1);
-
-        (cpu_timestamp1 - cpu_timestamp0) as f32 / (gpu_timestamp1 - gpu_timestamp0) as f32
+        self.timestamp_period
     }
 }
 

diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs
@@ -2515,7 +2515,8 @@ impl crate::context::Context for Context {
         _queue: &Self::QueueId,
         _queue_data: &Self::QueueData,
     ) -> f32 {
-        1.0 //TODO
+        // Timestamp values are always in nanoseconds, see https://gpuweb.github.io/gpuweb/#timestamp
+        1.0
     }
 
     fn queue_on_submitted_work_done(

diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
@@ -4094,6 +4094,10 @@ impl Queue {
     /// Gets the amount of nanoseconds each tick of a timestamp query represents.
     ///
     /// Returns zero if timestamp queries are unsupported.
+    ///
+    /// TODO: https://github.com/gfx-rs/wgpu/issues/3741
+    /// Timestamp values are supposed to represent nanosecond values, see https://gpuweb.github.io/gpuweb/#timestamp
+    /// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required currently.
     pub fn get_timestamp_period(&self) -> f32 {
         DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
     }