Skip to content

Commit

Permalink
"fix" timestamp conversion on metal, comments to clarify the situatio…
Browse files Browse the repository at this point in the history
…n on timestamp periods generally
  • Loading branch information
Wumpf committed May 1, 2023
1 parent 88c7da8 commit 78e7896
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 20 deletions.
37 changes: 34 additions & 3 deletions wgpu-hal/src/metal/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,41 @@ impl crate::Adapter<super::Api> for super::Adapter {
.device
.lock()
.new_command_queue_with_max_command_buffer_count(MAX_COMMAND_BUFFERS);

// Acquiring the meaning of timestamp ticks is hard with Metal!
// The only thing there is is a method correlating cpu & gpu timestamps (`device.sample_timestamps`).
// Users are supposed to call this method twice and calculate the difference,
// see "Converting GPU Timestamps into CPU Time":
// https://developer.apple.com/documentation/metal/gpu_counters_and_counter_sample_buffers/converting_gpu_timestamps_into_cpu_time
// Not only does this mean we get an approximate value, this is as also *very slow*!
// Chromium opted to solve this using a linear regression that they stop at some point
// https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/dawn/src/dawn/native/metal/DeviceMTL.mm;drc=76be2f9f117654f3fe4faa477b0445114fccedda;bpv=0;bpt=1;l=46
// Generally, the assumption is that timestamp values aren't changing over time, after all all other APIs provide stable values.
//
// We should do as Chromium does for the general case, but this requires quite some state tracking
// and doesn't even provide perfectly accurate values, especially at the start of the application when
// we didn't have the chance to sample a lot of values just yet.
//
// So instead, we're doing the dangerous but easy thing and use our "knowledge" of timestamps
// conversions on different devices, after all Metal isn't supported on that many ;)
// Based on:
// * https://github.com/gfx-rs/wgpu/pull/2528
// * https://github.com/gpuweb/gpuweb/issues/1325#issuecomment-761041326
let timestamp_period = if self.shared.device.lock().name().starts_with("Intel") {
83.333
} else {
// Known for Apple Silicon (at least M1 & M2, iPad Pro 2018) and AMD GPUs.
1.0
};

Ok(crate::OpenDevice {
device: super::Device {
shared: Arc::clone(&self.shared),
features,
},
queue: super::Queue {
raw: Arc::new(Mutex::new(queue)),
timestamp_period,
},
})
}
Expand Down Expand Up @@ -766,10 +794,13 @@ impl super::PrivateCapabilities {
| F::TEXTURE_FORMAT_16BIT_NORM
| F::SHADER_F16
| F::DEPTH32FLOAT_STENCIL8
| F::MULTI_DRAW_INDIRECT
| F::TIMESTAMP_QUERY;
| F::MULTI_DRAW_INDIRECT;

//TODO: if not on apple silicon, we can do timestamps within pass.
if self.supports_timestamp_period {
features.insert(F::TIMESTAMP_QUERY);
// TODO: if not on apple silicon, we can do timestamps within pass.
//features.insert(F::TIMESTAMP_QUERY_INSIDE_PASSES);
}

features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);
Expand Down
21 changes: 5 additions & 16 deletions wgpu-hal/src/metal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,18 +282,21 @@ pub struct Adapter {

pub struct Queue {
raw: Arc<Mutex<metal::CommandQueue>>,
timestamp_period: f32,
}

unsafe impl Send for Queue {}
unsafe impl Sync for Queue {}

impl Queue {
pub unsafe fn queue_from_raw(raw: metal::CommandQueue) -> Self {
pub unsafe fn queue_from_raw(raw: metal::CommandQueue, timestamp_period: f32) -> Self {
Self {
raw: Arc::new(Mutex::new(raw)),
timestamp_period,
}
}
}

pub struct Device {
shared: Arc<AdapterShared>,
features: wgt::Features,
Expand Down Expand Up @@ -403,21 +406,7 @@ impl crate::Queue<Api> for Queue {
}

unsafe fn get_timestamp_period(&self) -> f32 {
let queue = self.raw.lock();
let (mut cpu_timestamp0, mut gpu_timestamp0) = (0_u64, 0_u64);
let device = queue.device().to_owned();
device.sample_timestamps(&mut cpu_timestamp0, &mut gpu_timestamp0);
if cpu_timestamp0 == 0 || gpu_timestamp0 == 0 {
return 1.0;
}

let command_buffer = queue.new_command_buffer();
command_buffer.commit();
command_buffer.wait_until_scheduled();
let (mut cpu_timestamp1, mut gpu_timestamp1) = (0_u64, 0_u64);
device.sample_timestamps(&mut cpu_timestamp1, &mut gpu_timestamp1);

(cpu_timestamp1 - cpu_timestamp0) as f32 / (gpu_timestamp1 - gpu_timestamp0) as f32
self.timestamp_period
}
}

Expand Down
3 changes: 2 additions & 1 deletion wgpu/src/backend/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2515,7 +2515,8 @@ impl crate::context::Context for Context {
_queue: &Self::QueueId,
_queue_data: &Self::QueueData,
) -> f32 {
1.0 //TODO
// Timestamp values are always in nanoseconds, see https://gpuweb.github.io/gpuweb/#timestamp
1.0
}

fn queue_on_submitted_work_done(
Expand Down
4 changes: 4 additions & 0 deletions wgpu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4094,6 +4094,10 @@ impl Queue {
/// Gets the amount of nanoseconds each tick of a timestamp query represents.
///
/// Returns zero if timestamp queries are unsupported.
///
/// TODO: https://github.com/gfx-rs/wgpu/issues/3741
/// Timestamp values are supposed to represent nanosecond values, see https://gpuweb.github.io/gpuweb/#timestamp
/// Therefore, this is always 1.0 on the web, but on wgpu-core a manual conversion is required currently.
pub fn get_timestamp_period(&self) -> f32 {
DynContext::queue_get_timestamp_period(&*self.context, &self.id, self.data.as_ref())
}
Expand Down

0 comments on commit 78e7896

Please sign in to comment.